source: CLRX/CLRadeonExtender/trunk/tests/amdasm/AsmGalliumFormat.cpp @ 3575

Last change on this file since 3575 was 3575, checked in by matszpk, 2 years ago

CLRadeonExtender: Change Copyright dates.

File size: 33.7 KB
Line 
1/*
2 *  CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3 *  Copyright (C) 2014-2018 Mateusz Szpakowski
4 *
5 *  This library is free software; you can redistribute it and/or
6 *  modify it under the terms of the GNU Lesser General Public
7 *  License as published by the Free Software Foundation; either
8 *  version 2.1 of the License, or (at your option) any later version.
9 *
10 *  This library is distributed in the hope that it will be useful,
11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 *  Lesser General Public License for more details.
14 *
15 *  You should have received a copy of the GNU Lesser General Public
16 *  License along with this library; if not, write to the Free Software
17 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18 */
19
20#include <CLRX/Config.h>
21#include <iostream>
22#include <cstdio>
23#include <sstream>
24#include <algorithm>
25#include <memory>
26#include <CLRX/amdasm/Assembler.h>
27#include "../TestUtils.h"
28
29using namespace CLRX;
30
// Printable names of Gallium argument types; printGalliumOutput indexes this
// table with cxuint(arg.type), so the order of entries must not change.
static const char* galliumArgTypeTbl[] =
{
    "scalar",       // 0
    "constant",     // 1
    "global",       // 2
    "local",        // 3
    "image2dro",    // 4
    "image2dwr",    // 5
    "image3dro",    // 6
    "image3dwr",    // 7
    "sampler"       // 8
};
36
// Printable names of Gallium argument semantics; printGalliumOutput indexes
// this table with cxuint(arg.semantic), so the order of entries must not change.
static const char* galliumArgSemanticTbl[] =
{
    "general",      // 0
    "griddim",      // 1
    "gridoffset"    // 2
};
39
40// helper for printing value from kernel config (print default and notsupplied)
41static std::string confValueToString(uint32_t val)
42{
43    if (val == BINGEN_DEFAULT)
44        return "default";
45    if (val == BINGEN_NOTSUPPLIED)
46        return "notsup";
47    std::ostringstream oss;
48    oss << val;
49    return oss.str();
50}
51
52// print hex data or nullptr
53static void printHexData(std::ostream& os, cxuint indentLevel, size_t size,
54             const cxbyte* data)
55{
56    if (data==nullptr)
57    {
58        for (cxuint j = 0; j < indentLevel; j++)
59            os << "  ";
60        os << "nullptr\n";
61        return;
62    }
63    for (size_t i = 0; i < size; i++)
64    {
65        if ((i&31)==0)
66            for (cxuint j = 0; j < indentLevel; j++)
67                os << "  ";
68        char buf[10];
69        snprintf(buf, 10, "%02x", cxuint(data[i]));
70        os << buf;
71        if ((i&31)==31 || i+1 == size)
72            os << '\n';
73    }
74}
75
76// print dump of gallium output to stream for comparing with testcase
77static void printGalliumOutput(std::ostream& os, const GalliumInput* output, bool amdHsa)
78{
79    os << "GalliumBinDump:" << std::endl;
80    for (const GalliumKernelInput& kernel: output->kernels)
81    {
82        os << "  Kernel: name=" << kernel.kernelName << ", " <<
83                "offset=" << kernel.offset << "\n";
84        if (!kernel.useConfig)
85        {
86            // if not configuration, then print same prog info entries (only old gallium)
87            os << "    ProgInfo: ";
88            for (cxuint i = 0; i < 3; i++)
89                os << "0x" << std::hex << kernel.progInfo[i].address <<
90                        "=0x" << kernel.progInfo[i].value << ((i==2)?"\n":", ");
91            os << std::dec;
92        }
93        else
94        {
95            // print kernel config
96            const GalliumKernelConfig& config = kernel.config;
97            os << "    Config:\n";
98            os << "      dims=" << confValueToString(config.dimMask) << ", "
99                    "SGPRS=" << confValueToString(config.usedSGPRsNum) << ", "
100                    "VGPRS=" << confValueToString(config.usedVGPRsNum) << ", "
101                    "pgmRSRC2=" << std::hex << "0x" << config.pgmRSRC2 << ", "
102                    "ieeeMode=0x" << cxuint(config.ieeeMode) << "\n      "
103                    "floatMode=0x" << cxuint(config.floatMode) << std::dec << ", "
104                    "priority=" << cxuint(config.priority) << ", "
105                    "localSize=" << config.localSize << ", "
106                    "scratchBuffer=" << config.scratchBufferSize << std::endl;
107            if (amdHsa)
108            {
109                // print also AMD HSA configuration
110                const AmdHsaKernelConfig& config =
111                    *reinterpret_cast<const AmdHsaKernelConfig*>(
112                                output->code + kernel.offset);
113                os << "    AMD HSA Config:\n"
114                    "      amdCodeVersion=" << ULEV(config.amdCodeVersionMajor) << "." <<
115                        ULEV(config.amdCodeVersionMajor) << "\n"
116                    "      amdMachine=" << ULEV(config.amdMachineKind) << ":" <<
117                        ULEV(config.amdMachineMajor) << ":" <<
118                        ULEV(config.amdMachineMinor) << ":" <<
119                        ULEV(config.amdMachineStepping) << "\n"
120                    "      kernelCodeEntryOffset=" <<
121                        ULEV(config.kernelCodeEntryOffset) << "\n"
122                    "      kernelCodePrefetchOffset=" <<
123                        ULEV(config.kernelCodePrefetchOffset) << "\n"
124                    "      kernelCodePrefetchSize=" <<
125                            ULEV(config.kernelCodePrefetchSize) << "\n"
126                    "      maxScrachBackingMemorySize=" <<
127                        ULEV(config.maxScrachBackingMemorySize) << "\n"
128                    "      computePgmRsrc1=0x" << std::hex <<
129                            ULEV(config.computePgmRsrc1) << "\n"
130                    "      computePgmRsrc2=0x" << ULEV(config.computePgmRsrc2) << "\n"
131                    "      enableSgprRegisterFlags=0x" <<
132                        ULEV(config.enableSgprRegisterFlags) << "\n"
133                    "      enableFeatureFlags=0x" <<
134                        ULEV(config.enableFeatureFlags) << std::dec << "\n"
135                    "      workitemPrivateSegmentSize=" <<
136                        ULEV(config.workitemPrivateSegmentSize) << "\n"
137                    "      workgroupGroupSegmentSize=" <<
138                        ULEV(config.workgroupGroupSegmentSize) << "\n"
139                    "      gdsSegmentSize=" << ULEV(config.gdsSegmentSize) << "\n"
140                    "      kernargSegmentSize=" << ULEV(config.kernargSegmentSize) << "\n"
141                    "      workgroupFbarrierCount=" <<
142                            ULEV(config.workgroupFbarrierCount) << "\n"
143                    "      wavefrontSgprCount=" << ULEV(config.wavefrontSgprCount) << "\n"
144                    "      workitemVgprCount=" << ULEV(config.workitemVgprCount) << "\n"
145                    "      reservedVgprFirst=" << ULEV(config.reservedVgprFirst) << "\n"
146                    "      reservedVgprCount=" << ULEV(config.reservedVgprCount) << "\n"
147                    "      reservedSgprFirst=" << ULEV(config.reservedSgprFirst) << "\n"
148                    "      reservedSgprCount=" << ULEV(config.reservedSgprCount) << "\n"
149                    "      debugWavefrontPrivateSegmentOffsetSgpr=" <<
150                        ULEV(config.debugWavefrontPrivateSegmentOffsetSgpr) << "\n"
151                    "      debugPrivateSegmentBufferSgpr=" <<
152                        ULEV(config.debugPrivateSegmentBufferSgpr) << "\n"
153                    "      kernargSegmentAlignment=" << 
154                        cxuint(config.kernargSegmentAlignment) << "\n"
155                    "      groupSegmentAlignment=" <<
156                        cxuint(config.groupSegmentAlignment) << "\n"
157                    "      privateSegmentAlignment=" <<
158                        cxuint(config.privateSegmentAlignment) << "\n"
159                    "      wavefrontSize=" << cxuint(config.wavefrontSize) << "\n"
160                    "      callConvention=0x" << std::hex <<
161                        ULEV(config.callConvention) << "\n"
162                    "      runtimeLoaderKernelSymbol=0x" <<
163                        ULEV(config.runtimeLoaderKernelSymbol) << std::dec << "\n";
164                os << "      ControlDirective:\n";
165                printHexData(os, 3, 128, config.controlDirective);
166            }
167        }
168        for (const GalliumArgInfo& arg: kernel.argInfos)
169        {
170            os << "    Arg: " << galliumArgTypeTbl[cxuint(arg.type)] << ", " <<
171                    ((arg.signExtended) ? "true" : "false") << ", " <<
172                    galliumArgSemanticTbl[cxuint(arg.semantic)] << ", " <<
173                    "size=" << arg.size << ", tgtSize=" << arg.targetSize << ", " <<
174                    "tgtAlign=" << arg.targetAlign << "\n";
175        }
176        os.flush();
177    }
178    // scratch relocations
179    if (!output->scratchRelocs.empty())
180    {
181        os << "  Scratch relocations:\n";
182        for (const GalliumScratchReloc& rel: output->scratchRelocs)
183            os << "    Rel: offset=" << rel.offset << ", type: " << rel.type << "\n";
184    }
185    // other data from output
186    os << "  Comment:\n";
187    printHexData(os, 1, output->commentSize, (const cxbyte*)output->comment);
188    os << "  GlobalData:\n";
189    printHexData(os, 1, output->globalDataSize, output->globalData);
190    os << "  Code:\n";
191    printHexData(os, 1, output->codeSize, output->code);
192   
193    // print extra sections when supplied
194    for (BinSection section: output->extraSections)
195    {
196        os << "  Section " << section.name << ", type=" << section.type <<
197                        ", flags=" << section.flags << ":\n";
198        printHexData(os, 1, section.size, section.data);
199    }
200    // print extra symbols when supplied
201    for (BinSymbol symbol: output->extraSymbols)
202        os << "  Symbol: name=" << symbol.name << ", value=" << symbol.value <<
203                ", size=" << symbol.size << ", section=" << symbol.sectionId << "\n";
204    os.flush();
205}
206
// Single assembler test case. The member order is significant: the test case
// table fills these structures positionally (input, dump, errors, good).
struct AsmTestCase
{
    const char* input;      // assembler source text of the test case
    const char* dump;       // expected dump text (empty in the error cases)
    const char* errors;     // expected warning/error messages
    bool good;              // presumably the expected assembly status - verify in the test runner
};
214
215static const AsmTestCase asmTestCases1Tbl[] =
216{
217    /* 0 - gallium  */
218    {
219        R"ffDXD(            .gallium
220            .kernel firstKernel
221            .args
222            .arg scalar,8
223            .arg local,4
224            .arg constant,4
225            .arg global,4
226            .arg image3d_rd,4
227            .arg image2d_rd,4
228            .arg image3d_wr,4
229            .arg image2d_wr,4
230            .arg sampler,8
231            .arg scalar,8, , ,sext
232            .arg scalar,8, , ,sext,general
233            .arg scalar,8, 18,2
234            .arg scalar,20, 18,2
235            .arg scalar,8, , , ,griddim
236            .arg griddim,4
237            .arg gridoffset,4
238            .arg scalar, 11111111111, 22222222222222, 4
239            .section .comment
240            .ascii "nocomments"
241            .globaldata
242            .byte 0xf0,0xfd,0x3d,0x44
243            .kernel secondKernel
244            .proginfo
245            .entry 12,22
246            .entry 14,288
247            .entry 16,160
248            .args
249            .arg scalar,4
250            .arg scalar,4
251            .arg griddim,4
252            .arg gridoffset,4
253            .kernel thirdKernel
254            .proginfo
255            .entry 0xfffffaaaaa, 0x12233
256            .entry 0xff, 0x111223030
257            .entry 1,2
258            .text
259firstKernel: .byte 1,22,3,4
260            .p2align 4
261secondKernel:.byte 77,76,75,90,11
262thirdKernel:
263            .section .info1
264            .ascii "noinfo"
265            .section .infox
266            .ascii "refer to some link"
267            .section .softX ,"awx",@nobits
268            .section .softy ,"x",@note
269            .section .softz ,"a",@progbits
270            .section .softX ,"a",@nobits)ffDXD",
271        /* dump */
272        R"ffDXD(GalliumBinDump:
273  Kernel: name=firstKernel, offset=0
274    ProgInfo: 0xb848=0xc0000, 0xb84c=0x1788, 0xb860=0x0
275    Arg: scalar, false, general, size=8, tgtSize=8, tgtAlign=8
276    Arg: local, false, general, size=4, tgtSize=4, tgtAlign=4
277    Arg: constant, false, general, size=4, tgtSize=4, tgtAlign=4
278    Arg: global, false, general, size=4, tgtSize=4, tgtAlign=4
279    Arg: image3dro, false, general, size=4, tgtSize=4, tgtAlign=4
280    Arg: image2dro, false, general, size=4, tgtSize=4, tgtAlign=4
281    Arg: image3dwr, false, general, size=4, tgtSize=4, tgtAlign=4
282    Arg: image2dwr, false, general, size=4, tgtSize=4, tgtAlign=4
283    Arg: sampler, false, general, size=8, tgtSize=8, tgtAlign=8
284    Arg: scalar, true, general, size=8, tgtSize=8, tgtAlign=8
285    Arg: scalar, true, general, size=8, tgtSize=8, tgtAlign=8
286    Arg: scalar, false, general, size=8, tgtSize=18, tgtAlign=2
287    Arg: scalar, false, general, size=20, tgtSize=18, tgtAlign=2
288    Arg: scalar, false, griddim, size=8, tgtSize=8, tgtAlign=8
289    Arg: scalar, false, griddim, size=4, tgtSize=4, tgtAlign=4
290    Arg: scalar, false, gridoffset, size=4, tgtSize=4, tgtAlign=4
291    Arg: scalar, false, general, size=2521176519, tgtSize=61432718, tgtAlign=4
292  Kernel: name=secondKernel, offset=16
293    ProgInfo: 0xc=0x16, 0xe=0x120, 0x10=0xa0
294    Arg: scalar, false, general, size=4, tgtSize=4, tgtAlign=4
295    Arg: scalar, false, general, size=4, tgtSize=4, tgtAlign=4
296    Arg: scalar, false, griddim, size=4, tgtSize=4, tgtAlign=4
297    Arg: scalar, false, gridoffset, size=4, tgtSize=4, tgtAlign=4
298  Kernel: name=thirdKernel, offset=21
299    ProgInfo: 0xfffaaaaa=0x12233, 0xff=0x11223030, 0x1=0x2
300  Comment:
301  6e6f636f6d6d656e7473
302  GlobalData:
303  f0fd3d44
304  Code:
305  01160304000080bf000080bf000080bf4d4c4b5a0b
306  Section .info1, type=1, flags=0:
307  6e6f696e666f
308  Section .infox, type=1, flags=0:
309  726566657220746f20736f6d65206c696e6b
310  Section .softX, type=8, flags=7:
311  Section .softy, type=7, flags=4:
312  Section .softz, type=1, flags=2:
313)ffDXD",
314        "test.s:20:26: Warning: Size of argument out of range\n"
315        "test.s:20:39: Warning: Target size of argument out of range\n"
316        "test.s:37:20: Warning: Value 0xfffffaaaaa truncated to 0xfffaaaaa\n"
317        "test.s:38:26: Warning: Value 0x111223030 truncated to 0x11223030\n"
318        "test.s:52:13: Warning: Section type, flags and alignment was ignored\n", true
319    },
320    /* 1 - gallium (configured proginfo) */
321    { R"ffDXD(            .gallium
322            .kernel aa22
323            .args
324            .arg scalar, 8,,,SEXT,griddim
325            .config
326            .priority 1
327            .floatmode 43
328            .ieeemode
329            .sgprsnum 36
330            .vgprsnum 139
331            .pgmrsrc2 523243
332            .scratchbuffer 230
333            .kernel aa23
334            .args
335            .arg scalar, 8,,,SEXT,griddim
336            .config
337            .dims yz
338            .priority 3
339            .ieeemode
340            .pgmrsrc2 0
341.text
342aa22:
343aa23:)ffDXD",
344       R"ffDXD(GalliumBinDump:
345  Kernel: name=aa22, offset=0
346    Config:
347      dims=default, SGPRS=36, VGPRS=139, pgmRSRC2=0x7fbeb, ieeeMode=0x1
348      floatMode=0x2b, priority=1, localSize=0, scratchBuffer=230
349    Arg: scalar, true, griddim, size=8, tgtSize=8, tgtAlign=8
350  Kernel: name=aa23, offset=0
351    Config:
352      dims=6, SGPRS=8, VGPRS=3, pgmRSRC2=0x0, ieeeMode=0x1
353      floatMode=0xc0, priority=3, localSize=0, scratchBuffer=0
354    Arg: scalar, true, griddim, size=8, tgtSize=8, tgtAlign=8
355  Comment:
356  nullptr
357  GlobalData:
358  nullptr
359  Code:
360)ffDXD", "", true
361    },
362    /* 2 - gallium (errors) */
363    {
364        R"ffDXD(            .gallium
365            .kernel firstKernel
366            .entry 111,1
367            .arg scalar,8
368            .arg local,4
369            .arg constant,4
370            .args
371            .arg sclar,5
372            .arg scalar
373            .arg image3d_rd,0,0,1
374            .arg sampler,4,4,4     , zxx,    ssds
375            .arg scalar,8, 18,2,zext,general ,
376            .proginfo
377            .entry ,
378            .entry  ,66
379            .entry 66,
380            .kernel secondKernel
381            .proginfo
382            .entry 1,2
383            .entry 2,3
384            .entry 3,4
385            .entry 5,6
386            .args
387            .proginfo
388            .entry 7,8
389            .section .txt3, "a  ", @xxx
390            .section .txt3, "ax", x
391            .section .txt3, "a vcxs", @  )ffDXD",
392        /* dump */
393        "",
394        /* errors */
395        R"ffDXD(test.s:3:13: Error: ProgInfo entry definition outside ProgInfo
396test.s:4:13: Error: Argument definition outside arguments list
397test.s:5:13: Error: Argument definition outside arguments list
398test.s:6:13: Error: Argument definition outside arguments list
399test.s:8:18: Error: Unknown argument type
400test.s:9:24: Error: Expected ',' before argument
401test.s:10:29: Warning: Size of argument out of range
402test.s:10:31: Warning: Target size of argument out of range
403test.s:11:38: Error: Unknown numeric extension
404test.s:11:46: Error: Unknown argument semantic
405test.s:12:46: Error: Garbages at end of line
406test.s:14:20: Error: Expected expression
407test.s:14:21: Error: Expected expression
408test.s:15:21: Error: Expected expression
409test.s:16:23: Error: Expected expression
410test.s:22:13: Error: Maximum 3 entries can be in ProgInfo
411test.s:25:13: Error: Maximum 3 entries can be in ProgInfo
412test.s:26:29: Error: Only 'a', 'w', 'x' is accepted in flags string
413test.s:26:36: Error: Unknown section type
414test.s:27:35: Error: Section type was not preceded by '@'
415test.s:28:29: Error: Only 'a', 'w', 'x' is accepted in flags string
416test.s:28:39: Error: Section type was not preceded by '@'
417)ffDXD", false
418    },
419    {
420        R"ffDXD(            .gallium
421            .kernel aa22
422            .config
423            .proginfo
424            .kernel av77
425            .proginfo
426            .config
427            .kernel aa22
428            .args
429            .arg scalar, 8,,,SEXT,griddim
430            .config
431            .priority 7
432            .floatmode 343
433            .ieeemode
434            .sgprsnum 136
435            .vgprsnum 339
436)ffDXD", "",
437R"ffDXD(test.s:4:13: Error: ProgInfo can't be defined if configuration was exists
438test.s:7:13: Error: Configuration can't be defined if progInfo was defined
439test.s:12:23: Warning: Value 0x7 truncated to 0x3
440test.s:13:24: Warning: Value 0x157 truncated to 0x57
441test.s:15:23: Error: Used SGPRs number out of range (0-104)
442test.s:16:23: Error: Used VGPRs number out of range (0-256)
443)ffDXD", false
444    },
445    /* AMD HSA */
446    /* 3 - gallium (configured proginfo and AMDHSA) */
447    { R"ffDXD(            .gallium
448        .llvm_version 40000
449            .kernel aa22
450            .args
451            .arg scalar, 8,,,SEXT
452            .arg griddim,4
453            .arg gridoffset,4
454            .config
455            .priority 1
456            .floatmode 43
457            .ieeemode
458            .sgprsnum 36
459            .vgprsnum 139
460            .pgmrsrc2 523243
461            .scratchbuffer 230
462            .default_hsa_features
463           
464            .call_convention 0x34dac
465            .debug_private_segment_buffer_sgpr 98
466            .debug_wavefront_private_segment_offset_sgpr 96
467            .gds_segment_size 100
468            .kernarg_segment_align 32
469            .workgroup_group_segment_size 22
470            .workgroup_fbarrier_count 3324
471            .hsa_sgprsnum 79
472        .control_directive
473        .int 1,2,4
474       
475            .kernel aa23
476            .args
477            .arg scalar, 8,,,SEXT
478            .arg griddim,4
479            .arg gridoffset,4
480            .config
481            .dims yz
482            .priority 3
483            .ieeemode
484            .pgmrsrc2 0
485            .default_hsa_features
486            .group_segment_align 128
487            .kernarg_segment_align 64
488            .kernarg_segment_size 228
489            .kernel_code_entry_offset 256
490            .kernel_code_prefetch_offset 1002
491            .kernel_code_prefetch_size 13431
492            .max_scratch_backing_memory 4212
493            .reserved_sgprs 12,19
494            .reserved_vgprs 26,48
495.text
496aa22:
497    .skip 256
498aa23:
499    .skip 256
500    .kernel aa22
501    .control_directive
502        .fill 116,1,0
503)ffDXD",
504       R"ffDXD(GalliumBinDump:
505  Kernel: name=aa22, offset=0
506    Config:
507      dims=default, SGPRS=36, VGPRS=139, pgmRSRC2=0x7fbeb, ieeeMode=0x1
508      floatMode=0x2b, priority=1, localSize=0, scratchBuffer=230
509    AMD HSA Config:
510      amdCodeVersion=1.1
511      amdMachine=1:0:0:0
512      kernelCodeEntryOffset=256
513      kernelCodePrefetchOffset=0
514      kernelCodePrefetchSize=0
515      maxScrachBackingMemorySize=0
516      computePgmRsrc1=0x8eb662
517      computePgmRsrc2=0x7fbd1
518      enableSgprRegisterFlags=0xb
519      enableFeatureFlags=0xa
520      workitemPrivateSegmentSize=230
521      workgroupGroupSegmentSize=22
522      gdsSegmentSize=100
523      kernargSegmentSize=24
524      workgroupFbarrierCount=3324
525      wavefrontSgprCount=79
526      workitemVgprCount=139
527      reservedVgprFirst=0
528      reservedVgprCount=0
529      reservedSgprFirst=0
530      reservedSgprCount=0
531      debugWavefrontPrivateSegmentOffsetSgpr=96
532      debugPrivateSegmentBufferSgpr=98
533      kernargSegmentAlignment=5
534      groupSegmentAlignment=4
535      privateSegmentAlignment=4
536      wavefrontSize=6
537      callConvention=0x34dac
538      runtimeLoaderKernelSymbol=0x0
539      ControlDirective:
540      0100000002000000040000000000000000000000000000000000000000000000
541      0000000000000000000000000000000000000000000000000000000000000000
542      0000000000000000000000000000000000000000000000000000000000000000
543      0000000000000000000000000000000000000000000000000000000000000000
544    Arg: scalar, true, general, size=8, tgtSize=8, tgtAlign=8
545    Arg: scalar, false, griddim, size=4, tgtSize=4, tgtAlign=4
546    Arg: scalar, false, gridoffset, size=4, tgtSize=4, tgtAlign=4
547  Kernel: name=aa23, offset=256
548    Config:
549      dims=6, SGPRS=12, VGPRS=3, pgmRSRC2=0x0, ieeeMode=0x1
550      floatMode=0xc0, priority=3, localSize=0, scratchBuffer=0
551    AMD HSA Config:
552      amdCodeVersion=1.1
553      amdMachine=1:0:0:0
554      kernelCodeEntryOffset=256
555      kernelCodePrefetchOffset=1002
556      kernelCodePrefetchSize=13431
557      maxScrachBackingMemorySize=4212
558      computePgmRsrc1=0x8c0c40
559      computePgmRsrc2=0x1310
560      enableSgprRegisterFlags=0xb
561      enableFeatureFlags=0xa
562      workitemPrivateSegmentSize=0
563      workgroupGroupSegmentSize=0
564      gdsSegmentSize=0
565      kernargSegmentSize=228
566      workgroupFbarrierCount=0
567      wavefrontSgprCount=12
568      workitemVgprCount=3
569      reservedVgprFirst=26
570      reservedVgprCount=23
571      reservedSgprFirst=12
572      reservedSgprCount=8
573      debugWavefrontPrivateSegmentOffsetSgpr=0
574      debugPrivateSegmentBufferSgpr=0
575      kernargSegmentAlignment=6
576      groupSegmentAlignment=7
577      privateSegmentAlignment=4
578      wavefrontSize=6
579      callConvention=0x0
580      runtimeLoaderKernelSymbol=0x0
581      ControlDirective:
582      0000000000000000000000000000000000000000000000000000000000000000
583      0000000000000000000000000000000000000000000000000000000000000000
584      0000000000000000000000000000000000000000000000000000000000000000
585      0000000000000000000000000000000000000000000000000000000000000000
586    Arg: scalar, true, general, size=8, tgtSize=8, tgtAlign=8
587    Arg: scalar, false, griddim, size=4, tgtSize=4, tgtAlign=4
588    Arg: scalar, false, gridoffset, size=4, tgtSize=4, tgtAlign=4
589  Comment:
590  nullptr
591  GlobalData:
592  nullptr
593  Code:
594  0100000000000000010000000000000000010000000000000000000000000000
595  0000000000000000000000000000000062b68e00d1fb07000b000a00e6000000
596  16000000640000001800000000000000fc0c00004f008b000000000000000000
597  6000620005040406ac4d03000000000000000000000000000000000000000000
598  0100000002000000040000000000000000000000000000000000000000000000
599  0000000000000000000000000000000000000000000000000000000000000000
600  0000000000000000000000000000000000000000000000000000000000000000
601  0000000000000000000000000000000000000000000000000000000000000000
602  010000000000000001000000000000000001000000000000ea03000000000000
603  77340000000000007410000000000000400c8c00101300000b000a0000000000
604  0000000000000000e400000000000000000000000c0003001a0017000c000800
605  0000000006070406000000000000000000000000000000000000000000000000
606  0000000000000000000000000000000000000000000000000000000000000000
607  0000000000000000000000000000000000000000000000000000000000000000
608  0000000000000000000000000000000000000000000000000000000000000000
609  0000000000000000000000000000000000000000000000000000000000000000
610)ffDXD", "", true
611    },
612    /* 3 - gallium - alloc reg flags (extra SGPR registers) */
613    { R"ffDXD(            .gallium
614        .gpu Fiji
615        .llvm_version 40000
616            .kernel aa22
617            .args
618            .arg scalar, 8,,,SEXT
619            .arg griddim,4
620            .arg gridoffset,4
621            .config
622            .priority 1
623            .floatmode 43
624            .ieeemode
625            .vgprsnum 139
626            .pgmrsrc2 523243
627            .scratchbuffer 230
628            .use_flat_scratch_init
629           
630            .call_convention 0x34dac
631            .debug_private_segment_buffer_sgpr 98
632            .debug_wavefront_private_segment_offset_sgpr 96
633            .gds_segment_size 100
634            .kernarg_segment_align 32
635            .workgroup_group_segment_size 22
636            .workgroup_fbarrier_count 3324
637    .text
638aa22:
639    .skip 256
640    s_mov_b32 s54, 455
641)ffDXD", R"ffDXD(GalliumBinDump:
642  Kernel: name=aa22, offset=0
643    Config:
644      dims=default, SGPRS=61, VGPRS=139, pgmRSRC2=0x7fbeb, ieeeMode=0x1
645      floatMode=0x2b, priority=1, localSize=0, scratchBuffer=230
646    AMD HSA Config:
647      amdCodeVersion=1.1
648      amdMachine=1:8:0:3
649      kernelCodeEntryOffset=256
650      kernelCodePrefetchOffset=0
651      kernelCodePrefetchSize=0
652      maxScrachBackingMemorySize=0
653      computePgmRsrc1=0x8eb5e2
654      computePgmRsrc2=0x7fbc5
655      enableSgprRegisterFlags=0x20
656      enableFeatureFlags=0x0
657      workitemPrivateSegmentSize=230
658      workgroupGroupSegmentSize=22
659      gdsSegmentSize=100
660      kernargSegmentSize=24
661      workgroupFbarrierCount=3324
662      wavefrontSgprCount=61
663      workitemVgprCount=139
664      reservedVgprFirst=0
665      reservedVgprCount=0
666      reservedSgprFirst=0
667      reservedSgprCount=0
668      debugWavefrontPrivateSegmentOffsetSgpr=96
669      debugPrivateSegmentBufferSgpr=98
670      kernargSegmentAlignment=5
671      groupSegmentAlignment=4
672      privateSegmentAlignment=4
673      wavefrontSize=6
674      callConvention=0x34dac
675      runtimeLoaderKernelSymbol=0x0
676      ControlDirective:
677      0000000000000000000000000000000000000000000000000000000000000000
678      0000000000000000000000000000000000000000000000000000000000000000
679      0000000000000000000000000000000000000000000000000000000000000000
680      0000000000000000000000000000000000000000000000000000000000000000
681    Arg: scalar, true, general, size=8, tgtSize=8, tgtAlign=8
682    Arg: scalar, false, griddim, size=4, tgtSize=4, tgtAlign=4
683    Arg: scalar, false, gridoffset, size=4, tgtSize=4, tgtAlign=4
684  Comment:
685  nullptr
686  GlobalData:
687  nullptr
688  Code:
689  0100000000000000010008000000030000010000000000000000000000000000
690  00000000000000000000000000000000e2b58e00c5fb070020000000e6000000
691  16000000640000001800000000000000fc0c00003d008b000000000000000000
692  6000620005040406ac4d03000000000000000000000000000000000000000000
693  0000000000000000000000000000000000000000000000000000000000000000
694  0000000000000000000000000000000000000000000000000000000000000000
695  0000000000000000000000000000000000000000000000000000000000000000
696  0000000000000000000000000000000000000000000000000000000000000000
697  ff00b6bec7010000
698)ffDXD", "", true
699    },
700    /* 3 - gallium (configured proginfo and AMDHSA) */
701    { R"ffDXD(            .gallium
702        .llvm_version 40000
703            .kernel aa22
704            .args
705            .arg scalar, 8,,,SEXT
706            .arg griddim,4
707            .arg gridoffset,4
708            .config
709            .priority 1
710            .floatmode 0x12
711            .ieeemode
712            .sgprsnum 36
713            .vgprsnum 139
714            .pgmrsrc2 523243
715            .scratchbuffer 230
716            .default_hsa_features
717            .dims x
718            .hsa_dims xy
719            .hsa_priority 2
720            .call_convention 0x34dac
721            .debug_wavefront_private_segment_offset_sgpr 96
722            .gds_segment_size 100
723            .kernarg_segment_align 32
724            .workgroup_group_segment_size 22
725            .localsize 23
726            .workgroup_fbarrier_count 3324
727            .hsa_sgprsnum 79
728            .hsa_vgprsnum 167
729            .hsa_scratchbuffer 786
730            .hsa_floatmode 0xdd
731        .control_directive
732        .int 1,2,4
733.text
734aa22:
735    .skip 256
736    .kernel aa22
737    .control_directive
738        .fill 116,1,0
739)ffDXD", R"ffDXD(GalliumBinDump:
740  Kernel: name=aa22, offset=0
741    Config:
742      dims=1, SGPRS=36, VGPRS=139, pgmRSRC2=0x7fbeb, ieeeMode=0x1
743      floatMode=0x12, priority=1, localSize=23, scratchBuffer=230
744    AMD HSA Config:
745      amdCodeVersion=1.1
746      amdMachine=1:0:0:0
747      kernelCodeEntryOffset=256
748      kernelCodePrefetchOffset=0
749      kernelCodePrefetchSize=0
750      maxScrachBackingMemorySize=0
751      computePgmRsrc1=0x8dfa69
752      computePgmRsrc2=0x7e9d1
753      enableSgprRegisterFlags=0xb
754      enableFeatureFlags=0xa
755      workitemPrivateSegmentSize=786
756      workgroupGroupSegmentSize=22
757      gdsSegmentSize=100
758      kernargSegmentSize=24
759      workgroupFbarrierCount=3324
760      wavefrontSgprCount=79
761      workitemVgprCount=167
762      reservedVgprFirst=0
763      reservedVgprCount=0
764      reservedSgprFirst=0
765      reservedSgprCount=0
766      debugWavefrontPrivateSegmentOffsetSgpr=96
767      debugPrivateSegmentBufferSgpr=0
768      kernargSegmentAlignment=5
769      groupSegmentAlignment=4
770      privateSegmentAlignment=4
771      wavefrontSize=6
772      callConvention=0x34dac
773      runtimeLoaderKernelSymbol=0x0
774      ControlDirective:
775      0100000002000000040000000000000000000000000000000000000000000000
776      0000000000000000000000000000000000000000000000000000000000000000
777      0000000000000000000000000000000000000000000000000000000000000000
778      0000000000000000000000000000000000000000000000000000000000000000
779    Arg: scalar, true, general, size=8, tgtSize=8, tgtAlign=8
780    Arg: scalar, false, griddim, size=4, tgtSize=4, tgtAlign=4
781    Arg: scalar, false, gridoffset, size=4, tgtSize=4, tgtAlign=4
782  Comment:
783  nullptr
784  GlobalData:
785  nullptr
786  Code:
787  0100000000000000010000000000000000010000000000000000000000000000
788  0000000000000000000000000000000069fa8d00d1e907000b000a0012030000
789  16000000640000001800000000000000fc0c00004f00a7000000000000000000
790  6000000005040406ac4d03000000000000000000000000000000000000000000
791  0100000002000000040000000000000000000000000000000000000000000000
792  0000000000000000000000000000000000000000000000000000000000000000
793  0000000000000000000000000000000000000000000000000000000000000000
794  0000000000000000000000000000000000000000000000000000000000000000
795)ffDXD", "", true
796    },
797    /* scratch relocations */
798    /* 1 - gallium scratch relocation */
799    { R"ffDXD(            .gallium
800            .kernel aa22
801            .args
802            .arg scalar, 8,,,SEXT,griddim
803            .config
804            .priority 1
805            .floatmode 43
806            .ieeemode
807            .sgprsnum 36
808            .vgprsnum 139
809            .pgmrsrc2 523243
810            .scratchbuffer 230
811        .scratchsym scratch
812.text
813aa22:
814        s_and_b32 s9,s5,44
815        s_and_b32 s10,s5,5
816        s_mov_b32 s1, scratch
817        s_mov_b32 s1, scratch+7*3-21
818        s_mov_b32 s1, (scratch+7*3-21)&(1<<32-1)
819        s_mov_b32 s1, ((scratch)*2-scratch)&(4096*4096*256-1)
820        s_mov_b32 s1, (-scratch+2*(scratch))%(4096*4096*256)
821        s_mov_b32 s1, (-scratch+2*(scratch))%%(4096*4096*256)
822        s_mov_b32 s1, (-scratch+2*(scratch))%%(4096*4096*256*9)
823        s_mov_b32 s1, (-scratch+2*(scratch))%(4096*4096*256*9)
824        s_mov_b32 s1, (scratch+6-6)>>(31-5+6)
825        s_mov_b32 s1, (scratch+6-3*2)>>(234%101)
826        s_mov_b32 s1, (scratch)>>(235%101-1)
827        s_mov_b32 s1, (scratch)/(4096*4096*256)
828        s_mov_b32 s1, (scratch+6-3*2)//(4096*4096*256)
829        s_mov_b32 s1, scratch>>32
830        s_mov_b32 s1, scratch&0xffffffff
831)ffDXD",
832       R"ffDXD(GalliumBinDump:
833  Kernel: name=aa22, offset=0
834    Config:
835      dims=default, SGPRS=36, VGPRS=139, pgmRSRC2=0x7fbeb, ieeeMode=0x1
836      floatMode=0x2b, priority=1, localSize=0, scratchBuffer=230
837    Arg: scalar, true, griddim, size=8, tgtSize=8, tgtAlign=8
838  Scratch relocations:
839    Rel: offset=12, type: 1
840    Rel: offset=20, type: 1
841    Rel: offset=28, type: 1
842    Rel: offset=36, type: 1
843    Rel: offset=44, type: 1
844    Rel: offset=52, type: 1
845    Rel: offset=60, type: 1
846    Rel: offset=68, type: 1
847    Rel: offset=76, type: 2
848    Rel: offset=84, type: 2
849    Rel: offset=92, type: 2
850    Rel: offset=100, type: 2
851    Rel: offset=108, type: 2
852    Rel: offset=116, type: 2
853    Rel: offset=124, type: 1
854  Comment:
855  nullptr
856  GlobalData:
857  nullptr
858  Code:
859  05ac098705850a87ff0381be04000000ff0381be04000000ff0381be04000000
860  ff0381be04000000ff0381be04000000ff0381be04000000ff0381be04000000
861  ff0381be04000000ff0381be04000000ff0381be04000000ff0381be04000000
862  ff0381be04000000ff0381be04000000ff0381be04000000ff0381be04000000
863)ffDXD", "", true
864    },
865    { R"ffDXD(            .gallium
866            .kernel aa22
867            .args
868            .arg scalar, 8,,,SEXT,griddim
869            .config
870            .priority 1
871            .scratchbuffer 230
872        .scratchsym scratch
873.text
874aa22:
875        s_and_b32 s9,s5,44
876        s_and_b32 s10,s5,5
877        s_mov_b32 s1, (scratch+6)&0xffffffff
878)ffDXD", "", "test.s:13:23: Error: Expression must point to start of section\n", false
879    }
880};
881
882static void testAssembler(cxuint testId, const AsmTestCase& testCase)
883{
884    std::istringstream input(testCase.input);
885    std::ostringstream errorStream;
886    std::ostringstream printStream;
887   
888    // create assembler with testcase's input and with ASM_TESTRUN flag
889    Assembler assembler("test.s", input, (ASM_ALL|ASM_TESTRUN)&~ASM_ALTMACRO,
890            BinaryFormat::AMD, GPUDeviceType::CAPE_VERDE, errorStream, printStream);
891    assembler.setLLVMVersion(1);
892    bool good = assembler.assemble();
893   
894    std::ostringstream dumpOss;
895    if (good && assembler.getFormatHandler()!=nullptr)
896        // get format handler and their output
897        printGalliumOutput(dumpOss, static_cast<const AsmGalliumHandler*>(
898                    assembler.getFormatHandler())->getOutput(),
899                    assembler.getLLVMVersion() >= 40000U);
900    /* compare result dump with expected dump */
901    char testName[30];
902    snprintf(testName, 30, "Test #%u", testId);
903   
904    assertValue(testName, "good", int(testCase.good), int(good));
905    assertString(testName, "dump", testCase.dump, dumpOss.str());
906    assertString(testName, "errorMessages", testCase.errors, errorStream.str());
907}
908
909int main(int argc, const char** argv)
910{
911    int retVal = 0;
912    for (size_t i = 0; i < sizeof(asmTestCases1Tbl)/sizeof(AsmTestCase); i++)
913        try
914        { testAssembler(i, asmTestCases1Tbl[i]); }
915        catch(const std::exception& ex)
916        {
917            std::cerr << ex.what() << std::endl;
918            retVal = 1;
919        }
920    return retVal;
921}
Note: See TracBrowser for help on using the repository browser.