source: CLRX/CLRadeonExtender/trunk/tests/amdasm/AsmGalliumFormat.cpp @ 3746

Last change on this file since 3746 was 3746, checked in by matszpk, 2 years ago

CLRadeonExtender: Add includes to testcases.

File size: 33.8 KB
Line 
1/*
2 *  CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3 *  Copyright (C) 2014-2018 Mateusz Szpakowski
4 *
5 *  This library is free software; you can redistribute it and/or
6 *  modify it under the terms of the GNU Lesser General Public
7 *  License as published by the Free Software Foundation; either
8 *  version 2.1 of the License, or (at your option) any later version.
9 *
10 *  This library is distributed in the hope that it will be useful,
11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 *  Lesser General Public License for more details.
14 *
15 *  You should have received a copy of the GNU Lesser General Public
16 *  License along with this library; if not, write to the Free Software
17 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18 */
19
20#include <CLRX/Config.h>
21#include <iostream>
22#include <cstdio>
23#include <sstream>
24#include <string>
25#include <algorithm>
26#include <memory>
27#include <CLRX/amdbin/GalliumBinaries.h>
28#include <CLRX/amdasm/Assembler.h>
29#include <CLRX/amdasm/AsmFormats.h>
30#include "../TestUtils.h"
31
32using namespace CLRX;
33
// Dump names of the Gallium kernel argument types. printGalliumOutput indexes
// this table with cxuint(arg.type), so the entry order must follow that value.
static const char* galliumArgTypeTbl[] =
{
    "scalar",       // 0
    "constant",     // 1
    "global",       // 2
    "local",        // 3
    "image2dro",    // 4
    "image2dwr",    // 5
    "image3dro",    // 6
    "image3dwr",    // 7
    "sampler"       // 8
};
39
// Dump names of the Gallium argument semantics. printGalliumOutput indexes
// this table with cxuint(arg.semantic).
static const char* galliumArgSemanticTbl[] =
{
    "general",      // 0
    "griddim",      // 1
    "gridoffset"    // 2
};
42
43// helper for printing value from kernel config (print default and notsupplied)
44static std::string confValueToString(uint32_t val)
45{
46    if (val == BINGEN_DEFAULT)
47        return "default";
48    if (val == BINGEN_NOTSUPPLIED)
49        return "notsup";
50    std::ostringstream oss;
51    oss << val;
52    return oss.str();
53}
54
55// print hex data or nullptr
56static void printHexData(std::ostream& os, cxuint indentLevel, size_t size,
57             const cxbyte* data)
58{
59    if (data==nullptr)
60    {
61        for (cxuint j = 0; j < indentLevel; j++)
62            os << "  ";
63        os << "nullptr\n";
64        return;
65    }
66    for (size_t i = 0; i < size; i++)
67    {
68        if ((i&31)==0)
69            for (cxuint j = 0; j < indentLevel; j++)
70                os << "  ";
71        char buf[10];
72        snprintf(buf, 10, "%02x", cxuint(data[i]));
73        os << buf;
74        if ((i&31)==31 || i+1 == size)
75            os << '\n';
76    }
77}
78
79// print dump of gallium output to stream for comparing with testcase
80static void printGalliumOutput(std::ostream& os, const GalliumInput* output, bool amdHsa)
81{
82    os << "GalliumBinDump:" << std::endl;
83    for (const GalliumKernelInput& kernel: output->kernels)
84    {
85        os << "  Kernel: name=" << kernel.kernelName << ", " <<
86                "offset=" << kernel.offset << "\n";
87        if (!kernel.useConfig)
88        {
89            // if not configuration, then print same prog info entries (only old gallium)
90            os << "    ProgInfo: ";
91            for (cxuint i = 0; i < 3; i++)
92                os << "0x" << std::hex << kernel.progInfo[i].address <<
93                        "=0x" << kernel.progInfo[i].value << ((i==2)?"\n":", ");
94            os << std::dec;
95        }
96        else
97        {
98            // print kernel config
99            const GalliumKernelConfig& config = kernel.config;
100            os << "    Config:\n";
101            os << "      dims=" << confValueToString(config.dimMask) << ", "
102                    "SGPRS=" << confValueToString(config.usedSGPRsNum) << ", "
103                    "VGPRS=" << confValueToString(config.usedVGPRsNum) << ", "
104                    "pgmRSRC2=" << std::hex << "0x" << config.pgmRSRC2 << ", "
105                    "ieeeMode=0x" << cxuint(config.ieeeMode) << "\n      "
106                    "floatMode=0x" << cxuint(config.floatMode) << std::dec << ", "
107                    "priority=" << cxuint(config.priority) << ", "
108                    "localSize=" << config.localSize << ", "
109                    "scratchBuffer=" << config.scratchBufferSize << std::endl;
110            if (amdHsa)
111            {
112                // print also AMD HSA configuration
113                const AmdHsaKernelConfig& config =
114                    *reinterpret_cast<const AmdHsaKernelConfig*>(
115                                output->code + kernel.offset);
116                os << "    AMD HSA Config:\n"
117                    "      amdCodeVersion=" << ULEV(config.amdCodeVersionMajor) << "." <<
118                        ULEV(config.amdCodeVersionMajor) << "\n"
119                    "      amdMachine=" << ULEV(config.amdMachineKind) << ":" <<
120                        ULEV(config.amdMachineMajor) << ":" <<
121                        ULEV(config.amdMachineMinor) << ":" <<
122                        ULEV(config.amdMachineStepping) << "\n"
123                    "      kernelCodeEntryOffset=" <<
124                        ULEV(config.kernelCodeEntryOffset) << "\n"
125                    "      kernelCodePrefetchOffset=" <<
126                        ULEV(config.kernelCodePrefetchOffset) << "\n"
127                    "      kernelCodePrefetchSize=" <<
128                            ULEV(config.kernelCodePrefetchSize) << "\n"
129                    "      maxScrachBackingMemorySize=" <<
130                        ULEV(config.maxScrachBackingMemorySize) << "\n"
131                    "      computePgmRsrc1=0x" << std::hex <<
132                            ULEV(config.computePgmRsrc1) << "\n"
133                    "      computePgmRsrc2=0x" << ULEV(config.computePgmRsrc2) << "\n"
134                    "      enableSgprRegisterFlags=0x" <<
135                        ULEV(config.enableSgprRegisterFlags) << "\n"
136                    "      enableFeatureFlags=0x" <<
137                        ULEV(config.enableFeatureFlags) << std::dec << "\n"
138                    "      workitemPrivateSegmentSize=" <<
139                        ULEV(config.workitemPrivateSegmentSize) << "\n"
140                    "      workgroupGroupSegmentSize=" <<
141                        ULEV(config.workgroupGroupSegmentSize) << "\n"
142                    "      gdsSegmentSize=" << ULEV(config.gdsSegmentSize) << "\n"
143                    "      kernargSegmentSize=" << ULEV(config.kernargSegmentSize) << "\n"
144                    "      workgroupFbarrierCount=" <<
145                            ULEV(config.workgroupFbarrierCount) << "\n"
146                    "      wavefrontSgprCount=" << ULEV(config.wavefrontSgprCount) << "\n"
147                    "      workitemVgprCount=" << ULEV(config.workitemVgprCount) << "\n"
148                    "      reservedVgprFirst=" << ULEV(config.reservedVgprFirst) << "\n"
149                    "      reservedVgprCount=" << ULEV(config.reservedVgprCount) << "\n"
150                    "      reservedSgprFirst=" << ULEV(config.reservedSgprFirst) << "\n"
151                    "      reservedSgprCount=" << ULEV(config.reservedSgprCount) << "\n"
152                    "      debugWavefrontPrivateSegmentOffsetSgpr=" <<
153                        ULEV(config.debugWavefrontPrivateSegmentOffsetSgpr) << "\n"
154                    "      debugPrivateSegmentBufferSgpr=" <<
155                        ULEV(config.debugPrivateSegmentBufferSgpr) << "\n"
156                    "      kernargSegmentAlignment=" << 
157                        cxuint(config.kernargSegmentAlignment) << "\n"
158                    "      groupSegmentAlignment=" <<
159                        cxuint(config.groupSegmentAlignment) << "\n"
160                    "      privateSegmentAlignment=" <<
161                        cxuint(config.privateSegmentAlignment) << "\n"
162                    "      wavefrontSize=" << cxuint(config.wavefrontSize) << "\n"
163                    "      callConvention=0x" << std::hex <<
164                        ULEV(config.callConvention) << "\n"
165                    "      runtimeLoaderKernelSymbol=0x" <<
166                        ULEV(config.runtimeLoaderKernelSymbol) << std::dec << "\n";
167                os << "      ControlDirective:\n";
168                printHexData(os, 3, 128, config.controlDirective);
169            }
170        }
171        for (const GalliumArgInfo& arg: kernel.argInfos)
172        {
173            os << "    Arg: " << galliumArgTypeTbl[cxuint(arg.type)] << ", " <<
174                    ((arg.signExtended) ? "true" : "false") << ", " <<
175                    galliumArgSemanticTbl[cxuint(arg.semantic)] << ", " <<
176                    "size=" << arg.size << ", tgtSize=" << arg.targetSize << ", " <<
177                    "tgtAlign=" << arg.targetAlign << "\n";
178        }
179        os.flush();
180    }
181    // scratch relocations
182    if (!output->scratchRelocs.empty())
183    {
184        os << "  Scratch relocations:\n";
185        for (const GalliumScratchReloc& rel: output->scratchRelocs)
186            os << "    Rel: offset=" << rel.offset << ", type: " << rel.type << "\n";
187    }
188    // other data from output
189    os << "  Comment:\n";
190    printHexData(os, 1, output->commentSize, (const cxbyte*)output->comment);
191    os << "  GlobalData:\n";
192    printHexData(os, 1, output->globalDataSize, output->globalData);
193    os << "  Code:\n";
194    printHexData(os, 1, output->codeSize, output->code);
195   
196    // print extra sections when supplied
197    for (BinSection section: output->extraSections)
198    {
199        os << "  Section " << section.name << ", type=" << section.type <<
200                        ", flags=" << section.flags << ":\n";
201        printHexData(os, 1, section.size, section.data);
202    }
203    // print extra symbols when supplied
204    for (BinSymbol symbol: output->extraSymbols)
205        os << "  Symbol: name=" << symbol.name << ", value=" << symbol.value <<
206                ", size=" << symbol.size << ", section=" << symbol.sectionId << "\n";
207    os.flush();
208}
209
// Single assembler test case: source to assemble plus the expected results.
// Kept as a plain aggregate; the tables below initialize it positionally,
// so the field order must not change.
struct AsmTestCase
{
    // assembler source text
    const char* input;
    // expected dump text (empty in the error test cases of this file)
    const char* dump;
    // expected assembler messages (warnings and errors), empty when none
    const char* errors;
    // true in the cases whose messages hold no errors, false in the error
    // cases - presumably the expected assembly result (verify against the runner)
    bool good;
};
217
218static const AsmTestCase asmTestCases1Tbl[] =
219{
220    /* 0 - gallium  */
221    {
222        R"ffDXD(            .gallium
223            .kernel firstKernel
224            .args
225            .arg scalar,8
226            .arg local,4
227            .arg constant,4
228            .arg global,4
229            .arg image3d_rd,4
230            .arg image2d_rd,4
231            .arg image3d_wr,4
232            .arg image2d_wr,4
233            .arg sampler,8
234            .arg scalar,8, , ,sext
235            .arg scalar,8, , ,sext,general
236            .arg scalar,8, 18,2
237            .arg scalar,20, 18,2
238            .arg scalar,8, , , ,griddim
239            .arg griddim,4
240            .arg gridoffset,4
241            .arg scalar, 11111111111, 22222222222222, 4
242            .section .comment
243            .ascii "nocomments"
244            .globaldata
245            .byte 0xf0,0xfd,0x3d,0x44
246            .kernel secondKernel
247            .proginfo
248            .entry 12,22
249            .entry 14,288
250            .entry 16,160
251            .args
252            .arg scalar,4
253            .arg scalar,4
254            .arg griddim,4
255            .arg gridoffset,4
256            .kernel thirdKernel
257            .proginfo
258            .entry 0xfffffaaaaa, 0x12233
259            .entry 0xff, 0x111223030
260            .entry 1,2
261            .text
262firstKernel: .byte 1,22,3,4
263            .p2align 4
264secondKernel:.byte 77,76,75,90,11
265thirdKernel:
266            .section .info1
267            .ascii "noinfo"
268            .section .infox
269            .ascii "refer to some link"
270            .section .softX ,"awx",@nobits
271            .section .softy ,"x",@note
272            .section .softz ,"a",@progbits
273            .section .softX ,"a",@nobits)ffDXD",
274        /* dump */
275        R"ffDXD(GalliumBinDump:
276  Kernel: name=firstKernel, offset=0
277    ProgInfo: 0xb848=0xc0000, 0xb84c=0x1788, 0xb860=0x0
278    Arg: scalar, false, general, size=8, tgtSize=8, tgtAlign=8
279    Arg: local, false, general, size=4, tgtSize=4, tgtAlign=4
280    Arg: constant, false, general, size=4, tgtSize=4, tgtAlign=4
281    Arg: global, false, general, size=4, tgtSize=4, tgtAlign=4
282    Arg: image3dro, false, general, size=4, tgtSize=4, tgtAlign=4
283    Arg: image2dro, false, general, size=4, tgtSize=4, tgtAlign=4
284    Arg: image3dwr, false, general, size=4, tgtSize=4, tgtAlign=4
285    Arg: image2dwr, false, general, size=4, tgtSize=4, tgtAlign=4
286    Arg: sampler, false, general, size=8, tgtSize=8, tgtAlign=8
287    Arg: scalar, true, general, size=8, tgtSize=8, tgtAlign=8
288    Arg: scalar, true, general, size=8, tgtSize=8, tgtAlign=8
289    Arg: scalar, false, general, size=8, tgtSize=18, tgtAlign=2
290    Arg: scalar, false, general, size=20, tgtSize=18, tgtAlign=2
291    Arg: scalar, false, griddim, size=8, tgtSize=8, tgtAlign=8
292    Arg: scalar, false, griddim, size=4, tgtSize=4, tgtAlign=4
293    Arg: scalar, false, gridoffset, size=4, tgtSize=4, tgtAlign=4
294    Arg: scalar, false, general, size=2521176519, tgtSize=61432718, tgtAlign=4
295  Kernel: name=secondKernel, offset=16
296    ProgInfo: 0xc=0x16, 0xe=0x120, 0x10=0xa0
297    Arg: scalar, false, general, size=4, tgtSize=4, tgtAlign=4
298    Arg: scalar, false, general, size=4, tgtSize=4, tgtAlign=4
299    Arg: scalar, false, griddim, size=4, tgtSize=4, tgtAlign=4
300    Arg: scalar, false, gridoffset, size=4, tgtSize=4, tgtAlign=4
301  Kernel: name=thirdKernel, offset=21
302    ProgInfo: 0xfffaaaaa=0x12233, 0xff=0x11223030, 0x1=0x2
303  Comment:
304  6e6f636f6d6d656e7473
305  GlobalData:
306  f0fd3d44
307  Code:
308  01160304000080bf000080bf000080bf4d4c4b5a0b
309  Section .info1, type=1, flags=0:
310  6e6f696e666f
311  Section .infox, type=1, flags=0:
312  726566657220746f20736f6d65206c696e6b
313  Section .softX, type=8, flags=7:
314  Section .softy, type=7, flags=4:
315  Section .softz, type=1, flags=2:
316)ffDXD",
317        "test.s:20:26: Warning: Size of argument out of range\n"
318        "test.s:20:39: Warning: Target size of argument out of range\n"
319        "test.s:37:20: Warning: Value 0xfffffaaaaa truncated to 0xfffaaaaa\n"
320        "test.s:38:26: Warning: Value 0x111223030 truncated to 0x11223030\n"
321        "test.s:52:13: Warning: Section type, flags and alignment was ignored\n", true
322    },
323    /* 1 - gallium (configured proginfo) */
324    { R"ffDXD(            .gallium
325            .kernel aa22
326            .args
327            .arg scalar, 8,,,SEXT,griddim
328            .config
329            .priority 1
330            .floatmode 43
331            .ieeemode
332            .sgprsnum 36
333            .vgprsnum 139
334            .pgmrsrc2 523243
335            .scratchbuffer 230
336            .kernel aa23
337            .args
338            .arg scalar, 8,,,SEXT,griddim
339            .config
340            .dims yz
341            .priority 3
342            .ieeemode
343            .pgmrsrc2 0
344.text
345aa22:
346aa23:)ffDXD",
347       R"ffDXD(GalliumBinDump:
348  Kernel: name=aa22, offset=0
349    Config:
350      dims=default, SGPRS=36, VGPRS=139, pgmRSRC2=0x7fbeb, ieeeMode=0x1
351      floatMode=0x2b, priority=1, localSize=0, scratchBuffer=230
352    Arg: scalar, true, griddim, size=8, tgtSize=8, tgtAlign=8
353  Kernel: name=aa23, offset=0
354    Config:
355      dims=6, SGPRS=8, VGPRS=3, pgmRSRC2=0x0, ieeeMode=0x1
356      floatMode=0xc0, priority=3, localSize=0, scratchBuffer=0
357    Arg: scalar, true, griddim, size=8, tgtSize=8, tgtAlign=8
358  Comment:
359  nullptr
360  GlobalData:
361  nullptr
362  Code:
363)ffDXD", "", true
364    },
365    /* 2 - gallium (errors) */
366    {
367        R"ffDXD(            .gallium
368            .kernel firstKernel
369            .entry 111,1
370            .arg scalar,8
371            .arg local,4
372            .arg constant,4
373            .args
374            .arg sclar,5
375            .arg scalar
376            .arg image3d_rd,0,0,1
377            .arg sampler,4,4,4     , zxx,    ssds
378            .arg scalar,8, 18,2,zext,general ,
379            .proginfo
380            .entry ,
381            .entry  ,66
382            .entry 66,
383            .kernel secondKernel
384            .proginfo
385            .entry 1,2
386            .entry 2,3
387            .entry 3,4
388            .entry 5,6
389            .args
390            .proginfo
391            .entry 7,8
392            .section .txt3, "a  ", @xxx
393            .section .txt3, "ax", x
394            .section .txt3, "a vcxs", @  )ffDXD",
395        /* dump */
396        "",
397        /* errors */
398        R"ffDXD(test.s:3:13: Error: ProgInfo entry definition outside ProgInfo
399test.s:4:13: Error: Argument definition outside arguments list
400test.s:5:13: Error: Argument definition outside arguments list
401test.s:6:13: Error: Argument definition outside arguments list
402test.s:8:18: Error: Unknown argument type
403test.s:9:24: Error: Expected ',' before argument
404test.s:10:29: Warning: Size of argument out of range
405test.s:10:31: Warning: Target size of argument out of range
406test.s:11:38: Error: Unknown numeric extension
407test.s:11:46: Error: Unknown argument semantic
408test.s:12:46: Error: Garbages at end of line
409test.s:14:20: Error: Expected expression
410test.s:14:21: Error: Expected expression
411test.s:15:21: Error: Expected expression
412test.s:16:23: Error: Expected expression
413test.s:22:13: Error: Maximum 3 entries can be in ProgInfo
414test.s:25:13: Error: Maximum 3 entries can be in ProgInfo
415test.s:26:29: Error: Only 'a', 'w', 'x' is accepted in flags string
416test.s:26:36: Error: Unknown section type
417test.s:27:35: Error: Section type was not preceded by '@'
418test.s:28:29: Error: Only 'a', 'w', 'x' is accepted in flags string
419test.s:28:39: Error: Section type was not preceded by '@'
420)ffDXD", false
421    },
422    {
423        R"ffDXD(            .gallium
424            .kernel aa22
425            .config
426            .proginfo
427            .kernel av77
428            .proginfo
429            .config
430            .kernel aa22
431            .args
432            .arg scalar, 8,,,SEXT,griddim
433            .config
434            .priority 7
435            .floatmode 343
436            .ieeemode
437            .sgprsnum 136
438            .vgprsnum 339
439)ffDXD", "",
440R"ffDXD(test.s:4:13: Error: ProgInfo can't be defined if configuration was exists
441test.s:7:13: Error: Configuration can't be defined if progInfo was defined
442test.s:12:23: Warning: Value 0x7 truncated to 0x3
443test.s:13:24: Warning: Value 0x157 truncated to 0x57
444test.s:15:23: Error: Used SGPRs number out of range (0-104)
445test.s:16:23: Error: Used VGPRs number out of range (0-256)
446)ffDXD", false
447    },
448    /* AMD HSA */
449    /* 3 - gallium (configured proginfo and AMDHSA) */
450    { R"ffDXD(            .gallium
451        .llvm_version 40000
452            .kernel aa22
453            .args
454            .arg scalar, 8,,,SEXT
455            .arg griddim,4
456            .arg gridoffset,4
457            .config
458            .priority 1
459            .floatmode 43
460            .ieeemode
461            .sgprsnum 36
462            .vgprsnum 139
463            .pgmrsrc2 523243
464            .scratchbuffer 230
465            .default_hsa_features
466           
467            .call_convention 0x34dac
468            .debug_private_segment_buffer_sgpr 98
469            .debug_wavefront_private_segment_offset_sgpr 96
470            .gds_segment_size 100
471            .kernarg_segment_align 32
472            .workgroup_group_segment_size 22
473            .workgroup_fbarrier_count 3324
474            .hsa_sgprsnum 79
475        .control_directive
476        .int 1,2,4
477       
478            .kernel aa23
479            .args
480            .arg scalar, 8,,,SEXT
481            .arg griddim,4
482            .arg gridoffset,4
483            .config
484            .dims yz
485            .priority 3
486            .ieeemode
487            .pgmrsrc2 0
488            .default_hsa_features
489            .group_segment_align 128
490            .kernarg_segment_align 64
491            .kernarg_segment_size 228
492            .kernel_code_entry_offset 256
493            .kernel_code_prefetch_offset 1002
494            .kernel_code_prefetch_size 13431
495            .max_scratch_backing_memory 4212
496            .reserved_sgprs 12,19
497            .reserved_vgprs 26,48
498.text
499aa22:
500    .skip 256
501aa23:
502    .skip 256
503    .kernel aa22
504    .control_directive
505        .fill 116,1,0
506)ffDXD",
507       R"ffDXD(GalliumBinDump:
508  Kernel: name=aa22, offset=0
509    Config:
510      dims=default, SGPRS=36, VGPRS=139, pgmRSRC2=0x7fbeb, ieeeMode=0x1
511      floatMode=0x2b, priority=1, localSize=0, scratchBuffer=230
512    AMD HSA Config:
513      amdCodeVersion=1.1
514      amdMachine=1:0:0:0
515      kernelCodeEntryOffset=256
516      kernelCodePrefetchOffset=0
517      kernelCodePrefetchSize=0
518      maxScrachBackingMemorySize=0
519      computePgmRsrc1=0x8eb662
520      computePgmRsrc2=0x7fbd1
521      enableSgprRegisterFlags=0xb
522      enableFeatureFlags=0xa
523      workitemPrivateSegmentSize=230
524      workgroupGroupSegmentSize=22
525      gdsSegmentSize=100
526      kernargSegmentSize=24
527      workgroupFbarrierCount=3324
528      wavefrontSgprCount=79
529      workitemVgprCount=139
530      reservedVgprFirst=0
531      reservedVgprCount=0
532      reservedSgprFirst=0
533      reservedSgprCount=0
534      debugWavefrontPrivateSegmentOffsetSgpr=96
535      debugPrivateSegmentBufferSgpr=98
536      kernargSegmentAlignment=5
537      groupSegmentAlignment=4
538      privateSegmentAlignment=4
539      wavefrontSize=6
540      callConvention=0x34dac
541      runtimeLoaderKernelSymbol=0x0
542      ControlDirective:
543      0100000002000000040000000000000000000000000000000000000000000000
544      0000000000000000000000000000000000000000000000000000000000000000
545      0000000000000000000000000000000000000000000000000000000000000000
546      0000000000000000000000000000000000000000000000000000000000000000
547    Arg: scalar, true, general, size=8, tgtSize=8, tgtAlign=8
548    Arg: scalar, false, griddim, size=4, tgtSize=4, tgtAlign=4
549    Arg: scalar, false, gridoffset, size=4, tgtSize=4, tgtAlign=4
550  Kernel: name=aa23, offset=256
551    Config:
552      dims=6, SGPRS=12, VGPRS=3, pgmRSRC2=0x0, ieeeMode=0x1
553      floatMode=0xc0, priority=3, localSize=0, scratchBuffer=0
554    AMD HSA Config:
555      amdCodeVersion=1.1
556      amdMachine=1:0:0:0
557      kernelCodeEntryOffset=256
558      kernelCodePrefetchOffset=1002
559      kernelCodePrefetchSize=13431
560      maxScrachBackingMemorySize=4212
561      computePgmRsrc1=0x8c0c40
562      computePgmRsrc2=0x1310
563      enableSgprRegisterFlags=0xb
564      enableFeatureFlags=0xa
565      workitemPrivateSegmentSize=0
566      workgroupGroupSegmentSize=0
567      gdsSegmentSize=0
568      kernargSegmentSize=228
569      workgroupFbarrierCount=0
570      wavefrontSgprCount=12
571      workitemVgprCount=3
572      reservedVgprFirst=26
573      reservedVgprCount=23
574      reservedSgprFirst=12
575      reservedSgprCount=8
576      debugWavefrontPrivateSegmentOffsetSgpr=0
577      debugPrivateSegmentBufferSgpr=0
578      kernargSegmentAlignment=6
579      groupSegmentAlignment=7
580      privateSegmentAlignment=4
581      wavefrontSize=6
582      callConvention=0x0
583      runtimeLoaderKernelSymbol=0x0
584      ControlDirective:
585      0000000000000000000000000000000000000000000000000000000000000000
586      0000000000000000000000000000000000000000000000000000000000000000
587      0000000000000000000000000000000000000000000000000000000000000000
588      0000000000000000000000000000000000000000000000000000000000000000
589    Arg: scalar, true, general, size=8, tgtSize=8, tgtAlign=8
590    Arg: scalar, false, griddim, size=4, tgtSize=4, tgtAlign=4
591    Arg: scalar, false, gridoffset, size=4, tgtSize=4, tgtAlign=4
592  Comment:
593  nullptr
594  GlobalData:
595  nullptr
596  Code:
597  0100000000000000010000000000000000010000000000000000000000000000
598  0000000000000000000000000000000062b68e00d1fb07000b000a00e6000000
599  16000000640000001800000000000000fc0c00004f008b000000000000000000
600  6000620005040406ac4d03000000000000000000000000000000000000000000
601  0100000002000000040000000000000000000000000000000000000000000000
602  0000000000000000000000000000000000000000000000000000000000000000
603  0000000000000000000000000000000000000000000000000000000000000000
604  0000000000000000000000000000000000000000000000000000000000000000
605  010000000000000001000000000000000001000000000000ea03000000000000
606  77340000000000007410000000000000400c8c00101300000b000a0000000000
607  0000000000000000e400000000000000000000000c0003001a0017000c000800
608  0000000006070406000000000000000000000000000000000000000000000000
609  0000000000000000000000000000000000000000000000000000000000000000
610  0000000000000000000000000000000000000000000000000000000000000000
611  0000000000000000000000000000000000000000000000000000000000000000
612  0000000000000000000000000000000000000000000000000000000000000000
613)ffDXD", "", true
614    },
615    /* 3 - gallium - alloc reg flags (extra SGPR registers) */
616    { R"ffDXD(            .gallium
617        .gpu Fiji
618        .llvm_version 40000
619            .kernel aa22
620            .args
621            .arg scalar, 8,,,SEXT
622            .arg griddim,4
623            .arg gridoffset,4
624            .config
625            .priority 1
626            .floatmode 43
627            .ieeemode
628            .vgprsnum 139
629            .pgmrsrc2 523243
630            .scratchbuffer 230
631            .use_flat_scratch_init
632           
633            .call_convention 0x34dac
634            .debug_private_segment_buffer_sgpr 98
635            .debug_wavefront_private_segment_offset_sgpr 96
636            .gds_segment_size 100
637            .kernarg_segment_align 32
638            .workgroup_group_segment_size 22
639            .workgroup_fbarrier_count 3324
640    .text
641aa22:
642    .skip 256
643    s_mov_b32 s54, 455
644)ffDXD", R"ffDXD(GalliumBinDump:
645  Kernel: name=aa22, offset=0
646    Config:
647      dims=default, SGPRS=61, VGPRS=139, pgmRSRC2=0x7fbeb, ieeeMode=0x1
648      floatMode=0x2b, priority=1, localSize=0, scratchBuffer=230
649    AMD HSA Config:
650      amdCodeVersion=1.1
651      amdMachine=1:8:0:3
652      kernelCodeEntryOffset=256
653      kernelCodePrefetchOffset=0
654      kernelCodePrefetchSize=0
655      maxScrachBackingMemorySize=0
656      computePgmRsrc1=0x8eb5e2
657      computePgmRsrc2=0x7fbc5
658      enableSgprRegisterFlags=0x20
659      enableFeatureFlags=0x0
660      workitemPrivateSegmentSize=230
661      workgroupGroupSegmentSize=22
662      gdsSegmentSize=100
663      kernargSegmentSize=24
664      workgroupFbarrierCount=3324
665      wavefrontSgprCount=61
666      workitemVgprCount=139
667      reservedVgprFirst=0
668      reservedVgprCount=0
669      reservedSgprFirst=0
670      reservedSgprCount=0
671      debugWavefrontPrivateSegmentOffsetSgpr=96
672      debugPrivateSegmentBufferSgpr=98
673      kernargSegmentAlignment=5
674      groupSegmentAlignment=4
675      privateSegmentAlignment=4
676      wavefrontSize=6
677      callConvention=0x34dac
678      runtimeLoaderKernelSymbol=0x0
679      ControlDirective:
680      0000000000000000000000000000000000000000000000000000000000000000
681      0000000000000000000000000000000000000000000000000000000000000000
682      0000000000000000000000000000000000000000000000000000000000000000
683      0000000000000000000000000000000000000000000000000000000000000000
684    Arg: scalar, true, general, size=8, tgtSize=8, tgtAlign=8
685    Arg: scalar, false, griddim, size=4, tgtSize=4, tgtAlign=4
686    Arg: scalar, false, gridoffset, size=4, tgtSize=4, tgtAlign=4
687  Comment:
688  nullptr
689  GlobalData:
690  nullptr
691  Code:
692  0100000000000000010008000000030000010000000000000000000000000000
693  00000000000000000000000000000000e2b58e00c5fb070020000000e6000000
694  16000000640000001800000000000000fc0c00003d008b000000000000000000
695  6000620005040406ac4d03000000000000000000000000000000000000000000
696  0000000000000000000000000000000000000000000000000000000000000000
697  0000000000000000000000000000000000000000000000000000000000000000
698  0000000000000000000000000000000000000000000000000000000000000000
699  0000000000000000000000000000000000000000000000000000000000000000
700  ff00b6bec7010000
701)ffDXD", "", true
702    },
703    /* 3 - gallium (configured proginfo and AMDHSA) */
704    { R"ffDXD(            .gallium
705        .llvm_version 40000
706            .kernel aa22
707            .args
708            .arg scalar, 8,,,SEXT
709            .arg griddim,4
710            .arg gridoffset,4
711            .config
712            .priority 1
713            .floatmode 0x12
714            .ieeemode
715            .sgprsnum 36
716            .vgprsnum 139
717            .pgmrsrc2 523243
718            .scratchbuffer 230
719            .default_hsa_features
720            .dims x
721            .hsa_dims xy
722            .hsa_priority 2
723            .call_convention 0x34dac
724            .debug_wavefront_private_segment_offset_sgpr 96
725            .gds_segment_size 100
726            .kernarg_segment_align 32
727            .workgroup_group_segment_size 22
728            .localsize 23
729            .workgroup_fbarrier_count 3324
730            .hsa_sgprsnum 79
731            .hsa_vgprsnum 167
732            .hsa_scratchbuffer 786
733            .hsa_floatmode 0xdd
734        .control_directive
735        .int 1,2,4
736.text
737aa22:
738    .skip 256
739    .kernel aa22
740    .control_directive
741        .fill 116,1,0
742)ffDXD", R"ffDXD(GalliumBinDump:
743  Kernel: name=aa22, offset=0
744    Config:
745      dims=1, SGPRS=36, VGPRS=139, pgmRSRC2=0x7fbeb, ieeeMode=0x1
746      floatMode=0x12, priority=1, localSize=23, scratchBuffer=230
747    AMD HSA Config:
748      amdCodeVersion=1.1
749      amdMachine=1:0:0:0
750      kernelCodeEntryOffset=256
751      kernelCodePrefetchOffset=0
752      kernelCodePrefetchSize=0
753      maxScrachBackingMemorySize=0
754      computePgmRsrc1=0x8dfa69
755      computePgmRsrc2=0x7e9d1
756      enableSgprRegisterFlags=0xb
757      enableFeatureFlags=0xa
758      workitemPrivateSegmentSize=786
759      workgroupGroupSegmentSize=22
760      gdsSegmentSize=100
761      kernargSegmentSize=24
762      workgroupFbarrierCount=3324
763      wavefrontSgprCount=79
764      workitemVgprCount=167
765      reservedVgprFirst=0
766      reservedVgprCount=0
767      reservedSgprFirst=0
768      reservedSgprCount=0
769      debugWavefrontPrivateSegmentOffsetSgpr=96
770      debugPrivateSegmentBufferSgpr=0
771      kernargSegmentAlignment=5
772      groupSegmentAlignment=4
773      privateSegmentAlignment=4
774      wavefrontSize=6
775      callConvention=0x34dac
776      runtimeLoaderKernelSymbol=0x0
777      ControlDirective:
778      0100000002000000040000000000000000000000000000000000000000000000
779      0000000000000000000000000000000000000000000000000000000000000000
780      0000000000000000000000000000000000000000000000000000000000000000
781      0000000000000000000000000000000000000000000000000000000000000000
782    Arg: scalar, true, general, size=8, tgtSize=8, tgtAlign=8
783    Arg: scalar, false, griddim, size=4, tgtSize=4, tgtAlign=4
784    Arg: scalar, false, gridoffset, size=4, tgtSize=4, tgtAlign=4
785  Comment:
786  nullptr
787  GlobalData:
788  nullptr
789  Code:
790  0100000000000000010000000000000000010000000000000000000000000000
791  0000000000000000000000000000000069fa8d00d1e907000b000a0012030000
792  16000000640000001800000000000000fc0c00004f00a7000000000000000000
793  6000000005040406ac4d03000000000000000000000000000000000000000000
794  0100000002000000040000000000000000000000000000000000000000000000
795  0000000000000000000000000000000000000000000000000000000000000000
796  0000000000000000000000000000000000000000000000000000000000000000
797  0000000000000000000000000000000000000000000000000000000000000000
798)ffDXD", "", true
799    },
800    /* scratch relocations */
801    /* 1 - gallium scratch relocation */
802    { R"ffDXD(            .gallium
803            .kernel aa22
804            .args
805            .arg scalar, 8,,,SEXT,griddim
806            .config
807            .priority 1
808            .floatmode 43
809            .ieeemode
810            .sgprsnum 36
811            .vgprsnum 139
812            .pgmrsrc2 523243
813            .scratchbuffer 230
814        .scratchsym scratch
815.text
816aa22:
817        s_and_b32 s9,s5,44
818        s_and_b32 s10,s5,5
819        s_mov_b32 s1, scratch
820        s_mov_b32 s1, scratch+7*3-21
821        s_mov_b32 s1, (scratch+7*3-21)&(1<<32-1)
822        s_mov_b32 s1, ((scratch)*2-scratch)&(4096*4096*256-1)
823        s_mov_b32 s1, (-scratch+2*(scratch))%(4096*4096*256)
824        s_mov_b32 s1, (-scratch+2*(scratch))%%(4096*4096*256)
825        s_mov_b32 s1, (-scratch+2*(scratch))%%(4096*4096*256*9)
826        s_mov_b32 s1, (-scratch+2*(scratch))%(4096*4096*256*9)
827        s_mov_b32 s1, (scratch+6-6)>>(31-5+6)
828        s_mov_b32 s1, (scratch+6-3*2)>>(234%101)
829        s_mov_b32 s1, (scratch)>>(235%101-1)
830        s_mov_b32 s1, (scratch)/(4096*4096*256)
831        s_mov_b32 s1, (scratch+6-3*2)//(4096*4096*256)
832        s_mov_b32 s1, scratch>>32
833        s_mov_b32 s1, scratch&0xffffffff
834)ffDXD",
835       R"ffDXD(GalliumBinDump:
836  Kernel: name=aa22, offset=0
837    Config:
838      dims=default, SGPRS=36, VGPRS=139, pgmRSRC2=0x7fbeb, ieeeMode=0x1
839      floatMode=0x2b, priority=1, localSize=0, scratchBuffer=230
840    Arg: scalar, true, griddim, size=8, tgtSize=8, tgtAlign=8
841  Scratch relocations:
842    Rel: offset=12, type: 1
843    Rel: offset=20, type: 1
844    Rel: offset=28, type: 1
845    Rel: offset=36, type: 1
846    Rel: offset=44, type: 1
847    Rel: offset=52, type: 1
848    Rel: offset=60, type: 1
849    Rel: offset=68, type: 1
850    Rel: offset=76, type: 2
851    Rel: offset=84, type: 2
852    Rel: offset=92, type: 2
853    Rel: offset=100, type: 2
854    Rel: offset=108, type: 2
855    Rel: offset=116, type: 2
856    Rel: offset=124, type: 1
857  Comment:
858  nullptr
859  GlobalData:
860  nullptr
861  Code:
862  05ac098705850a87ff0381be04000000ff0381be04000000ff0381be04000000
863  ff0381be04000000ff0381be04000000ff0381be04000000ff0381be04000000
864  ff0381be04000000ff0381be04000000ff0381be04000000ff0381be04000000
865  ff0381be04000000ff0381be04000000ff0381be04000000ff0381be04000000
866)ffDXD", "", true
867    },
868    { R"ffDXD(            .gallium
869            .kernel aa22
870            .args
871            .arg scalar, 8,,,SEXT,griddim
872            .config
873            .priority 1
874            .scratchbuffer 230
875        .scratchsym scratch
876.text
877aa22:
878        s_and_b32 s9,s5,44
879        s_and_b32 s10,s5,5
880        s_mov_b32 s1, (scratch+6)&0xffffffff
881)ffDXD", "", "test.s:13:23: Error: Expression must point to start of section\n", false
882    }
883};
884
885static void testAssembler(cxuint testId, const AsmTestCase& testCase)
886{
887    std::istringstream input(testCase.input);
888    std::ostringstream errorStream;
889    std::ostringstream printStream;
890   
891    // create assembler with testcase's input and with ASM_TESTRUN flag
892    Assembler assembler("test.s", input, (ASM_ALL|ASM_TESTRUN)&~ASM_ALTMACRO,
893            BinaryFormat::AMD, GPUDeviceType::CAPE_VERDE, errorStream, printStream);
894    assembler.setLLVMVersion(1);
895    bool good = assembler.assemble();
896   
897    std::ostringstream dumpOss;
898    if (good && assembler.getFormatHandler()!=nullptr)
899        // get format handler and their output
900        printGalliumOutput(dumpOss, static_cast<const AsmGalliumHandler*>(
901                    assembler.getFormatHandler())->getOutput(),
902                    assembler.getLLVMVersion() >= 40000U);
903    /* compare result dump with expected dump */
904    char testName[30];
905    snprintf(testName, 30, "Test #%u", testId);
906   
907    assertValue(testName, "good", int(testCase.good), int(good));
908    assertString(testName, "dump", testCase.dump, dumpOss.str());
909    assertString(testName, "errorMessages", testCase.errors, errorStream.str());
910}
911
912int main(int argc, const char** argv)
913{
914    int retVal = 0;
915    for (size_t i = 0; i < sizeof(asmTestCases1Tbl)/sizeof(AsmTestCase); i++)
916        try
917        { testAssembler(i, asmTestCases1Tbl[i]); }
918        catch(const std::exception& ex)
919        {
920            std::cerr << ex.what() << std::endl;
921            retVal = 1;
922        }
923    return retVal;
924}
Note: See TracBrowser for help on using the repository browser.