source: CLRX/CLRadeonExtender/trunk/tests/amdasm/AsmROCmFormat.cpp @ 3745

Last change on this file since 3745 was 3745, checked in by matszpk, 3 years ago

CLRadeonExtender: AsmROCm: Add the missing '.runtime_handle' pseudo-op. Add new testcases to test ROCm metadata pseudo-ops.

File size: 48.4 KB
Line 
1/*
2 *  CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3 *  Copyright (C) 2014-2018 Mateusz Szpakowski
4 *
5 *  This library is free software; you can redistribute it and/or
6 *  modify it under the terms of the GNU Lesser General Public
7 *  License as published by the Free Software Foundation; either
8 *  version 2.1 of the License, or (at your option) any later version.
9 *
10 *  This library is distributed in the hope that it will be useful,
11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 *  Lesser General Public License for more details.
14 *
15 *  You should have received a copy of the GNU Lesser General Public
16 *  License along with this library; if not, write to the Free Software
17 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18 */
19
20#include <CLRX/Config.h>
21#include <iostream>
22#include <cstdio>
23#include <sstream>
24#include <algorithm>
25#include <memory>
26#include <CLRX/amdasm/Assembler.h>
27#include "../TestUtils.h"
28
29using namespace CLRX;
30
31static void printHexData(std::ostream& os, cxuint indentLevel, size_t size,
32             const cxbyte* data)
33{
34    if (data==nullptr)
35    {
36        for (cxuint j = 0; j < indentLevel; j++)
37            os << "  ";
38        os << "nullptr\n";
39        return;
40    }
41    for (size_t i = 0; i < size; i++)
42    {
43        if ((i&31)==0)
44            for (cxuint j = 0; j < indentLevel; j++)
45                os << "  ";
46        char buf[10];
47        snprintf(buf, 10, "%02x", cxuint(data[i]));
48        os << buf;
49        if ((i&31)==31 || i+1 == size)
50            os << '\n';
51    }
52}
53
// Name tables used by printROCmOutput to turn enumeration values into text.
// Each table is indexed by the enumeration value converted to cxuint, so the
// order of the entries matters.

// names of symbol region types (indexed by symbol.type)
static const char* rocmRegionTypeNames[3] =
{
    "data",
    "fkernel",
    "kernel"
};

// names of kernel argument value kinds (indexed by argInfo.valueKind)
static const char* rocmValueKindNames[] =
{
    "value",
    "globalbuf",
    "dynshptr",
    "sampler",
    "image",
    "pipe",
    "queue",
    "gox",
    "goy",
    "goz",
    "none",
    "printfbuf",
    "defqueue",
    "complact"
};

// names of kernel argument value types (indexed by argInfo.valueType)
static const char* rocmValueTypeNames[] =
{
    "struct",
    "i8", "u8",
    "i16", "u16", "f16",
    "i32", "u32", "f32",
    "i64", "u64", "f64"
};

// names of address spaces (indexed by argInfo.addressSpace)
static const char* rocmAddressSpaces[] =
{
    "none",
    "private",
    "global",
    "constant",
    "local",
    "generic",
    "region"
};

// names of access qualifiers (indexed by argInfo.accessQual and
// argInfo.actualAccessQual)
static const char* rocmAccessQuals[] =
{
    "default",
    "read_only",
    "write_only",
    "read_write"
};
71
72// print dump of ROCm output to stream for comparing with testcase
73static void printROCmOutput(std::ostream& os, const ROCmInput* output)
74{
75    os << "ROCmBinDump:" << std::endl;
76    for (const ROCmSymbolInput& symbol: output->symbols)
77    {
78        os << "  ROCmSymbol: name=" << symbol.symbolName << ", " <<
79                "offset=" << symbol.offset << ", size=" << symbol.size << ", type=" <<
80                rocmRegionTypeNames[cxuint(symbol.type)] << "\n";
81        if (symbol.type == ROCmRegionType::DATA)
82            continue;
83        if (symbol.offset+sizeof(ROCmKernelConfig) > output->codeSize)
84            continue;
85        const ROCmKernelConfig& config = *reinterpret_cast<const ROCmKernelConfig*>(
86                            output->code + symbol.offset);
87       
88        // print kernel configuration
89        os << "    Config:\n"
90            "      amdCodeVersion=" << ULEV(config.amdCodeVersionMajor) << "." <<
91                ULEV(config.amdCodeVersionMajor) << "\n"
92            "      amdMachine=" << ULEV(config.amdMachineKind) << ":" <<
93                ULEV(config.amdMachineMajor) << ":" <<
94                ULEV(config.amdMachineMinor) << ":" <<
95                ULEV(config.amdMachineStepping) << "\n"
96            "      kernelCodeEntryOffset=" << ULEV(config.kernelCodeEntryOffset) << "\n"
97            "      kernelCodePrefetchOffset=" <<
98                ULEV(config.kernelCodePrefetchOffset) << "\n"
99            "      kernelCodePrefetchSize=" << ULEV(config.kernelCodePrefetchSize) << "\n"
100            "      maxScrachBackingMemorySize=" <<
101                ULEV(config.maxScrachBackingMemorySize) << "\n"
102            "      computePgmRsrc1=0x" << std::hex << ULEV(config.computePgmRsrc1) << "\n"
103            "      computePgmRsrc2=0x" << ULEV(config.computePgmRsrc2) << "\n"
104            "      enableSgprRegisterFlags=0x" <<
105                ULEV(config.enableSgprRegisterFlags) << "\n"
106            "      enableFeatureFlags=0x" <<
107                ULEV(config.enableFeatureFlags) << std::dec << "\n"
108            "      workitemPrivateSegmentSize=" <<
109                ULEV(config.workitemPrivateSegmentSize) << "\n"
110            "      workgroupGroupSegmentSize=" <<
111                ULEV(config.workgroupGroupSegmentSize) << "\n"
112            "      gdsSegmentSize=" << ULEV(config.gdsSegmentSize) << "\n"
113            "      kernargSegmentSize=" << ULEV(config.kernargSegmentSize) << "\n"
114            "      workgroupFbarrierCount=" << ULEV(config.workgroupFbarrierCount) << "\n"
115            "      wavefrontSgprCount=" << ULEV(config.wavefrontSgprCount) << "\n"
116            "      workitemVgprCount=" << ULEV(config.workitemVgprCount) << "\n"
117            "      reservedVgprFirst=" << ULEV(config.reservedVgprFirst) << "\n"
118            "      reservedVgprCount=" << ULEV(config.reservedVgprCount) << "\n"
119            "      reservedSgprFirst=" << ULEV(config.reservedSgprFirst) << "\n"
120            "      reservedSgprCount=" << ULEV(config.reservedSgprCount) << "\n"
121            "      debugWavefrontPrivateSegmentOffsetSgpr=" <<
122                ULEV(config.debugWavefrontPrivateSegmentOffsetSgpr) << "\n"
123            "      debugPrivateSegmentBufferSgpr=" <<
124                ULEV(config.debugPrivateSegmentBufferSgpr) << "\n"
125            "      kernargSegmentAlignment=" << 
126                cxuint(config.kernargSegmentAlignment) << "\n"
127            "      groupSegmentAlignment=" <<
128                cxuint(config.groupSegmentAlignment) << "\n"
129            "      privateSegmentAlignment=" <<
130                cxuint(config.privateSegmentAlignment) << "\n"
131            "      wavefrontSize=" << cxuint(config.wavefrontSize) << "\n"
132            "      callConvention=0x" << std::hex << ULEV(config.callConvention) << "\n"
133            "      runtimeLoaderKernelSymbol=0x" <<
134                ULEV(config.runtimeLoaderKernelSymbol) << std::dec << "\n";
135        os << "      ControlDirective:\n";
136        printHexData(os, 3, 128, config.controlDirective);
137    }
138    // print comment and code
139    os << "  Comment:\n";
140    printHexData(os, 1, output->commentSize, (const cxbyte*)output->comment);
141    os << "  Code:\n";
142    printHexData(os, 1, output->codeSize, output->code);
143    if (output->globalData != nullptr)
144    {
145        os << "  GlobalData:\n";
146        printHexData(os, 1, output->globalDataSize, output->globalData);
147    }
148   
149    if (output->metadata != nullptr)
150        os << "  Metadata:\n" << std::string(output->metadata,
151                            output->metadataSize) << "\n";
152   
153    // dump ROCm metadata
154    if (output->useMetadataInfo)
155    {
156        const ROCmMetadata& metadata = output->metadataInfo;
157        os << "  MetadataInfo:\n"
158            "    Version: " << metadata.version[0] << "." << metadata.version[1] << "\n";
159        // dump printf info
160        for (const ROCmPrintfInfo& printfInfo: metadata.printfInfos)
161        {
162            os << "    Printf: " << printfInfo.id;
163            for (size_t argSize: printfInfo.argSizes)
164                os << ", " << argSize;
165            os << "; \"" << printfInfo.format << "\"\n";
166        }
167        // dump kernel metadata
168        for (const ROCmKernelMetadata& kernel: metadata.kernels)
169        {
170            os << "    Kernel: " << kernel.name << "\n"
171                "      SymName=" << kernel.symbolName << "\n"
172                "      Language=" << kernel.language << " " <<
173                        kernel.langVersion[0] << "." << kernel.langVersion[1] << "\n"
174                "      ReqdWorkGroupSize=" << kernel.reqdWorkGroupSize[0] << " " <<
175                        kernel.reqdWorkGroupSize[1] << " " <<
176                        kernel.reqdWorkGroupSize[2] << "\n"
177                "      WorkGroupSizeHint=" << kernel.workGroupSizeHint[0] << " " <<
178                        kernel.workGroupSizeHint[1] << " " <<
179                        kernel.workGroupSizeHint[2] << "\n"
180                "      VecTypeHint=" << kernel.vecTypeHint << "\n"
181                "      RuntimeHandle=" << kernel.runtimeHandle << "\n"
182                "      KernargSegmentSize=" << kernel.kernargSegmentSize << "\n"
183                "      KernargSegmentAlign=" << kernel.kernargSegmentAlign << "\n"
184                "      GroupSegmentFixedSize=" << kernel.groupSegmentFixedSize<< "\n"
185                "      PrivateSegmentFixedSize=" << kernel.privateSegmentFixedSize<< "\n"
186                "      WaveFrontSize=" << kernel.wavefrontSize << "\n"
187                "      SgprsNum=" << kernel.sgprsNum << "\n"
188                "      VgprsNum=" << kernel.vgprsNum << "\n"
189                "      SpilledSgprs=" << kernel.spilledSgprs << "\n"
190                "      SpilledVgprs=" << kernel.spilledVgprs << "\n"
191                "      MaxFlatWorkGroupSize=" << kernel.maxFlatWorkGroupSize << "\n"
192                "      FixedWorkGroupSize=" << kernel.fixedWorkGroupSize[0] << " " <<
193                        kernel.fixedWorkGroupSize[1] << " " <<
194                        kernel.fixedWorkGroupSize[2] << "\n";
195           
196            // dump kernel arguments
197            for (const ROCmKernelArgInfo& argInfo: kernel.argInfos)
198                os << "      Arg name=" << argInfo.name << ", type=" << argInfo.typeName <<
199                    ", size=" << argInfo.size << ", align=" << argInfo.align << "\n"
200                    "        valuekind=" <<
201                            rocmValueKindNames[cxuint(argInfo.valueKind)] <<
202                    ", valuetype=" << rocmValueTypeNames[cxuint(argInfo.valueType)] <<
203                    ", pointeeAlign=" << argInfo.pointeeAlign << "\n"
204                    "        addrSpace=" <<
205                            rocmAddressSpaces[cxuint(argInfo.addressSpace)] <<
206                    ", accQual=" << rocmAccessQuals[cxuint(argInfo.accessQual)] <<
207                    ", actAccQual=" <<
208                            rocmAccessQuals[cxuint(argInfo.actualAccessQual)] << "\n"
209                    "        Flags=" <<
210                    (argInfo.isConst ? " const" : "") <<
211                    (argInfo.isRestrict ? " restrict" : "") <<
212                    (argInfo.isVolatile ? " volatile" : "") <<
213                    (argInfo.isPipe ? " pipe" : "") << "\n";
214        }
215    }
216   
217    if (!output->target.empty())
218        os << "  Target=" << output->target << "\n";
219    if (output->eflags != BINGEN_DEFAULT)
220        os << "  EFlags=" << output->eflags << std::endl;
221   
222    if (output->newBinFormat)
223        os << "  NewBinFormat\n";
224   
225    // print extra sections if supplied
226    for (BinSection section: output->extraSections)
227    {
228        os << "  Section " << section.name << ", type=" << section.type <<
229                        ", flags=" << section.flags << ":\n";
230        printHexData(os, 1, section.size, section.data);
231    }
232    // print extra symbols if supplied
233    for (BinSymbol symbol: output->extraSymbols)
234        os << "  Symbol: name=" << symbol.name << ", value=" << symbol.value <<
235                ", size=" << symbol.size << ", section=" << symbol.sectionId << "\n";
236    os.flush();
237}
238
239
/// Single assembler testcase; entries are given by aggregate initialization,
/// so the order of the members must be kept.
struct AsmTestCase
{
    const char* input;      ///< assembler source code
    const char* dump;       ///< expected output dump (in printROCmOutput format)
    const char* errors;     ///< expected warning/error messages
    bool good;              ///< expected result flag (presumably true if assembly succeeds)
};
247
248static const AsmTestCase asmTestCases1Tbl[] =
249{
250    {
251        R"ffDXD(        .rocm
252        .gpu Fiji
253.kernel kxx1
254    .fkernel
255    .config
256        .dims x
257        .codeversion 1,0
258        .call_convention 0x34dac
259        .debug_private_segment_buffer_sgpr 98
260        .debug_wavefront_private_segment_offset_sgpr 96
261        .gds_segment_size 100
262        .kernarg_segment_align 32
263        .workgroup_group_segment_size 22
264        .workgroup_fbarrier_count 3324
265        .dx10clamp
266        .exceptions 10
267        .private_segment_align 128
268        .privmode
269        .reserved_sgprs 5,14
270        .runtime_loader_kernel_symbol 0x4dc98b3a
271        .scratchbuffer 77222
272        .reserved_sgprs 9,12
273        .reserved_vgprs 7,17
274        .private_elem_size 16
275    .control_directive
276        .int 1,2,3
277        .fill 116,1,0
278.kernel kxx2
279    .config
280        .dims x
281        .codeversion 1,0
282        .call_convention 0x112223
283.kernel kxx1
284    .config
285        .scratchbuffer 111
286.text
287kxx1:
288        .skip 256
289        s_mov_b32 s7, 0
290        s_endpgm
291       
292.align 256
293kxx2:
294        .skip 256
295        s_endpgm
296.section .comment
297        .ascii "some comment for you"
298.kernel kxx2
299    .control_directive
300        .fill 124,1,0xde
301    .config
302        .use_kernarg_segment_ptr
303    .control_directive
304        .int 0xaadd66cc
305    .config
306.kernel kxx1
307.kernel kxx2
308        .call_convention 0x1112234
309       
310)ffDXD",
311        /* dump */
312        R"ffDXD(ROCmBinDump:
313  ROCmSymbol: name=kxx1, offset=0, size=0, type=fkernel
314    Config:
315      amdCodeVersion=1.1
316      amdMachine=1:8:0:3
317      kernelCodeEntryOffset=256
318      kernelCodePrefetchOffset=0
319      kernelCodePrefetchSize=0
320      maxScrachBackingMemorySize=0
321      computePgmRsrc1=0x3c0040
322      computePgmRsrc2=0xa008081
323      enableSgprRegisterFlags=0x0
324      enableFeatureFlags=0x6
325      workitemPrivateSegmentSize=111
326      workgroupGroupSegmentSize=22
327      gdsSegmentSize=100
328      kernargSegmentSize=0
329      workgroupFbarrierCount=3324
330      wavefrontSgprCount=10
331      workitemVgprCount=1
332      reservedVgprFirst=7
333      reservedVgprCount=11
334      reservedSgprFirst=9
335      reservedSgprCount=4
336      debugWavefrontPrivateSegmentOffsetSgpr=96
337      debugPrivateSegmentBufferSgpr=98
338      kernargSegmentAlignment=5
339      groupSegmentAlignment=4
340      privateSegmentAlignment=7
341      wavefrontSize=6
342      callConvention=0x34dac
343      runtimeLoaderKernelSymbol=0x4dc98b3a
344      ControlDirective:
345      0100000002000000030000000000000000000000000000000000000000000000
346      0000000000000000000000000000000000000000000000000000000000000000
347      0000000000000000000000000000000000000000000000000000000000000000
348      0000000000000000000000000000000000000000000000000000000000000000
349  ROCmSymbol: name=kxx2, offset=512, size=0, type=kernel
350    Config:
351      amdCodeVersion=1.1
352      amdMachine=1:8:0:3
353      kernelCodeEntryOffset=256
354      kernelCodePrefetchOffset=0
355      kernelCodePrefetchSize=0
356      maxScrachBackingMemorySize=0
357      computePgmRsrc1=0xc0000
358      computePgmRsrc2=0x84
359      enableSgprRegisterFlags=0x8
360      enableFeatureFlags=0x0
361      workitemPrivateSegmentSize=0
362      workgroupGroupSegmentSize=0
363      gdsSegmentSize=0
364      kernargSegmentSize=0
365      workgroupFbarrierCount=0
366      wavefrontSgprCount=5
367      workitemVgprCount=1
368      reservedVgprFirst=0
369      reservedVgprCount=0
370      reservedSgprFirst=0
371      reservedSgprCount=0
372      debugWavefrontPrivateSegmentOffsetSgpr=0
373      debugPrivateSegmentBufferSgpr=0
374      kernargSegmentAlignment=4
375      groupSegmentAlignment=4
376      privateSegmentAlignment=4
377      wavefrontSize=6
378      callConvention=0x1112234
379      runtimeLoaderKernelSymbol=0x0
380      ControlDirective:
381      dededededededededededededededededededededededededededededededede
382      dededededededededededededededededededededededededededededededede
383      dededededededededededededededededededededededededededededededede
384      dedededededededededededededededededededededededededededecc66ddaa
385  Comment:
386  736f6d6520636f6d6d656e7420666f7220796f75
387  Code:
388  0100000000000000010008000000030000010000000000000000000000000000
389  0000000000000000000000000000000040003c008180000a000006006f000000
390  16000000640000000000000000000000fc0c00000a00010007000b0009000400
391  6000620005040706ac4d03000000000000000000000000003a8bc94d00000000
392  0100000002000000030000000000000000000000000000000000000000000000
393  0000000000000000000000000000000000000000000000000000000000000000
394  0000000000000000000000000000000000000000000000000000000000000000
395  0000000000000000000000000000000000000000000000000000000000000000
396  800087be000081bf000080bf000080bf000080bf000080bf000080bf000080bf
397  000080bf000080bf000080bf000080bf000080bf000080bf000080bf000080bf
398  000080bf000080bf000080bf000080bf000080bf000080bf000080bf000080bf
399  000080bf000080bf000080bf000080bf000080bf000080bf000080bf000080bf
400  000080bf000080bf000080bf000080bf000080bf000080bf000080bf000080bf
401  000080bf000080bf000080bf000080bf000080bf000080bf000080bf000080bf
402  000080bf000080bf000080bf000080bf000080bf000080bf000080bf000080bf
403  000080bf000080bf000080bf000080bf000080bf000080bf000080bf000080bf
404  0100000000000000010008000000030000010000000000000000000000000000
405  0000000000000000000000000000000000000c00840000000800000000000000
406  0000000000000000000000000000000000000000050001000000000000000000
407  0000000004040406342211010000000000000000000000000000000000000000
408  dededededededededededededededededededededededededededededededede
409  dededededededededededededededededededededededededededededededede
410  dededededededededededededededededededededededededededededededede
411  dedededededededededededededededededededededededededededecc66ddaa
412  000081bf
413)ffDXD",
414        /* warning/errors */
415        "",
416        true
417    },
418    {
419        R"ffDXD(        .rocm
420        .gpu Fiji
421.kernel someKernelX
422    .config
423        .dims xz
424        .call_convention 331
425        .codeversion 1,0
426        .machine 8,0,1,2
427        .debug_private_segment_buffer_sgpr 10
428        .debug_wavefront_private_segment_offset_sgpr 31
429        .exceptions 0x3e
430        .floatmode 0xc3
431        .gds_segment_size 105
432        .group_segment_align 128
433        .kernarg_segment_align 64
434        .kernarg_segment_size 228
435        .kernel_code_entry_offset 256
436        .kernel_code_prefetch_offset 1002
437        .kernel_code_prefetch_size 13431
438        .max_scratch_backing_memory 4212
439        .pgmrsrc1 0xa0000000
440        .pgmrsrc2 0xd00000
441        .priority 2
442        .private_elem_size 8
443        .private_segment_align 32
444        .reserved_sgprs 12,19
445        .reserved_vgprs 26,48
446        .runtime_loader_kernel_symbol 0x3eda1
447        .scratchbuffer 2330
448        .use_debug_enabled
449        .use_flat_scratch_init
450        .use_grid_workgroup_count xz
451        .use_private_segment_buffer
452        .use_ptr64
453        .use_xnack_enabled
454        .wavefront_size 256
455        .workgroup_fbarrier_count 69
456        .workgroup_group_segment_size 324
457        .workitem_private_segment_size 33
458        .vgprsnum 211
459        .sgprsnum 85
460.text
461someKernelX:
462        .skip 256
463        s_endpgm)ffDXD",
464        R"ffDXD(ROCmBinDump:
465  ROCmSymbol: name=someKernelX, offset=0, size=0, type=kernel
466    Config:
467      amdCodeVersion=1.1
468      amdMachine=8:0:1:2
469      kernelCodeEntryOffset=256
470      kernelCodePrefetchOffset=1002
471      kernelCodePrefetchSize=13431
472      maxScrachBackingMemorySize=4212
473      computePgmRsrc1=0xa00c3ab4
474      computePgmRsrc2=0x3ed09291
475      enableSgprRegisterFlags=0x2a1
476      enableFeatureFlags=0x6c
477      workitemPrivateSegmentSize=33
478      workgroupGroupSegmentSize=324
479      gdsSegmentSize=105
480      kernargSegmentSize=228
481      workgroupFbarrierCount=69
482      wavefrontSgprCount=85
483      workitemVgprCount=211
484      reservedVgprFirst=26
485      reservedVgprCount=23
486      reservedSgprFirst=12
487      reservedSgprCount=8
488      debugWavefrontPrivateSegmentOffsetSgpr=31
489      debugPrivateSegmentBufferSgpr=10
490      kernargSegmentAlignment=6
491      groupSegmentAlignment=7
492      privateSegmentAlignment=5
493      wavefrontSize=8
494      callConvention=0x14b
495      runtimeLoaderKernelSymbol=0x3eda1
496      ControlDirective:
497      0000000000000000000000000000000000000000000000000000000000000000
498      0000000000000000000000000000000000000000000000000000000000000000
499      0000000000000000000000000000000000000000000000000000000000000000
500      0000000000000000000000000000000000000000000000000000000000000000
501  Comment:
502  nullptr
503  Code:
504  010000000000000008000000010002000001000000000000ea03000000000000
505  77340000000000007410000000000000b43a0ca09192d03ea1026c0021000000
506  4401000069000000e400000000000000450000005500d3001a0017000c000800
507  1f000a00060705084b010000000000000000000000000000a1ed030000000000
508  0000000000000000000000000000000000000000000000000000000000000000
509  0000000000000000000000000000000000000000000000000000000000000000
510  0000000000000000000000000000000000000000000000000000000000000000
511  0000000000000000000000000000000000000000000000000000000000000000
512  000081bf
513)ffDXD",
514        /* warning/errors */
515        "",
516        true
517    },
518    {
519        R"ffDXD(        .rocm
520        .gpu Fiji
521.kernel someKernelX
522    .config
523        .dims xz
524        .reserved_vgprs 0, 11
525.text
526someKernelX:
527        s_endpgm)ffDXD",
528        "", "test.s:3:1: Error: "
529        "Code for kernel 'someKernelX' is too small for configuration\n", false
530    },
531    {
532        R"ffDXD(        .rocm
533        .gpu Fiji
534.kernel someKernelX
535    .config
536        .dims xz
537        .reserved_vgprs 12,11
538        .reserved_sgprs 17,11
539        .reserved_vgprs 256,257
540        .reserved_sgprs 112,113
541        .debug_private_segment_buffer_sgpr 123
542        .debug_wavefront_private_segment_offset_sgpr 108
543        .private_elem_size 6
544        .private_elem_size 1
545        .private_elem_size 32
546        .kernarg_segment_align 56
547        .kernarg_segment_align 8
548        .private_segment_align 56
549        .private_segment_align 8
550        .wavefront_size 157
551        .wavefront_size 512
552        .pgmrsrc2 0xaa1fd3da2313
553.text
554someKernelX:
555        .skip 256
556        s_endpgm)ffDXD",
557        "", R"ffDXD(test.s:6:28: Error: Wrong register range
558test.s:7:28: Error: Wrong register range
559test.s:8:25: Error: First reserved VGPR register out of range (0-255)
560test.s:8:29: Error: Last reserved VGPR register out of range (0-255)
561test.s:9:25: Error: First reserved SGPR register out of range (0-101)
562test.s:9:29: Error: Last reserved SGPR register out of range (0-101)
563test.s:10:44: Error: SGPR register out of range
564test.s:11:54: Error: SGPR register out of range
565test.s:12:28: Error: Private element size must be power of two
566test.s:13:28: Error: Private element size out of range
567test.s:14:28: Error: Private element size out of range
568test.s:15:32: Error: Alignment must be power of two
569test.s:16:32: Error: Alignment must be not smaller than 16
570test.s:17:32: Error: Alignment must be power of two
571test.s:18:32: Error: Alignment must be not smaller than 16
572test.s:19:25: Error: Wavefront size must be power of two
573test.s:20:25: Error: Wavefront size must be not greater than 256
574test.s:21:19: Warning: Value 0xaa1fd3da2313 truncated to 0xd3da2313
575)ffDXD", false
576    },
577    {   // different eflags
578        R"ffDXD(.rocm
579        .gpu Fiji
580        .eflags 3
581.kernel kxx1
582    .config
583        .dims x
584        .codeversion 1,0
585        .call_convention 0x34dac
586        .debug_private_segment_buffer_sgpr 98
587        .debug_wavefront_private_segment_offset_sgpr 96
588        .gds_segment_size 100
589        .kernarg_segment_align 32
590        .workgroup_group_segment_size 22
591        .workgroup_fbarrier_count 3324
592        .dx10clamp
593        .exceptions 10
594        .private_segment_align 128
595        .privmode
596        .reserved_sgprs 5,14
597        .runtime_loader_kernel_symbol 0x4dc98b3a
598        .scratchbuffer 77222
599        .reserved_sgprs 9,12
600        .reserved_vgprs 7,17
601        .private_elem_size 16
602    .control_directive
603        .int 1,2,3
604        .fill 116,1,0
605.text
606kxx1:
607        .skip 256
608        s_mov_b32 s7, 0
609        s_endpgm
610)ffDXD",
611        R"ffDXD(ROCmBinDump:
612  ROCmSymbol: name=kxx1, offset=0, size=0, type=kernel
613    Config:
614      amdCodeVersion=1.1
615      amdMachine=1:8:0:3
616      kernelCodeEntryOffset=256
617      kernelCodePrefetchOffset=0
618      kernelCodePrefetchSize=0
619      maxScrachBackingMemorySize=0
620      computePgmRsrc1=0x3c0040
621      computePgmRsrc2=0xa008081
622      enableSgprRegisterFlags=0x0
623      enableFeatureFlags=0x6
624      workitemPrivateSegmentSize=77222
625      workgroupGroupSegmentSize=22
626      gdsSegmentSize=100
627      kernargSegmentSize=0
628      workgroupFbarrierCount=3324
629      wavefrontSgprCount=10
630      workitemVgprCount=1
631      reservedVgprFirst=7
632      reservedVgprCount=11
633      reservedSgprFirst=9
634      reservedSgprCount=4
635      debugWavefrontPrivateSegmentOffsetSgpr=96
636      debugPrivateSegmentBufferSgpr=98
637      kernargSegmentAlignment=5
638      groupSegmentAlignment=4
639      privateSegmentAlignment=7
640      wavefrontSize=6
641      callConvention=0x34dac
642      runtimeLoaderKernelSymbol=0x4dc98b3a
643      ControlDirective:
644      0100000002000000030000000000000000000000000000000000000000000000
645      0000000000000000000000000000000000000000000000000000000000000000
646      0000000000000000000000000000000000000000000000000000000000000000
647      0000000000000000000000000000000000000000000000000000000000000000
648  Comment:
649  nullptr
650  Code:
651  0100000000000000010008000000030000010000000000000000000000000000
652  0000000000000000000000000000000040003c008180000a00000600a62d0100
653  16000000640000000000000000000000fc0c00000a00010007000b0009000400
654  6000620005040706ac4d03000000000000000000000000003a8bc94d00000000
655  0100000002000000030000000000000000000000000000000000000000000000
656  0000000000000000000000000000000000000000000000000000000000000000
657  0000000000000000000000000000000000000000000000000000000000000000
658  0000000000000000000000000000000000000000000000000000000000000000
659  800087be000081bf
660  EFlags=3
661)ffDXD", "", true
662    },
663    {   // metadata and others
664        R"ffDXD(.rocm
665        .gpu Fiji
666        .eflags 3
667        .newbinfmt
668.metadata
669        .ascii "sometext in this place\n"
670        .ascii "maybe not unrecognizable by parser but it is understandable by human\n"
671.globaldata
672        .byte 1,2,3,4,5,5,6,33
673.kernel kxx1
674    .config
675        .dims x
676        .codeversion 1,0
677        .call_convention 0x34dac
678        .debug_private_segment_buffer_sgpr 98
679        .debug_wavefront_private_segment_offset_sgpr 96
680        .gds_segment_size 100
681        .kernarg_segment_align 32
682        .workgroup_group_segment_size 22
683        .workgroup_fbarrier_count 3324
684        .dx10clamp
685        .exceptions 10
686        .private_segment_align 128
687        .privmode
688        .reserved_sgprs 5,14
689        .runtime_loader_kernel_symbol 0x4dc98b3a
690        .scratchbuffer 77222
691        .reserved_sgprs 9,12
692        .reserved_vgprs 7,17
693        .private_elem_size 16
694    .control_directive
695        .int 1,2,3
696        .fill 116,1,0
697.text
698kxx1:
699        .skip 256
700        s_mov_b32 s7, 0
701        s_endpgm
702)ffDXD",
703        R"ffDXD(ROCmBinDump:
704  ROCmSymbol: name=kxx1, offset=0, size=0, type=kernel
705    Config:
706      amdCodeVersion=1.1
707      amdMachine=1:8:0:3
708      kernelCodeEntryOffset=256
709      kernelCodePrefetchOffset=0
710      kernelCodePrefetchSize=0
711      maxScrachBackingMemorySize=0
712      computePgmRsrc1=0x3c0040
713      computePgmRsrc2=0xa008081
714      enableSgprRegisterFlags=0x0
715      enableFeatureFlags=0x6
716      workitemPrivateSegmentSize=77222
717      workgroupGroupSegmentSize=22
718      gdsSegmentSize=100
719      kernargSegmentSize=0
720      workgroupFbarrierCount=3324
721      wavefrontSgprCount=10
722      workitemVgprCount=1
723      reservedVgprFirst=7
724      reservedVgprCount=11
725      reservedSgprFirst=9
726      reservedSgprCount=4
727      debugWavefrontPrivateSegmentOffsetSgpr=96
728      debugPrivateSegmentBufferSgpr=98
729      kernargSegmentAlignment=5
730      groupSegmentAlignment=4
731      privateSegmentAlignment=7
732      wavefrontSize=6
733      callConvention=0x34dac
734      runtimeLoaderKernelSymbol=0x4dc98b3a
735      ControlDirective:
736      0100000002000000030000000000000000000000000000000000000000000000
737      0000000000000000000000000000000000000000000000000000000000000000
738      0000000000000000000000000000000000000000000000000000000000000000
739      0000000000000000000000000000000000000000000000000000000000000000
740  Comment:
741  nullptr
742  Code:
743  0100000000000000010008000000030000010000000000000000000000000000
744  0000000000000000000000000000000040003c008180000a00000600a62d0100
745  16000000640000000000000000000000fc0c00000a00010007000b0009000400
746  6000620005040706ac4d03000000000000000000000000003a8bc94d00000000
747  0100000002000000030000000000000000000000000000000000000000000000
748  0000000000000000000000000000000000000000000000000000000000000000
749  0000000000000000000000000000000000000000000000000000000000000000
750  0000000000000000000000000000000000000000000000000000000000000000
751  800087be000081bf
752  GlobalData:
753  0102030405050621
754  Metadata:
755sometext in this place
756maybe not unrecognizable by parser but it is understandable by human
757
758  EFlags=3
759  NewBinFormat
760)ffDXD", "", true
761    },
762    {   // metadata info
763        R"ffDXD(.rocm
764        .gpu Fiji
765        .eflags 3
766        .newbinfmt
767        .md_version 3 , 5
768        .printf 1 ,5 ,7 , 2,  11, "sometext %d %e %f"
769        .printf 2 ,"sometext"
770        .printf  , 16 ,8 , 2,  4, "sometext %d %e %f"
771.kernel kxx1
772    .config
773        .dims x
774        .codeversion 1,0
775        .call_convention 0x34dac
776        .debug_private_segment_buffer_sgpr 98
777        .debug_wavefront_private_segment_offset_sgpr 96
778        .gds_segment_size 100
779        .kernarg_segment_align 32
780    # metadata
781        .md_symname "kxx1@kd"
782        .md_language "Poliglot", 3, 1
783        .reqd_work_group_size 6,2,4
784        .work_group_size_hint 5,7,2
785        .vectypehint float16
786        .spilledsgprs 11
787        .spilledvgprs 52
788        .md_kernarg_segment_size 64
789        .md_kernarg_segment_align 8
790        .md_group_segment_fixed_size 0
791        .md_private_segment_fixed_size 0
792        .md_wavefront_size 64
793        .md_sgprsnum 14
794        .md_vgprsnum 11
795        .max_flat_work_group_size 256
796        .arg n, "uint", 4, , value, u32
797        .arg n2, "uint", 12, , value, u32
798        .arg x0, "char", 1, 16, value, char
799        .arg x1, "int8", 1, 16, value, i8
800        .arg x2, "short", 2, 16, value, short
801        .arg x3, "int16", 2, 16, value, i16
802        .arg x4, "int", 4, 16, value, int
803        .arg x5, "int32", 4, 16, value, i32
804        .arg x6, "long", 8, 16, value, long
805        .arg x7, "int64", 8, 16, value, i64
806        .arg x8, "uchar", 1, 16, value, uchar
807        .arg x9, "uint8", 1, 16, value, u8
808        .arg x10, "ushort", 2, 16, value, ushort
809        .arg x11, "uint16", 2, 16, value, u16
810        .arg x12, "uint", 4, 16, value, uint
811        .arg x13, "uint32", 4, 16, value, u32
812        .arg x14, "ulong", 8, 16, value, ulong
813        .arg x15, "uint64", 8, 16, value, u64
814        .arg x16, "half", 2, 16, value, half
815        .arg x17, "fp16", 2, 16, value, f16
816        .arg x18, "float", 4, 16, value, float
817        .arg x19, "fp32", 4, 16, value, f32
818        .arg x20, "double", 8, 16, value, double
819        .arg x21, "fp64", 8, 16, value, f64
820        .arg a, "float*", 8, 8, globalbuf, f32, global, default const volatile
821        .arg abuf, "float*", 8, 8, globalbuf, f32, constant, default
822        .arg abuf2, "float*", 8, 8, dynshptr, f32, 1, local
823        .arg abuf3, "float*", 8, 8, globalbuf, f32, generic, default
824        .arg abuf4, "float*", 8, 8, globalbuf, f32, region, default
825        .arg abuf5, "float*", 8, 8, dynshptr, f32, 1, private
826        .arg bbuf, "float*", 8, 8, globalbuf, f32, global, read_only
827        .arg bbuf2, "float*", 8, 8, globalbuf, f32, global, write_only
828        .arg bbuf3, "float*", 8, 8, globalbuf, f32, global, read_write
829        .arg img1, "image1d_t", 8, 8, image, struct, read_only, default
830        .arg img2, "image1d_t", 8, 8, image, struct, write_only, default
831        .arg img3, "image1d_t", 8, 8, image, struct, read_write, default
832        .arg , "", 8, 8, gox, i64
833        .arg , "", 8, 8, goy, i64
834        .arg , "", 8, 8, goz, i64
835        .arg , "", 8, 8, globaloffsetx, i64
836        .arg , "", 8, 8, globaloffsety, i64
837        .arg , "", 8, 8, globaloffsetz, i64
838.text
839kxx1:   .skip 256
840        s_mov_b32 s7, 0
841        s_endpgm
842)ffDXD",
843        R"ffDXD(ROCmBinDump:
844  ROCmSymbol: name=kxx1, offset=0, size=0, type=kernel
845    Config:
846      amdCodeVersion=1.1
847      amdMachine=1:8:0:3
848      kernelCodeEntryOffset=256
849      kernelCodePrefetchOffset=0
850      kernelCodePrefetchSize=0
851      maxScrachBackingMemorySize=0
852      computePgmRsrc1=0xc0040
853      computePgmRsrc2=0x80
854      enableSgprRegisterFlags=0x0
855      enableFeatureFlags=0x0
856      workitemPrivateSegmentSize=0
857      workgroupGroupSegmentSize=0
858      gdsSegmentSize=100
859      kernargSegmentSize=0
860      workgroupFbarrierCount=0
861      wavefrontSgprCount=10
862      workitemVgprCount=1
863      reservedVgprFirst=0
864      reservedVgprCount=0
865      reservedSgprFirst=0
866      reservedSgprCount=0
867      debugWavefrontPrivateSegmentOffsetSgpr=96
868      debugPrivateSegmentBufferSgpr=98
869      kernargSegmentAlignment=5
870      groupSegmentAlignment=4
871      privateSegmentAlignment=4
872      wavefrontSize=6
873      callConvention=0x34dac
874      runtimeLoaderKernelSymbol=0x0
875      ControlDirective:
876      0000000000000000000000000000000000000000000000000000000000000000
877      0000000000000000000000000000000000000000000000000000000000000000
878      0000000000000000000000000000000000000000000000000000000000000000
879      0000000000000000000000000000000000000000000000000000000000000000
880  Comment:
881  nullptr
882  Code:
883  0100000000000000010008000000030000010000000000000000000000000000
884  0000000000000000000000000000000040000c00800000000000000000000000
885  00000000640000000000000000000000000000000a0001000000000000000000
886  6000620005040406ac4d03000000000000000000000000000000000000000000
887  0000000000000000000000000000000000000000000000000000000000000000
888  0000000000000000000000000000000000000000000000000000000000000000
889  0000000000000000000000000000000000000000000000000000000000000000
890  0000000000000000000000000000000000000000000000000000000000000000
891  800087be000081bf
892  MetadataInfo:
893    Version: 3.5
894    Printf: 1, 5, 7, 2, 11; "sometext %d %e %f"
895    Printf: 2; "sometext"
896    Printf: 4294967295, 16, 8, 2, 4; "sometext %d %e %f"
897    Kernel: kxx1
898      SymName=kxx1@kd
899      Language=Poliglot 3.1
900      ReqdWorkGroupSize=6 2 4
901      WorkGroupSizeHint=5 7 2
902      VecTypeHint=float16
903      RuntimeHandle=
904      KernargSegmentSize=64
905      KernargSegmentAlign=8
906      GroupSegmentFixedSize=0
907      PrivateSegmentFixedSize=0
908      WaveFrontSize=64
909      SgprsNum=14
910      VgprsNum=11
911      SpilledSgprs=11
912      SpilledVgprs=52
913      MaxFlatWorkGroupSize=256
914      FixedWorkGroupSize=0 0 0
915      Arg name=n, type=uint, size=4, align=4
916        valuekind=value, valuetype=u32, pointeeAlign=0
917        addrSpace=none, accQual=default, actAccQual=default
918        Flags=
919      Arg name=n2, type=uint, size=12, align=16
920        valuekind=value, valuetype=u32, pointeeAlign=0
921        addrSpace=none, accQual=default, actAccQual=default
922        Flags=
923      Arg name=x0, type=char, size=1, align=16
924        valuekind=value, valuetype=i8, pointeeAlign=0
925        addrSpace=none, accQual=default, actAccQual=default
926        Flags=
927      Arg name=x1, type=int8, size=1, align=16
928        valuekind=value, valuetype=i8, pointeeAlign=0
929        addrSpace=none, accQual=default, actAccQual=default
930        Flags=
931      Arg name=x2, type=short, size=2, align=16
932        valuekind=value, valuetype=i16, pointeeAlign=0
933        addrSpace=none, accQual=default, actAccQual=default
934        Flags=
935      Arg name=x3, type=int16, size=2, align=16
936        valuekind=value, valuetype=i16, pointeeAlign=0
937        addrSpace=none, accQual=default, actAccQual=default
938        Flags=
939      Arg name=x4, type=int, size=4, align=16
940        valuekind=value, valuetype=i32, pointeeAlign=0
941        addrSpace=none, accQual=default, actAccQual=default
942        Flags=
943      Arg name=x5, type=int32, size=4, align=16
944        valuekind=value, valuetype=i32, pointeeAlign=0
945        addrSpace=none, accQual=default, actAccQual=default
946        Flags=
947      Arg name=x6, type=long, size=8, align=16
948        valuekind=value, valuetype=i64, pointeeAlign=0
949        addrSpace=none, accQual=default, actAccQual=default
950        Flags=
951      Arg name=x7, type=int64, size=8, align=16
952        valuekind=value, valuetype=i64, pointeeAlign=0
953        addrSpace=none, accQual=default, actAccQual=default
954        Flags=
955      Arg name=x8, type=uchar, size=1, align=16
956        valuekind=value, valuetype=u8, pointeeAlign=0
957        addrSpace=none, accQual=default, actAccQual=default
958        Flags=
959      Arg name=x9, type=uint8, size=1, align=16
960        valuekind=value, valuetype=u8, pointeeAlign=0
961        addrSpace=none, accQual=default, actAccQual=default
962        Flags=
963      Arg name=x10, type=ushort, size=2, align=16
964        valuekind=value, valuetype=i16, pointeeAlign=0
965        addrSpace=none, accQual=default, actAccQual=default
966        Flags=
967      Arg name=x11, type=uint16, size=2, align=16
968        valuekind=value, valuetype=u16, pointeeAlign=0
969        addrSpace=none, accQual=default, actAccQual=default
970        Flags=
971      Arg name=x12, type=uint, size=4, align=16
972        valuekind=value, valuetype=u32, pointeeAlign=0
973        addrSpace=none, accQual=default, actAccQual=default
974        Flags=
975      Arg name=x13, type=uint32, size=4, align=16
976        valuekind=value, valuetype=u32, pointeeAlign=0
977        addrSpace=none, accQual=default, actAccQual=default
978        Flags=
979      Arg name=x14, type=ulong, size=8, align=16
980        valuekind=value, valuetype=u64, pointeeAlign=0
981        addrSpace=none, accQual=default, actAccQual=default
982        Flags=
983      Arg name=x15, type=uint64, size=8, align=16
984        valuekind=value, valuetype=u64, pointeeAlign=0
985        addrSpace=none, accQual=default, actAccQual=default
986        Flags=
987      Arg name=x16, type=half, size=2, align=16
988        valuekind=value, valuetype=f16, pointeeAlign=0
989        addrSpace=none, accQual=default, actAccQual=default
990        Flags=
991      Arg name=x17, type=fp16, size=2, align=16
992        valuekind=value, valuetype=f16, pointeeAlign=0
993        addrSpace=none, accQual=default, actAccQual=default
994        Flags=
995      Arg name=x18, type=float, size=4, align=16
996        valuekind=value, valuetype=f32, pointeeAlign=0
997        addrSpace=none, accQual=default, actAccQual=default
998        Flags=
999      Arg name=x19, type=fp32, size=4, align=16
1000        valuekind=value, valuetype=f32, pointeeAlign=0
1001        addrSpace=none, accQual=default, actAccQual=default
1002        Flags=
1003      Arg name=x20, type=double, size=8, align=16
1004        valuekind=value, valuetype=f64, pointeeAlign=0
1005        addrSpace=none, accQual=default, actAccQual=default
1006        Flags=
1007      Arg name=x21, type=fp64, size=8, align=16
1008        valuekind=value, valuetype=f64, pointeeAlign=0
1009        addrSpace=none, accQual=default, actAccQual=default
1010        Flags=
1011      Arg name=a, type=float*, size=8, align=8
1012        valuekind=globalbuf, valuetype=f32, pointeeAlign=0
1013        addrSpace=global, accQual=default, actAccQual=default
1014        Flags= const volatile
1015      Arg name=abuf, type=float*, size=8, align=8
1016        valuekind=globalbuf, valuetype=f32, pointeeAlign=0
1017        addrSpace=constant, accQual=default, actAccQual=default
1018        Flags=
1019      Arg name=abuf2, type=float*, size=8, align=8
1020        valuekind=dynshptr, valuetype=f32, pointeeAlign=1
1021        addrSpace=local, accQual=default, actAccQual=default
1022        Flags=
1023      Arg name=abuf3, type=float*, size=8, align=8
1024        valuekind=globalbuf, valuetype=f32, pointeeAlign=0
1025        addrSpace=generic, accQual=default, actAccQual=default
1026        Flags=
1027      Arg name=abuf4, type=float*, size=8, align=8
1028        valuekind=globalbuf, valuetype=f32, pointeeAlign=0
1029        addrSpace=region, accQual=default, actAccQual=default
1030        Flags=
1031      Arg name=abuf5, type=float*, size=8, align=8
1032        valuekind=dynshptr, valuetype=f32, pointeeAlign=1
1033        addrSpace=private, accQual=default, actAccQual=default
1034        Flags=
1035      Arg name=bbuf, type=float*, size=8, align=8
1036        valuekind=globalbuf, valuetype=f32, pointeeAlign=0
1037        addrSpace=global, accQual=default, actAccQual=read_only
1038        Flags=
1039      Arg name=bbuf2, type=float*, size=8, align=8
1040        valuekind=globalbuf, valuetype=f32, pointeeAlign=0
1041        addrSpace=global, accQual=default, actAccQual=write_only
1042        Flags=
1043      Arg name=bbuf3, type=float*, size=8, align=8
1044        valuekind=globalbuf, valuetype=f32, pointeeAlign=0
1045        addrSpace=global, accQual=default, actAccQual=read_write
1046        Flags=
1047      Arg name=img1, type=image1d_t, size=8, align=8
1048        valuekind=image, valuetype=struct, pointeeAlign=0
1049        addrSpace=none, accQual=read_only, actAccQual=default
1050        Flags=
1051      Arg name=img2, type=image1d_t, size=8, align=8
1052        valuekind=image, valuetype=struct, pointeeAlign=0
1053        addrSpace=none, accQual=write_only, actAccQual=default
1054        Flags=
1055      Arg name=img3, type=image1d_t, size=8, align=8
1056        valuekind=image, valuetype=struct, pointeeAlign=0
1057        addrSpace=none, accQual=read_write, actAccQual=default
1058        Flags=
1059      Arg name=, type=, size=8, align=8
1060        valuekind=gox, valuetype=i64, pointeeAlign=0
1061        addrSpace=none, accQual=default, actAccQual=default
1062        Flags=
1063      Arg name=, type=, size=8, align=8
1064        valuekind=goy, valuetype=i64, pointeeAlign=0
1065        addrSpace=none, accQual=default, actAccQual=default
1066        Flags=
1067      Arg name=, type=, size=8, align=8
1068        valuekind=goz, valuetype=i64, pointeeAlign=0
1069        addrSpace=none, accQual=default, actAccQual=default
1070        Flags=
1071      Arg name=, type=, size=8, align=8
1072        valuekind=gox, valuetype=i64, pointeeAlign=0
1073        addrSpace=none, accQual=default, actAccQual=default
1074        Flags=
1075      Arg name=, type=, size=8, align=8
1076        valuekind=gox, valuetype=i64, pointeeAlign=0
1077        addrSpace=none, accQual=default, actAccQual=default
1078        Flags=
1079      Arg name=, type=, size=8, align=8
1080        valuekind=gox, valuetype=i64, pointeeAlign=0
1081        addrSpace=none, accQual=default, actAccQual=default
1082        Flags=
1083  EFlags=3
1084  NewBinFormat
1085)ffDXD",
1086        "", true
1087    },
1088    {
1089        R"ffDXD(.rocm
1090        .gpu Fiji
1091        .eflags 3
1092        .newbinfmt
1093        .md_version 3 , 5
1094.kernel kxx1
1095    .config
1096        .dims x
1097        .codeversion 1,0
1098        .call_convention 0x34dac
1099        .debug_private_segment_buffer_sgpr 98
1100        .debug_wavefront_private_segment_offset_sgpr 96
1101        .gds_segment_size 100
1102        .kernarg_segment_align 32
1103    # metadata
1104        .md_language "jezorx"
1105        .reqd_work_group_size 6,
1106        .work_group_size_hint 5,7
1107        .fixed_work_group_size 3,,71
1108        .md_kernarg_segment_size 64
1109        .md_kernarg_segment_align 32
1110        .md_group_segment_fixed_size 1121
1111        .md_private_segment_fixed_size 6632
1112        .md_wavefront_size 64
1113        .md_sgprsnum 14
1114        .md_vgprsnum 11
1115        .runtime_handle "SomeCodeToExec"
1116        # arg infos
1117        .arg , "", 8, 8, none, i64
1118        .arg , "", 8, 8, complact, i64
1119        .arg , "", 8, 8, printfbuf, i64
1120        .arg , "", 8, 8, defqueue, i64
1121        .arg pipe0, "pipe_t", 8, 8, pipe, struct, read_write, default pipe
1122        .arg qx01, "queue_t", 8, 8, queue, struct
1123        .arg masksamp, "sampler_t", 8, 8, sampler, struct
1124        .arg vxx1, "void*", 8, 8, globalbuf, i8, global, default const
1125        .arg vx1, "void*", 8, 8, globalbuf, i8, global, default volatile
1126        .arg dx3, "void*", 8, 8, globalbuf, i8, global, default restrict
1127        .arg ex6, "void*", 8, 8, globalbuf, i8, global, default pipe
1128        .arg fx9, "void*", 8, 8, globalbuf, i8, global, default volatile const restrict
1129.text
1130kxx1:   .skip 256
1131        s_mov_b32 s7, 0
1132        s_endpgm
1133)ffDXD",
1134        R"ffDXD(ROCmBinDump:
1135  ROCmSymbol: name=kxx1, offset=0, size=0, type=kernel
1136    Config:
1137      amdCodeVersion=1.1
1138      amdMachine=1:8:0:3
1139      kernelCodeEntryOffset=256
1140      kernelCodePrefetchOffset=0
1141      kernelCodePrefetchSize=0
1142      maxScrachBackingMemorySize=0
1143      computePgmRsrc1=0xc0040
1144      computePgmRsrc2=0x80
1145      enableSgprRegisterFlags=0x0
1146      enableFeatureFlags=0x0
1147      workitemPrivateSegmentSize=0
1148      workgroupGroupSegmentSize=0
1149      gdsSegmentSize=100
1150      kernargSegmentSize=0
1151      workgroupFbarrierCount=0
1152      wavefrontSgprCount=10
1153      workitemVgprCount=1
1154      reservedVgprFirst=0
1155      reservedVgprCount=0
1156      reservedSgprFirst=0
1157      reservedSgprCount=0
1158      debugWavefrontPrivateSegmentOffsetSgpr=96
1159      debugPrivateSegmentBufferSgpr=98
1160      kernargSegmentAlignment=5
1161      groupSegmentAlignment=4
1162      privateSegmentAlignment=4
1163      wavefrontSize=6
1164      callConvention=0x34dac
1165      runtimeLoaderKernelSymbol=0x0
1166      ControlDirective:
1167      0000000000000000000000000000000000000000000000000000000000000000
1168      0000000000000000000000000000000000000000000000000000000000000000
1169      0000000000000000000000000000000000000000000000000000000000000000
1170      0000000000000000000000000000000000000000000000000000000000000000
1171  Comment:
1172  nullptr
1173  Code:
1174  0100000000000000010008000000030000010000000000000000000000000000
1175  0000000000000000000000000000000040000c00800000000000000000000000
1176  00000000640000000000000000000000000000000a0001000000000000000000
1177  6000620005040406ac4d03000000000000000000000000000000000000000000
1178  0000000000000000000000000000000000000000000000000000000000000000
1179  0000000000000000000000000000000000000000000000000000000000000000
1180  0000000000000000000000000000000000000000000000000000000000000000
1181  0000000000000000000000000000000000000000000000000000000000000000
1182  800087be000081bf
1183  MetadataInfo:
1184    Version: 3.5
1185    Kernel: kxx1
1186      SymName=
1187      Language=jezorx 0.0
1188      ReqdWorkGroupSize=6 1 1
1189      WorkGroupSizeHint=5 7 1
1190      VecTypeHint=
1191      RuntimeHandle=SomeCodeToExec
1192      KernargSegmentSize=64
1193      KernargSegmentAlign=32
1194      GroupSegmentFixedSize=1121
1195      PrivateSegmentFixedSize=6632
1196      WaveFrontSize=64
1197      SgprsNum=14
1198      VgprsNum=11
1199      SpilledSgprs=4294967294
1200      SpilledVgprs=4294967294
1201      MaxFlatWorkGroupSize=18446744073709551614
1202      FixedWorkGroupSize=3 1 71
1203      Arg name=, type=, size=8, align=8
1204        valuekind=none, valuetype=i64, pointeeAlign=0
1205        addrSpace=none, accQual=default, actAccQual=default
1206        Flags=
1207      Arg name=, type=, size=8, align=8
1208        valuekind=complact, valuetype=i64, pointeeAlign=0
1209        addrSpace=none, accQual=default, actAccQual=default
1210        Flags=
1211      Arg name=, type=, size=8, align=8
1212        valuekind=printfbuf, valuetype=i64, pointeeAlign=0
1213        addrSpace=none, accQual=default, actAccQual=default
1214        Flags=
1215      Arg name=, type=, size=8, align=8
1216        valuekind=defqueue, valuetype=i64, pointeeAlign=0
1217        addrSpace=none, accQual=default, actAccQual=default
1218        Flags=
1219      Arg name=pipe0, type=pipe_t, size=8, align=8
1220        valuekind=pipe, valuetype=struct, pointeeAlign=0
1221        addrSpace=none, accQual=read_write, actAccQual=default
1222        Flags= pipe
1223      Arg name=qx01, type=queue_t, size=8, align=8
1224        valuekind=queue, valuetype=struct, pointeeAlign=0
1225        addrSpace=none, accQual=default, actAccQual=default
1226        Flags=
1227      Arg name=masksamp, type=sampler_t, size=8, align=8
1228        valuekind=sampler, valuetype=struct, pointeeAlign=0
1229        addrSpace=none, accQual=default, actAccQual=default
1230        Flags=
1231      Arg name=vxx1, type=void*, size=8, align=8
1232        valuekind=globalbuf, valuetype=i8, pointeeAlign=0
1233        addrSpace=global, accQual=default, actAccQual=default
1234        Flags= const
1235      Arg name=vx1, type=void*, size=8, align=8
1236        valuekind=globalbuf, valuetype=i8, pointeeAlign=0
1237        addrSpace=global, accQual=default, actAccQual=default
1238        Flags= volatile
1239      Arg name=dx3, type=void*, size=8, align=8
1240        valuekind=globalbuf, valuetype=i8, pointeeAlign=0
1241        addrSpace=global, accQual=default, actAccQual=default
1242        Flags= restrict
1243      Arg name=ex6, type=void*, size=8, align=8
1244        valuekind=globalbuf, valuetype=i8, pointeeAlign=0
1245        addrSpace=global, accQual=default, actAccQual=default
1246        Flags= pipe
1247      Arg name=fx9, type=void*, size=8, align=8
1248        valuekind=globalbuf, valuetype=i8, pointeeAlign=0
1249        addrSpace=global, accQual=default, actAccQual=default
1250        Flags= const restrict volatile
1251  EFlags=3
1252  NewBinFormat
1253)ffDXD",
1254        "", true
1255    }
1256};
1257
1258static void testAssembler(cxuint testId, const AsmTestCase& testCase)
1259{
1260    std::istringstream input(testCase.input);
1261    std::ostringstream errorStream;
1262    std::ostringstream printStream;
1263   
1264    // create assembler with testcase's input and with ASM_TESTRUN flag
1265    Assembler assembler("test.s", input, (ASM_ALL|ASM_TESTRUN)&~ASM_ALTMACRO,
1266            BinaryFormat::AMD, GPUDeviceType::CAPE_VERDE, errorStream, printStream);
1267    bool good = assembler.assemble();
1268   
1269    std::ostringstream dumpOss;
1270    if (good && assembler.getFormatHandler()!=nullptr)
1271        // get format handler and their output
1272        printROCmOutput(dumpOss, static_cast<const AsmROCmHandler*>(
1273                    assembler.getFormatHandler())->getOutput());
1274    /* compare results dump with expected dump */
1275    char testName[30];
1276    snprintf(testName, 30, "Test #%u", testId);
1277   
1278    assertValue(testName, "good", int(testCase.good), int(good));
1279    assertString(testName, "dump", testCase.dump, dumpOss.str());
1280    assertString(testName, "errorMessages", testCase.errors, errorStream.str());
1281}
1282
1283int main(int argc, const char** argv)
1284{
1285    int retVal = 0;
1286    for (size_t i = 0; i < sizeof(asmTestCases1Tbl)/sizeof(AsmTestCase); i++)
1287        try
1288        { testAssembler(i, asmTestCases1Tbl[i]); }
1289        catch(const std::exception& ex)
1290        {
1291            std::cerr << ex.what() << std::endl;
1292            retVal = 1;
1293        }
1294    return retVal;
1295}
Note: See TracBrowser for help on using the repository browser.