source: CLRX/CLRadeonExtender/trunk/tests/amdasm/AsmROCmFormat.cpp @ 3754

Last change on this file since 3754 was 3754, checked in by matszpk, 2 years ago

CLRadeonExtender: AsmROCm: Do not set local size field in PGMRSRC2 if new binary format.

File size: 51.5 KB
Line 
1/*
2 *  CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3 *  Copyright (C) 2014-2018 Mateusz Szpakowski
4 *
5 *  This library is free software; you can redistribute it and/or
6 *  modify it under the terms of the GNU Lesser General Public
7 *  License as published by the Free Software Foundation; either
8 *  version 2.1 of the License, or (at your option) any later version.
9 *
10 *  This library is distributed in the hope that it will be useful,
11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 *  Lesser General Public License for more details.
14 *
15 *  You should have received a copy of the GNU Lesser General Public
16 *  License along with this library; if not, write to the Free Software
17 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18 */
19
20#include <CLRX/Config.h>
21#include <iostream>
22#include <cstdio>
23#include <sstream>
24#include <string>
25#include <algorithm>
26#include <memory>
27#include <CLRX/amdbin/ROCmBinaries.h>
28#include <CLRX/amdasm/Assembler.h>
29#include <CLRX/amdasm/AsmFormats.h>
30#include "../TestUtils.h"
31
32using namespace CLRX;
33
34static void printHexData(std::ostream& os, cxuint indentLevel, size_t size,
35             const cxbyte* data)
36{
37    if (data==nullptr)
38    {
39        for (cxuint j = 0; j < indentLevel; j++)
40            os << "  ";
41        os << "nullptr\n";
42        return;
43    }
44    for (size_t i = 0; i < size; i++)
45    {
46        if ((i&31)==0)
47            for (cxuint j = 0; j < indentLevel; j++)
48                os << "  ";
49        char buf[10];
50        snprintf(buf, 10, "%02x", cxuint(data[i]));
51        os << buf;
52        if ((i&31)==31 || i+1 == size)
53            os << '\n';
54    }
55}
56
// printable names of ROCm region types; looked up by the numeric value of a symbol's type
static const char* rocmRegionTypeNames[3] =
{
    "data",
    "fkernel",
    "kernel"
};
59
// printable names of kernel argument value kinds; looked up by the numeric value
// of an argument's valueKind
static const char* rocmValueKindNames[] =
{
    "value",
    "globalbuf",
    "dynshptr",
    "sampler",
    "image",
    "pipe",
    "queue",
    "gox",
    "goy",
    "goz",
    "none",
    "printfbuf",
    "defqueue",
    "complact"
};
65
// printable names of kernel argument value types; looked up by the numeric value
// of an argument's valueType
static const char* rocmValueTypeNames[] =
{
    "struct",
    "i8", "u8",
    "i16", "u16", "f16",
    "i32", "u32", "f32",
    "i64", "u64", "f64"
};
68
// printable names of address spaces; looked up by the numeric value
// of an argument's addressSpace
static const char* rocmAddressSpaces[] =
{
    "none",
    "private",
    "global",
    "constant",
    "local",
    "generic",
    "region"
};
71
// printable names of access qualifiers; looked up by the numeric value
// of an argument's accessQual and actualAccessQual
static const char* rocmAccessQuals[] =
{
    "default",
    "read_only",
    "write_only",
    "read_write"
};
74
75// print dump of ROCm output to stream for comparing with testcase
76static void printROCmOutput(std::ostream& os, const ROCmInput* output)
77{
78    os << "ROCmBinDump:" << std::endl;
79    for (const ROCmSymbolInput& symbol: output->symbols)
80    {
81        os << "  ROCmSymbol: name=" << symbol.symbolName << ", " <<
82                "offset=" << symbol.offset << ", size=" << symbol.size << ", type=" <<
83                rocmRegionTypeNames[cxuint(symbol.type)] << "\n";
84        if (symbol.type == ROCmRegionType::DATA)
85            continue;
86        if (symbol.offset+sizeof(ROCmKernelConfig) > output->codeSize)
87            continue;
88        const ROCmKernelConfig& config = *reinterpret_cast<const ROCmKernelConfig*>(
89                            output->code + symbol.offset);
90       
91        // print kernel configuration
92        os << "    Config:\n"
93            "      amdCodeVersion=" << ULEV(config.amdCodeVersionMajor) << "." <<
94                ULEV(config.amdCodeVersionMajor) << "\n"
95            "      amdMachine=" << ULEV(config.amdMachineKind) << ":" <<
96                ULEV(config.amdMachineMajor) << ":" <<
97                ULEV(config.amdMachineMinor) << ":" <<
98                ULEV(config.amdMachineStepping) << "\n"
99            "      kernelCodeEntryOffset=" << ULEV(config.kernelCodeEntryOffset) << "\n"
100            "      kernelCodePrefetchOffset=" <<
101                ULEV(config.kernelCodePrefetchOffset) << "\n"
102            "      kernelCodePrefetchSize=" << ULEV(config.kernelCodePrefetchSize) << "\n"
103            "      maxScrachBackingMemorySize=" <<
104                ULEV(config.maxScrachBackingMemorySize) << "\n"
105            "      computePgmRsrc1=0x" << std::hex << ULEV(config.computePgmRsrc1) << "\n"
106            "      computePgmRsrc2=0x" << ULEV(config.computePgmRsrc2) << "\n"
107            "      enableSgprRegisterFlags=0x" <<
108                ULEV(config.enableSgprRegisterFlags) << "\n"
109            "      enableFeatureFlags=0x" <<
110                ULEV(config.enableFeatureFlags) << std::dec << "\n"
111            "      workitemPrivateSegmentSize=" <<
112                ULEV(config.workitemPrivateSegmentSize) << "\n"
113            "      workgroupGroupSegmentSize=" <<
114                ULEV(config.workgroupGroupSegmentSize) << "\n"
115            "      gdsSegmentSize=" << ULEV(config.gdsSegmentSize) << "\n"
116            "      kernargSegmentSize=" << ULEV(config.kernargSegmentSize) << "\n"
117            "      workgroupFbarrierCount=" << ULEV(config.workgroupFbarrierCount) << "\n"
118            "      wavefrontSgprCount=" << ULEV(config.wavefrontSgprCount) << "\n"
119            "      workitemVgprCount=" << ULEV(config.workitemVgprCount) << "\n"
120            "      reservedVgprFirst=" << ULEV(config.reservedVgprFirst) << "\n"
121            "      reservedVgprCount=" << ULEV(config.reservedVgprCount) << "\n"
122            "      reservedSgprFirst=" << ULEV(config.reservedSgprFirst) << "\n"
123            "      reservedSgprCount=" << ULEV(config.reservedSgprCount) << "\n"
124            "      debugWavefrontPrivateSegmentOffsetSgpr=" <<
125                ULEV(config.debugWavefrontPrivateSegmentOffsetSgpr) << "\n"
126            "      debugPrivateSegmentBufferSgpr=" <<
127                ULEV(config.debugPrivateSegmentBufferSgpr) << "\n"
128            "      kernargSegmentAlignment=" << 
129                cxuint(config.kernargSegmentAlignment) << "\n"
130            "      groupSegmentAlignment=" <<
131                cxuint(config.groupSegmentAlignment) << "\n"
132            "      privateSegmentAlignment=" <<
133                cxuint(config.privateSegmentAlignment) << "\n"
134            "      wavefrontSize=" << cxuint(config.wavefrontSize) << "\n"
135            "      callConvention=0x" << std::hex << ULEV(config.callConvention) << "\n"
136            "      runtimeLoaderKernelSymbol=0x" <<
137                ULEV(config.runtimeLoaderKernelSymbol) << std::dec << "\n";
138        os << "      ControlDirective:\n";
139        printHexData(os, 3, 128, config.controlDirective);
140    }
141    // print comment and code
142    os << "  Comment:\n";
143    printHexData(os, 1, output->commentSize, (const cxbyte*)output->comment);
144    os << "  Code:\n";
145    printHexData(os, 1, output->codeSize, output->code);
146    if (output->globalData != nullptr)
147    {
148        os << "  GlobalData:\n";
149        printHexData(os, 1, output->globalDataSize, output->globalData);
150    }
151   
152    if (output->metadata != nullptr)
153        os << "  Metadata:\n" << std::string(output->metadata,
154                            output->metadataSize) << "\n";
155   
156    // dump ROCm metadata
157    if (output->useMetadataInfo)
158    {
159        const ROCmMetadata& metadata = output->metadataInfo;
160        os << "  MetadataInfo:\n"
161            "    Version: " << metadata.version[0] << "." << metadata.version[1] << "\n";
162        // dump printf info
163        for (const ROCmPrintfInfo& printfInfo: metadata.printfInfos)
164        {
165            os << "    Printf: " << printfInfo.id;
166            for (size_t argSize: printfInfo.argSizes)
167                os << ", " << argSize;
168            os << "; \"" << printfInfo.format << "\"\n";
169        }
170        // dump kernel metadata
171        for (const ROCmKernelMetadata& kernel: metadata.kernels)
172        {
173            os << "    Kernel: " << kernel.name << "\n"
174                "      SymName=" << kernel.symbolName << "\n"
175                "      Language=" << kernel.language << " " <<
176                        kernel.langVersion[0] << "." << kernel.langVersion[1] << "\n"
177                "      ReqdWorkGroupSize=" << kernel.reqdWorkGroupSize[0] << " " <<
178                        kernel.reqdWorkGroupSize[1] << " " <<
179                        kernel.reqdWorkGroupSize[2] << "\n"
180                "      WorkGroupSizeHint=" << kernel.workGroupSizeHint[0] << " " <<
181                        kernel.workGroupSizeHint[1] << " " <<
182                        kernel.workGroupSizeHint[2] << "\n"
183                "      VecTypeHint=" << kernel.vecTypeHint << "\n"
184                "      RuntimeHandle=" << kernel.runtimeHandle << "\n"
185                "      KernargSegmentSize=" << kernel.kernargSegmentSize << "\n"
186                "      KernargSegmentAlign=" << kernel.kernargSegmentAlign << "\n"
187                "      GroupSegmentFixedSize=" << kernel.groupSegmentFixedSize<< "\n"
188                "      PrivateSegmentFixedSize=" << kernel.privateSegmentFixedSize<< "\n"
189                "      WaveFrontSize=" << kernel.wavefrontSize << "\n"
190                "      SgprsNum=" << kernel.sgprsNum << "\n"
191                "      VgprsNum=" << kernel.vgprsNum << "\n"
192                "      SpilledSgprs=" << kernel.spilledSgprs << "\n"
193                "      SpilledVgprs=" << kernel.spilledVgprs << "\n"
194                "      MaxFlatWorkGroupSize=" << kernel.maxFlatWorkGroupSize << "\n"
195                "      FixedWorkGroupSize=" << kernel.fixedWorkGroupSize[0] << " " <<
196                        kernel.fixedWorkGroupSize[1] << " " <<
197                        kernel.fixedWorkGroupSize[2] << "\n";
198           
199            // dump kernel arguments
200            for (const ROCmKernelArgInfo& argInfo: kernel.argInfos)
201                os << "      Arg name=" << argInfo.name << ", type=" << argInfo.typeName <<
202                    ", size=" << argInfo.size << ", align=" << argInfo.align << "\n"
203                    "        valuekind=" <<
204                            rocmValueKindNames[cxuint(argInfo.valueKind)] <<
205                    ", valuetype=" << rocmValueTypeNames[cxuint(argInfo.valueType)] <<
206                    ", pointeeAlign=" << argInfo.pointeeAlign << "\n"
207                    "        addrSpace=" <<
208                            rocmAddressSpaces[cxuint(argInfo.addressSpace)] <<
209                    ", accQual=" << rocmAccessQuals[cxuint(argInfo.accessQual)] <<
210                    ", actAccQual=" <<
211                            rocmAccessQuals[cxuint(argInfo.actualAccessQual)] << "\n"
212                    "        Flags=" <<
213                    (argInfo.isConst ? " const" : "") <<
214                    (argInfo.isRestrict ? " restrict" : "") <<
215                    (argInfo.isVolatile ? " volatile" : "") <<
216                    (argInfo.isPipe ? " pipe" : "") << "\n";
217        }
218    }
219   
220    if (!output->target.empty())
221        os << "  Target=" << output->target << "\n";
222    if (output->eflags != BINGEN_DEFAULT)
223        os << "  EFlags=" << output->eflags << std::endl;
224   
225    if (output->newBinFormat)
226        os << "  NewBinFormat\n";
227   
228    // print extra sections if supplied
229    for (BinSection section: output->extraSections)
230    {
231        os << "  Section " << section.name << ", type=" << section.type <<
232                        ", flags=" << section.flags << ":\n";
233        printHexData(os, 1, section.size, section.data);
234    }
235    // print extra symbols if supplied
236    for (BinSymbol symbol: output->extraSymbols)
237        os << "  Symbol: name=" << symbol.name << ", value=" << symbol.value <<
238                ", size=" << symbol.size << ", section=" << symbol.sectionId << "\n";
239    os.flush();
240}
241
242
/* Single assembler testcase. The fields are filled positionally by the testcase
 * tables, so their order must not change. */
struct AsmTestCase
{
    const char* input;      // assembler source code
    const char* dump;       // expected dump text (in the format of printROCmOutput)
    const char* errors;     // expected warning and error messages
    bool good;              // presumably true if assembly is expected to succeed
};
250
251static const AsmTestCase asmTestCases1Tbl[] =
252{
253    {
254        R"ffDXD(        .rocm
255        .gpu Fiji
256.kernel kxx1
257    .fkernel
258    .config
259        .dims x
260        .codeversion 1,0
261        .call_convention 0x34dac
262        .debug_private_segment_buffer_sgpr 98
263        .debug_wavefront_private_segment_offset_sgpr 96
264        .gds_segment_size 100
265        .kernarg_segment_align 32
266        .workgroup_group_segment_size 22
267        .workgroup_fbarrier_count 3324
268        .dx10clamp
269        .exceptions 10
270        .private_segment_align 128
271        .privmode
272        .reserved_sgprs 5,14
273        .runtime_loader_kernel_symbol 0x4dc98b3a
274        .scratchbuffer 77222
275        .reserved_sgprs 9,12
276        .reserved_vgprs 7,17
277        .private_elem_size 16
278    .control_directive
279        .int 1,2,3
280        .fill 116,1,0
281.kernel kxx2
282    .config
283        .dims x
284        .codeversion 1,0
285        .call_convention 0x112223
286.kernel kxx1
287    .config
288        .scratchbuffer 111
289.text
290kxx1:
291        .skip 256
292        s_mov_b32 s7, 0
293        s_endpgm
294       
295.align 256
296kxx2:
297        .skip 256
298        s_endpgm
299.section .comment
300        .ascii "some comment for you"
301.kernel kxx2
302    .control_directive
303        .fill 124,1,0xde
304    .config
305        .use_kernarg_segment_ptr
306    .control_directive
307        .int 0xaadd66cc
308    .config
309.kernel kxx1
310.kernel kxx2
311        .call_convention 0x1112234
312       
313)ffDXD",
314        /* dump */
315        R"ffDXD(ROCmBinDump:
316  ROCmSymbol: name=kxx1, offset=0, size=0, type=fkernel
317    Config:
318      amdCodeVersion=1.1
319      amdMachine=1:8:0:3
320      kernelCodeEntryOffset=256
321      kernelCodePrefetchOffset=0
322      kernelCodePrefetchSize=0
323      maxScrachBackingMemorySize=0
324      computePgmRsrc1=0x3c0040
325      computePgmRsrc2=0xa008081
326      enableSgprRegisterFlags=0x0
327      enableFeatureFlags=0x6
328      workitemPrivateSegmentSize=111
329      workgroupGroupSegmentSize=22
330      gdsSegmentSize=100
331      kernargSegmentSize=0
332      workgroupFbarrierCount=3324
333      wavefrontSgprCount=10
334      workitemVgprCount=1
335      reservedVgprFirst=7
336      reservedVgprCount=11
337      reservedSgprFirst=9
338      reservedSgprCount=4
339      debugWavefrontPrivateSegmentOffsetSgpr=96
340      debugPrivateSegmentBufferSgpr=98
341      kernargSegmentAlignment=5
342      groupSegmentAlignment=4
343      privateSegmentAlignment=7
344      wavefrontSize=6
345      callConvention=0x34dac
346      runtimeLoaderKernelSymbol=0x4dc98b3a
347      ControlDirective:
348      0100000002000000030000000000000000000000000000000000000000000000
349      0000000000000000000000000000000000000000000000000000000000000000
350      0000000000000000000000000000000000000000000000000000000000000000
351      0000000000000000000000000000000000000000000000000000000000000000
352  ROCmSymbol: name=kxx2, offset=512, size=0, type=kernel
353    Config:
354      amdCodeVersion=1.1
355      amdMachine=1:8:0:3
356      kernelCodeEntryOffset=256
357      kernelCodePrefetchOffset=0
358      kernelCodePrefetchSize=0
359      maxScrachBackingMemorySize=0
360      computePgmRsrc1=0xc0000
361      computePgmRsrc2=0x84
362      enableSgprRegisterFlags=0x8
363      enableFeatureFlags=0x0
364      workitemPrivateSegmentSize=0
365      workgroupGroupSegmentSize=0
366      gdsSegmentSize=0
367      kernargSegmentSize=0
368      workgroupFbarrierCount=0
369      wavefrontSgprCount=5
370      workitemVgprCount=1
371      reservedVgprFirst=0
372      reservedVgprCount=0
373      reservedSgprFirst=0
374      reservedSgprCount=0
375      debugWavefrontPrivateSegmentOffsetSgpr=0
376      debugPrivateSegmentBufferSgpr=0
377      kernargSegmentAlignment=4
378      groupSegmentAlignment=4
379      privateSegmentAlignment=4
380      wavefrontSize=6
381      callConvention=0x1112234
382      runtimeLoaderKernelSymbol=0x0
383      ControlDirective:
384      dededededededededededededededededededededededededededededededede
385      dededededededededededededededededededededededededededededededede
386      dededededededededededededededededededededededededededededededede
387      dedededededededededededededededededededededededededededecc66ddaa
388  Comment:
389  736f6d6520636f6d6d656e7420666f7220796f75
390  Code:
391  0100000000000000010008000000030000010000000000000000000000000000
392  0000000000000000000000000000000040003c008180000a000006006f000000
393  16000000640000000000000000000000fc0c00000a00010007000b0009000400
394  6000620005040706ac4d03000000000000000000000000003a8bc94d00000000
395  0100000002000000030000000000000000000000000000000000000000000000
396  0000000000000000000000000000000000000000000000000000000000000000
397  0000000000000000000000000000000000000000000000000000000000000000
398  0000000000000000000000000000000000000000000000000000000000000000
399  800087be000081bf000080bf000080bf000080bf000080bf000080bf000080bf
400  000080bf000080bf000080bf000080bf000080bf000080bf000080bf000080bf
401  000080bf000080bf000080bf000080bf000080bf000080bf000080bf000080bf
402  000080bf000080bf000080bf000080bf000080bf000080bf000080bf000080bf
403  000080bf000080bf000080bf000080bf000080bf000080bf000080bf000080bf
404  000080bf000080bf000080bf000080bf000080bf000080bf000080bf000080bf
405  000080bf000080bf000080bf000080bf000080bf000080bf000080bf000080bf
406  000080bf000080bf000080bf000080bf000080bf000080bf000080bf000080bf
407  0100000000000000010008000000030000010000000000000000000000000000
408  0000000000000000000000000000000000000c00840000000800000000000000
409  0000000000000000000000000000000000000000050001000000000000000000
410  0000000004040406342211010000000000000000000000000000000000000000
411  dededededededededededededededededededededededededededededededede
412  dededededededededededededededededededededededededededededededede
413  dededededededededededededededededededededededededededededededede
414  dedededededededededededededededededededededededededededecc66ddaa
415  000081bf
416)ffDXD",
417        /* warning/errors */
418        "",
419        true
420    },
421    {
422        R"ffDXD(        .rocm
423        .gpu Fiji
424.kernel someKernelX
425    .config
426        .dims xz
427        .call_convention 331
428        .codeversion 1,0
429        .machine 8,0,1,2
430        .debug_private_segment_buffer_sgpr 10
431        .debug_wavefront_private_segment_offset_sgpr 31
432        .exceptions 0x3e
433        .floatmode 0xc3
434        .gds_segment_size 105
435        .group_segment_align 128
436        .kernarg_segment_align 64
437        .kernarg_segment_size 228
438        .kernel_code_entry_offset 256
439        .kernel_code_prefetch_offset 1002
440        .kernel_code_prefetch_size 13431
441        .max_scratch_backing_memory 4212
442        .pgmrsrc1 0xa0000000
443        .pgmrsrc2 0xd00000
444        .priority 2
445        .private_elem_size 8
446        .private_segment_align 32
447        .reserved_sgprs 12,19
448        .reserved_vgprs 26,48
449        .runtime_loader_kernel_symbol 0x3eda1
450        .scratchbuffer 2330
451        .use_debug_enabled
452        .use_flat_scratch_init
453        .use_grid_workgroup_count xz
454        .use_private_segment_buffer
455        .use_ptr64
456        .use_xnack_enabled
457        .wavefront_size 256
458        .workgroup_fbarrier_count 69
459        .workgroup_group_segment_size 324
460        .workitem_private_segment_size 33
461        .vgprsnum 211
462        .sgprsnum 85
463.text
464someKernelX:
465        .skip 256
466        s_endpgm)ffDXD",
467        R"ffDXD(ROCmBinDump:
468  ROCmSymbol: name=someKernelX, offset=0, size=0, type=kernel
469    Config:
470      amdCodeVersion=1.1
471      amdMachine=8:0:1:2
472      kernelCodeEntryOffset=256
473      kernelCodePrefetchOffset=1002
474      kernelCodePrefetchSize=13431
475      maxScrachBackingMemorySize=4212
476      computePgmRsrc1=0xa00c3ab4
477      computePgmRsrc2=0x3ed09291
478      enableSgprRegisterFlags=0x2a1
479      enableFeatureFlags=0x6c
480      workitemPrivateSegmentSize=33
481      workgroupGroupSegmentSize=324
482      gdsSegmentSize=105
483      kernargSegmentSize=228
484      workgroupFbarrierCount=69
485      wavefrontSgprCount=85
486      workitemVgprCount=211
487      reservedVgprFirst=26
488      reservedVgprCount=23
489      reservedSgprFirst=12
490      reservedSgprCount=8
491      debugWavefrontPrivateSegmentOffsetSgpr=31
492      debugPrivateSegmentBufferSgpr=10
493      kernargSegmentAlignment=6
494      groupSegmentAlignment=7
495      privateSegmentAlignment=5
496      wavefrontSize=8
497      callConvention=0x14b
498      runtimeLoaderKernelSymbol=0x3eda1
499      ControlDirective:
500      0000000000000000000000000000000000000000000000000000000000000000
501      0000000000000000000000000000000000000000000000000000000000000000
502      0000000000000000000000000000000000000000000000000000000000000000
503      0000000000000000000000000000000000000000000000000000000000000000
504  Comment:
505  nullptr
506  Code:
507  010000000000000008000000010002000001000000000000ea03000000000000
508  77340000000000007410000000000000b43a0ca09192d03ea1026c0021000000
509  4401000069000000e400000000000000450000005500d3001a0017000c000800
510  1f000a00060705084b010000000000000000000000000000a1ed030000000000
511  0000000000000000000000000000000000000000000000000000000000000000
512  0000000000000000000000000000000000000000000000000000000000000000
513  0000000000000000000000000000000000000000000000000000000000000000
514  0000000000000000000000000000000000000000000000000000000000000000
515  000081bf
516)ffDXD",
517        /* warning/errors */
518        "",
519        true
520    },
521    {
522        R"ffDXD(        .rocm
523        .gpu Fiji
524.kernel someKernelX
525    .config
526        .dims xz
527        .reserved_vgprs 0, 11
528.text
529someKernelX:
530        s_endpgm)ffDXD",
531        "", "test.s:3:1: Error: "
532        "Code for kernel 'someKernelX' is too small for configuration\n", false
533    },
534    {
535        R"ffDXD(        .rocm
536        .gpu Fiji
537.kernel someKernelX
538    .config
539        .dims xz
540        .reserved_vgprs 12,11
541        .reserved_sgprs 17,11
542        .reserved_vgprs 256,257
543        .reserved_sgprs 112,113
544        .debug_private_segment_buffer_sgpr 123
545        .debug_wavefront_private_segment_offset_sgpr 108
546        .private_elem_size 6
547        .private_elem_size 1
548        .private_elem_size 32
549        .kernarg_segment_align 56
550        .kernarg_segment_align 8
551        .private_segment_align 56
552        .private_segment_align 8
553        .wavefront_size 157
554        .wavefront_size 512
555        .pgmrsrc2 0xaa1fd3da2313
556.text
557someKernelX:
558        .skip 256
559        s_endpgm)ffDXD",
560        "", R"ffDXD(test.s:6:28: Error: Wrong register range
561test.s:7:28: Error: Wrong register range
562test.s:8:25: Error: First reserved VGPR register out of range (0-255)
563test.s:8:29: Error: Last reserved VGPR register out of range (0-255)
564test.s:9:25: Error: First reserved SGPR register out of range (0-101)
565test.s:9:29: Error: Last reserved SGPR register out of range (0-101)
566test.s:10:44: Error: SGPR register out of range
567test.s:11:54: Error: SGPR register out of range
568test.s:12:28: Error: Private element size must be power of two
569test.s:13:28: Error: Private element size out of range
570test.s:14:28: Error: Private element size out of range
571test.s:15:32: Error: Alignment must be power of two
572test.s:16:32: Error: Alignment must be not smaller than 16
573test.s:17:32: Error: Alignment must be power of two
574test.s:18:32: Error: Alignment must be not smaller than 16
575test.s:19:25: Error: Wavefront size must be power of two
576test.s:20:25: Error: Wavefront size must be not greater than 256
577test.s:21:19: Warning: Value 0xaa1fd3da2313 truncated to 0xd3da2313
578)ffDXD", false
579    },
580    {   // different eflags
581        R"ffDXD(.rocm
582        .gpu Fiji
583        .eflags 3
584.kernel kxx1
585    .config
586        .dims x
587        .codeversion 1,0
588        .call_convention 0x34dac
589        .debug_private_segment_buffer_sgpr 98
590        .debug_wavefront_private_segment_offset_sgpr 96
591        .gds_segment_size 100
592        .kernarg_segment_align 32
593        .workgroup_group_segment_size 22
594        .workgroup_fbarrier_count 3324
595        .dx10clamp
596        .exceptions 10
597        .private_segment_align 128
598        .privmode
599        .reserved_sgprs 5,14
600        .runtime_loader_kernel_symbol 0x4dc98b3a
601        .scratchbuffer 77222
602        .reserved_sgprs 9,12
603        .reserved_vgprs 7,17
604        .private_elem_size 16
605    .control_directive
606        .int 1,2,3
607        .fill 116,1,0
608.text
609kxx1:
610        .skip 256
611        s_mov_b32 s7, 0
612        s_endpgm
613)ffDXD",
614        R"ffDXD(ROCmBinDump:
615  ROCmSymbol: name=kxx1, offset=0, size=0, type=kernel
616    Config:
617      amdCodeVersion=1.1
618      amdMachine=1:8:0:3
619      kernelCodeEntryOffset=256
620      kernelCodePrefetchOffset=0
621      kernelCodePrefetchSize=0
622      maxScrachBackingMemorySize=0
623      computePgmRsrc1=0x3c0040
624      computePgmRsrc2=0xa008081
625      enableSgprRegisterFlags=0x0
626      enableFeatureFlags=0x6
627      workitemPrivateSegmentSize=77222
628      workgroupGroupSegmentSize=22
629      gdsSegmentSize=100
630      kernargSegmentSize=0
631      workgroupFbarrierCount=3324
632      wavefrontSgprCount=10
633      workitemVgprCount=1
634      reservedVgprFirst=7
635      reservedVgprCount=11
636      reservedSgprFirst=9
637      reservedSgprCount=4
638      debugWavefrontPrivateSegmentOffsetSgpr=96
639      debugPrivateSegmentBufferSgpr=98
640      kernargSegmentAlignment=5
641      groupSegmentAlignment=4
642      privateSegmentAlignment=7
643      wavefrontSize=6
644      callConvention=0x34dac
645      runtimeLoaderKernelSymbol=0x4dc98b3a
646      ControlDirective:
647      0100000002000000030000000000000000000000000000000000000000000000
648      0000000000000000000000000000000000000000000000000000000000000000
649      0000000000000000000000000000000000000000000000000000000000000000
650      0000000000000000000000000000000000000000000000000000000000000000
651  Comment:
652  nullptr
653  Code:
654  0100000000000000010008000000030000010000000000000000000000000000
655  0000000000000000000000000000000040003c008180000a00000600a62d0100
656  16000000640000000000000000000000fc0c00000a00010007000b0009000400
657  6000620005040706ac4d03000000000000000000000000003a8bc94d00000000
658  0100000002000000030000000000000000000000000000000000000000000000
659  0000000000000000000000000000000000000000000000000000000000000000
660  0000000000000000000000000000000000000000000000000000000000000000
661  0000000000000000000000000000000000000000000000000000000000000000
662  800087be000081bf
663  EFlags=3
664)ffDXD", "", true
665    },
666    {   // metadata and others
667        R"ffDXD(.rocm
668        .gpu Fiji
669        .eflags 3
670        .newbinfmt
671.metadata
672        .ascii "sometext in this place\n"
673        .ascii "maybe not unrecognizable by parser but it is understandable by human\n"
674.globaldata
675        .byte 1,2,3,4,5,5,6,33
676.kernel kxx1
677    .config
678        .dims x
679        .codeversion 1,0
680        .call_convention 0x34dac
681        .debug_private_segment_buffer_sgpr 98
682        .debug_wavefront_private_segment_offset_sgpr 96
683        .gds_segment_size 100
684        .kernarg_segment_align 32
685        .workgroup_group_segment_size 22
686        .workgroup_fbarrier_count 3324
687        .dx10clamp
688        .exceptions 10
689        .private_segment_align 128
690        .privmode
691        .reserved_sgprs 5,14
692        .runtime_loader_kernel_symbol 0x4dc98b3a
693        .scratchbuffer 77222
694        .reserved_sgprs 9,12
695        .reserved_vgprs 7,17
696        .private_elem_size 16
697    .control_directive
698        .int 1,2,3
699        .fill 116,1,0
700.text
701kxx1:
702        .skip 256
703        s_mov_b32 s7, 0
704        s_endpgm
705)ffDXD",
706        R"ffDXD(ROCmBinDump:
707  ROCmSymbol: name=kxx1, offset=0, size=0, type=kernel
708    Config:
709      amdCodeVersion=1.1
710      amdMachine=1:8:0:3
711      kernelCodeEntryOffset=256
712      kernelCodePrefetchOffset=0
713      kernelCodePrefetchSize=0
714      maxScrachBackingMemorySize=0
715      computePgmRsrc1=0x3c0040
716      computePgmRsrc2=0xa000081
717      enableSgprRegisterFlags=0x0
718      enableFeatureFlags=0x6
719      workitemPrivateSegmentSize=77222
720      workgroupGroupSegmentSize=22
721      gdsSegmentSize=100
722      kernargSegmentSize=0
723      workgroupFbarrierCount=3324
724      wavefrontSgprCount=10
725      workitemVgprCount=1
726      reservedVgprFirst=7
727      reservedVgprCount=11
728      reservedSgprFirst=9
729      reservedSgprCount=4
730      debugWavefrontPrivateSegmentOffsetSgpr=96
731      debugPrivateSegmentBufferSgpr=98
732      kernargSegmentAlignment=5
733      groupSegmentAlignment=4
734      privateSegmentAlignment=7
735      wavefrontSize=6
736      callConvention=0x34dac
737      runtimeLoaderKernelSymbol=0x4dc98b3a
738      ControlDirective:
739      0100000002000000030000000000000000000000000000000000000000000000
740      0000000000000000000000000000000000000000000000000000000000000000
741      0000000000000000000000000000000000000000000000000000000000000000
742      0000000000000000000000000000000000000000000000000000000000000000
743  Comment:
744  nullptr
745  Code:
746  0100000000000000010008000000030000010000000000000000000000000000
747  0000000000000000000000000000000040003c008100000a00000600a62d0100
748  16000000640000000000000000000000fc0c00000a00010007000b0009000400
749  6000620005040706ac4d03000000000000000000000000003a8bc94d00000000
750  0100000002000000030000000000000000000000000000000000000000000000
751  0000000000000000000000000000000000000000000000000000000000000000
752  0000000000000000000000000000000000000000000000000000000000000000
753  0000000000000000000000000000000000000000000000000000000000000000
754  800087be000081bf
755  GlobalData:
756  0102030405050621
757  Metadata:
758sometext in this place
759maybe not unrecognizable by parser but it is understandable by human
760
761  EFlags=3
762  NewBinFormat
763)ffDXD", "", true
764    },
765    {   // metadata info
766        R"ffDXD(.rocm
767        .gpu Fiji
768        .eflags 3
769        .newbinfmt
770        .md_version 3 , 5
771        .printf 1 ,5 ,7 , 2,  11, "sometext %d %e %f"
772        .printf 2 ,"sometext"
773        .printf  , 16 ,8 , 2,  4, "sometext %d %e %f"
774.kernel kxx1
775    .config
776        .dims x
777        .codeversion 1,0
778        .call_convention 0x34dac
779        .debug_private_segment_buffer_sgpr 98
780        .debug_wavefront_private_segment_offset_sgpr 96
781        .gds_segment_size 100
782        .kernarg_segment_align 32
783    # metadata
784        .md_symname "kxx1@kd"
785        .md_language "Poliglot", 3, 1
786        .reqd_work_group_size 6,2,4
787        .work_group_size_hint 5,7,2
788        .vectypehint float16
789        .spilledsgprs 11
790        .spilledvgprs 52
791        .md_kernarg_segment_size 64
792        .md_kernarg_segment_align 8
793        .md_group_segment_fixed_size 0
794        .md_private_segment_fixed_size 0
795        .md_wavefront_size 64
796        .md_sgprsnum 14
797        .md_vgprsnum 11
798        .max_flat_work_group_size 256
799        .arg n, "uint", 4, , value, u32
800        .arg n2, "uint", 12, , value, u32
801        .arg x0, "char", 1, 16, value, char
802        .arg x1, "int8", 1, 16, value, i8
803        .arg x2, "short", 2, 16, value, short
804        .arg x3, "int16", 2, 16, value, i16
805        .arg x4, "int", 4, 16, value, int
806        .arg x5, "int32", 4, 16, value, i32
807        .arg x6, "long", 8, 16, value, long
808        .arg x7, "int64", 8, 16, value, i64
809        .arg x8, "uchar", 1, 16, value, uchar
810        .arg x9, "uint8", 1, 16, value, u8
811        .arg x10, "ushort", 2, 16, value, ushort
812        .arg x11, "uint16", 2, 16, value, u16
813        .arg x12, "uint", 4, 16, value, uint
814        .arg x13, "uint32", 4, 16, value, u32
815        .arg x14, "ulong", 8, 16, value, ulong
816        .arg x15, "uint64", 8, 16, value, u64
817        .arg x16, "half", 2, 16, value, half
818        .arg x17, "fp16", 2, 16, value, f16
819        .arg x18, "float", 4, 16, value, float
820        .arg x19, "fp32", 4, 16, value, f32
821        .arg x20, "double", 8, 16, value, double
822        .arg x21, "fp64", 8, 16, value, f64
823        .arg a, "float*", 8, 8, globalbuf, f32, global, default const volatile
824        .arg abuf, "float*", 8, 8, globalbuf, f32, constant, default
825        .arg abuf2, "float*", 8, 8, dynshptr, f32, 1, local
826        .arg abuf3, "float*", 8, 8, globalbuf, f32, generic, default
827        .arg abuf4, "float*", 8, 8, globalbuf, f32, region, default
828        .arg abuf5, "float*", 8, 8, dynshptr, f32, 1, private
829        .arg bbuf, "float*", 8, 8, globalbuf, f32, global, read_only
830        .arg bbuf2, "float*", 8, 8, globalbuf, f32, global, write_only
831        .arg bbuf3, "float*", 8, 8, globalbuf, f32, global, read_write
832        .arg img1, "image1d_t", 8, 8, image, struct, read_only, default
833        .arg img2, "image1d_t", 8, 8, image, struct, write_only, default
834        .arg img3, "image1d_t", 8, 8, image, struct, read_write, default
835        .arg , "", 8, 8, gox, i64
836        .arg , "", 8, 8, goy, i64
837        .arg , "", 8, 8, goz, i64
838        .arg , "", 8, 8, globaloffsetx, i64
839        .arg , "", 8, 8, globaloffsety, i64
840        .arg , "", 8, 8, globaloffsetz, i64
841.text
842kxx1:   .skip 256
843        s_mov_b32 s7, 0
844        s_endpgm
845)ffDXD",
846        R"ffDXD(ROCmBinDump:
847  ROCmSymbol: name=kxx1, offset=0, size=0, type=kernel
848    Config:
849      amdCodeVersion=1.1
850      amdMachine=1:8:0:3
851      kernelCodeEntryOffset=256
852      kernelCodePrefetchOffset=0
853      kernelCodePrefetchSize=0
854      maxScrachBackingMemorySize=0
855      computePgmRsrc1=0xc0040
856      computePgmRsrc2=0x80
857      enableSgprRegisterFlags=0x0
858      enableFeatureFlags=0x0
859      workitemPrivateSegmentSize=0
860      workgroupGroupSegmentSize=0
861      gdsSegmentSize=100
862      kernargSegmentSize=520
863      workgroupFbarrierCount=0
864      wavefrontSgprCount=10
865      workitemVgprCount=1
866      reservedVgprFirst=0
867      reservedVgprCount=0
868      reservedSgprFirst=0
869      reservedSgprCount=0
870      debugWavefrontPrivateSegmentOffsetSgpr=96
871      debugPrivateSegmentBufferSgpr=98
872      kernargSegmentAlignment=5
873      groupSegmentAlignment=4
874      privateSegmentAlignment=4
875      wavefrontSize=6
876      callConvention=0x34dac
877      runtimeLoaderKernelSymbol=0x0
878      ControlDirective:
879      0000000000000000000000000000000000000000000000000000000000000000
880      0000000000000000000000000000000000000000000000000000000000000000
881      0000000000000000000000000000000000000000000000000000000000000000
882      0000000000000000000000000000000000000000000000000000000000000000
883  Comment:
884  nullptr
885  Code:
886  0100000000000000010008000000030000010000000000000000000000000000
887  0000000000000000000000000000000040000c00800000000000000000000000
888  00000000640000000802000000000000000000000a0001000000000000000000
889  6000620005040406ac4d03000000000000000000000000000000000000000000
890  0000000000000000000000000000000000000000000000000000000000000000
891  0000000000000000000000000000000000000000000000000000000000000000
892  0000000000000000000000000000000000000000000000000000000000000000
893  0000000000000000000000000000000000000000000000000000000000000000
894  800087be000081bf
895  MetadataInfo:
896    Version: 3.5
897    Printf: 1, 5, 7, 2, 11; "sometext %d %e %f"
898    Printf: 2; "sometext"
899    Printf: 4294967295, 16, 8, 2, 4; "sometext %d %e %f"
900    Kernel: kxx1
901      SymName=kxx1@kd
902      Language=Poliglot 3.1
903      ReqdWorkGroupSize=6 2 4
904      WorkGroupSizeHint=5 7 2
905      VecTypeHint=float16
906      RuntimeHandle=
907      KernargSegmentSize=64
908      KernargSegmentAlign=8
909      GroupSegmentFixedSize=0
910      PrivateSegmentFixedSize=0
911      WaveFrontSize=64
912      SgprsNum=14
913      VgprsNum=11
914      SpilledSgprs=11
915      SpilledVgprs=52
916      MaxFlatWorkGroupSize=256
917      FixedWorkGroupSize=0 0 0
918      Arg name=n, type=uint, size=4, align=4
919        valuekind=value, valuetype=u32, pointeeAlign=0
920        addrSpace=none, accQual=default, actAccQual=default
921        Flags=
922      Arg name=n2, type=uint, size=12, align=16
923        valuekind=value, valuetype=u32, pointeeAlign=0
924        addrSpace=none, accQual=default, actAccQual=default
925        Flags=
926      Arg name=x0, type=char, size=1, align=16
927        valuekind=value, valuetype=i8, pointeeAlign=0
928        addrSpace=none, accQual=default, actAccQual=default
929        Flags=
930      Arg name=x1, type=int8, size=1, align=16
931        valuekind=value, valuetype=i8, pointeeAlign=0
932        addrSpace=none, accQual=default, actAccQual=default
933        Flags=
934      Arg name=x2, type=short, size=2, align=16
935        valuekind=value, valuetype=i16, pointeeAlign=0
936        addrSpace=none, accQual=default, actAccQual=default
937        Flags=
938      Arg name=x3, type=int16, size=2, align=16
939        valuekind=value, valuetype=i16, pointeeAlign=0
940        addrSpace=none, accQual=default, actAccQual=default
941        Flags=
942      Arg name=x4, type=int, size=4, align=16
943        valuekind=value, valuetype=i32, pointeeAlign=0
944        addrSpace=none, accQual=default, actAccQual=default
945        Flags=
946      Arg name=x5, type=int32, size=4, align=16
947        valuekind=value, valuetype=i32, pointeeAlign=0
948        addrSpace=none, accQual=default, actAccQual=default
949        Flags=
950      Arg name=x6, type=long, size=8, align=16
951        valuekind=value, valuetype=i64, pointeeAlign=0
952        addrSpace=none, accQual=default, actAccQual=default
953        Flags=
954      Arg name=x7, type=int64, size=8, align=16
955        valuekind=value, valuetype=i64, pointeeAlign=0
956        addrSpace=none, accQual=default, actAccQual=default
957        Flags=
958      Arg name=x8, type=uchar, size=1, align=16
959        valuekind=value, valuetype=u8, pointeeAlign=0
960        addrSpace=none, accQual=default, actAccQual=default
961        Flags=
962      Arg name=x9, type=uint8, size=1, align=16
963        valuekind=value, valuetype=u8, pointeeAlign=0
964        addrSpace=none, accQual=default, actAccQual=default
965        Flags=
966      Arg name=x10, type=ushort, size=2, align=16
967        valuekind=value, valuetype=i16, pointeeAlign=0
968        addrSpace=none, accQual=default, actAccQual=default
969        Flags=
970      Arg name=x11, type=uint16, size=2, align=16
971        valuekind=value, valuetype=u16, pointeeAlign=0
972        addrSpace=none, accQual=default, actAccQual=default
973        Flags=
974      Arg name=x12, type=uint, size=4, align=16
975        valuekind=value, valuetype=u32, pointeeAlign=0
976        addrSpace=none, accQual=default, actAccQual=default
977        Flags=
978      Arg name=x13, type=uint32, size=4, align=16
979        valuekind=value, valuetype=u32, pointeeAlign=0
980        addrSpace=none, accQual=default, actAccQual=default
981        Flags=
982      Arg name=x14, type=ulong, size=8, align=16
983        valuekind=value, valuetype=u64, pointeeAlign=0
984        addrSpace=none, accQual=default, actAccQual=default
985        Flags=
986      Arg name=x15, type=uint64, size=8, align=16
987        valuekind=value, valuetype=u64, pointeeAlign=0
988        addrSpace=none, accQual=default, actAccQual=default
989        Flags=
990      Arg name=x16, type=half, size=2, align=16
991        valuekind=value, valuetype=f16, pointeeAlign=0
992        addrSpace=none, accQual=default, actAccQual=default
993        Flags=
994      Arg name=x17, type=fp16, size=2, align=16
995        valuekind=value, valuetype=f16, pointeeAlign=0
996        addrSpace=none, accQual=default, actAccQual=default
997        Flags=
998      Arg name=x18, type=float, size=4, align=16
999        valuekind=value, valuetype=f32, pointeeAlign=0
1000        addrSpace=none, accQual=default, actAccQual=default
1001        Flags=
1002      Arg name=x19, type=fp32, size=4, align=16
1003        valuekind=value, valuetype=f32, pointeeAlign=0
1004        addrSpace=none, accQual=default, actAccQual=default
1005        Flags=
1006      Arg name=x20, type=double, size=8, align=16
1007        valuekind=value, valuetype=f64, pointeeAlign=0
1008        addrSpace=none, accQual=default, actAccQual=default
1009        Flags=
1010      Arg name=x21, type=fp64, size=8, align=16
1011        valuekind=value, valuetype=f64, pointeeAlign=0
1012        addrSpace=none, accQual=default, actAccQual=default
1013        Flags=
1014      Arg name=a, type=float*, size=8, align=8
1015        valuekind=globalbuf, valuetype=f32, pointeeAlign=0
1016        addrSpace=global, accQual=default, actAccQual=default
1017        Flags= const volatile
1018      Arg name=abuf, type=float*, size=8, align=8
1019        valuekind=globalbuf, valuetype=f32, pointeeAlign=0
1020        addrSpace=constant, accQual=default, actAccQual=default
1021        Flags=
1022      Arg name=abuf2, type=float*, size=8, align=8
1023        valuekind=dynshptr, valuetype=f32, pointeeAlign=1
1024        addrSpace=local, accQual=default, actAccQual=default
1025        Flags=
1026      Arg name=abuf3, type=float*, size=8, align=8
1027        valuekind=globalbuf, valuetype=f32, pointeeAlign=0
1028        addrSpace=generic, accQual=default, actAccQual=default
1029        Flags=
1030      Arg name=abuf4, type=float*, size=8, align=8
1031        valuekind=globalbuf, valuetype=f32, pointeeAlign=0
1032        addrSpace=region, accQual=default, actAccQual=default
1033        Flags=
1034      Arg name=abuf5, type=float*, size=8, align=8
1035        valuekind=dynshptr, valuetype=f32, pointeeAlign=1
1036        addrSpace=private, accQual=default, actAccQual=default
1037        Flags=
1038      Arg name=bbuf, type=float*, size=8, align=8
1039        valuekind=globalbuf, valuetype=f32, pointeeAlign=0
1040        addrSpace=global, accQual=default, actAccQual=read_only
1041        Flags=
1042      Arg name=bbuf2, type=float*, size=8, align=8
1043        valuekind=globalbuf, valuetype=f32, pointeeAlign=0
1044        addrSpace=global, accQual=default, actAccQual=write_only
1045        Flags=
1046      Arg name=bbuf3, type=float*, size=8, align=8
1047        valuekind=globalbuf, valuetype=f32, pointeeAlign=0
1048        addrSpace=global, accQual=default, actAccQual=read_write
1049        Flags=
1050      Arg name=img1, type=image1d_t, size=8, align=8
1051        valuekind=image, valuetype=struct, pointeeAlign=0
1052        addrSpace=none, accQual=read_only, actAccQual=default
1053        Flags=
1054      Arg name=img2, type=image1d_t, size=8, align=8
1055        valuekind=image, valuetype=struct, pointeeAlign=0
1056        addrSpace=none, accQual=write_only, actAccQual=default
1057        Flags=
1058      Arg name=img3, type=image1d_t, size=8, align=8
1059        valuekind=image, valuetype=struct, pointeeAlign=0
1060        addrSpace=none, accQual=read_write, actAccQual=default
1061        Flags=
1062      Arg name=, type=, size=8, align=8
1063        valuekind=gox, valuetype=i64, pointeeAlign=0
1064        addrSpace=none, accQual=default, actAccQual=default
1065        Flags=
1066      Arg name=, type=, size=8, align=8
1067        valuekind=goy, valuetype=i64, pointeeAlign=0
1068        addrSpace=none, accQual=default, actAccQual=default
1069        Flags=
1070      Arg name=, type=, size=8, align=8
1071        valuekind=goz, valuetype=i64, pointeeAlign=0
1072        addrSpace=none, accQual=default, actAccQual=default
1073        Flags=
1074      Arg name=, type=, size=8, align=8
1075        valuekind=gox, valuetype=i64, pointeeAlign=0
1076        addrSpace=none, accQual=default, actAccQual=default
1077        Flags=
1078      Arg name=, type=, size=8, align=8
1079        valuekind=gox, valuetype=i64, pointeeAlign=0
1080        addrSpace=none, accQual=default, actAccQual=default
1081        Flags=
1082      Arg name=, type=, size=8, align=8
1083        valuekind=gox, valuetype=i64, pointeeAlign=0
1084        addrSpace=none, accQual=default, actAccQual=default
1085        Flags=
1086  EFlags=3
1087  NewBinFormat
1088)ffDXD",
1089        "", true
1090    },
1091    {   // next metadata info example
1092        R"ffDXD(.rocm
1093        .gpu Fiji
1094        .eflags 3
1095        .newbinfmt
1096        .md_version 3 , 5
1097.kernel kxx1
1098    .config
1099        .dims x
1100        .codeversion 1,0
1101        .call_convention 0x34dac
1102        .debug_private_segment_buffer_sgpr 98
1103        .debug_wavefront_private_segment_offset_sgpr 96
1104        .gds_segment_size 100
1105        .kernarg_segment_align 32
1106    # metadata
1107        .md_language "jezorx"
1108        .reqd_work_group_size 6,
1109        .work_group_size_hint 5,7
1110        .fixed_work_group_size 3,,71
1111        .md_kernarg_segment_size 64
1112        .md_kernarg_segment_align 32
1113        .md_group_segment_fixed_size 1121
1114        .md_private_segment_fixed_size 6632
1115        .md_wavefront_size 64
1116        .md_sgprsnum 14
1117        .md_vgprsnum 11
1118        .runtime_handle "SomeCodeToExec"
1119        # arg infos
1120        .arg , "", 8, 8, none, i64
1121        .arg , "", 8, 8, complact, i64
1122        .arg , "", 8, 8, printfbuf, i64
1123        .arg , "", 8, 8, defqueue, i64
1124        .arg pipe0, "pipe_t", 8, 8, pipe, struct, read_write, default pipe
1125        .arg qx01, "queue_t", 8, 8, queue, struct
1126        .arg masksamp, "sampler_t", 8, 8, sampler, struct
1127        .arg vxx1, "void*", 8, 8, globalbuf, i8, global, default const
1128        .arg vx1, "void*", 8, 8, globalbuf, i8, global, default volatile
1129        .arg dx3, "void*", 8, 8, globalbuf, i8, global, default restrict
1130        .arg ex6, "void*", 8, 8, globalbuf, i8, global, default pipe
1131        .arg fx9, "void*", 8, 8, globalbuf, i8, global, default volatile const restrict
1132.text
1133kxx1:   .skip 256
1134        s_mov_b32 s7, 0
1135        s_endpgm
1136)ffDXD",
1137        R"ffDXD(ROCmBinDump:
1138  ROCmSymbol: name=kxx1, offset=0, size=0, type=kernel
1139    Config:
1140      amdCodeVersion=1.1
1141      amdMachine=1:8:0:3
1142      kernelCodeEntryOffset=256
1143      kernelCodePrefetchOffset=0
1144      kernelCodePrefetchSize=0
1145      maxScrachBackingMemorySize=0
1146      computePgmRsrc1=0xc0040
1147      computePgmRsrc2=0x80
1148      enableSgprRegisterFlags=0x0
1149      enableFeatureFlags=0x0
1150      workitemPrivateSegmentSize=0
1151      workgroupGroupSegmentSize=0
1152      gdsSegmentSize=100
1153      kernargSegmentSize=96
1154      workgroupFbarrierCount=0
1155      wavefrontSgprCount=10
1156      workitemVgprCount=1
1157      reservedVgprFirst=0
1158      reservedVgprCount=0
1159      reservedSgprFirst=0
1160      reservedSgprCount=0
1161      debugWavefrontPrivateSegmentOffsetSgpr=96
1162      debugPrivateSegmentBufferSgpr=98
1163      kernargSegmentAlignment=5
1164      groupSegmentAlignment=4
1165      privateSegmentAlignment=4
1166      wavefrontSize=6
1167      callConvention=0x34dac
1168      runtimeLoaderKernelSymbol=0x0
1169      ControlDirective:
1170      0000000000000000000000000000000000000000000000000000000000000000
1171      0000000000000000000000000000000000000000000000000000000000000000
1172      0000000000000000000000000000000000000000000000000000000000000000
1173      0000000000000000000000000000000000000000000000000000000000000000
1174  Comment:
1175  nullptr
1176  Code:
1177  0100000000000000010008000000030000010000000000000000000000000000
1178  0000000000000000000000000000000040000c00800000000000000000000000
1179  00000000640000006000000000000000000000000a0001000000000000000000
1180  6000620005040406ac4d03000000000000000000000000000000000000000000
1181  0000000000000000000000000000000000000000000000000000000000000000
1182  0000000000000000000000000000000000000000000000000000000000000000
1183  0000000000000000000000000000000000000000000000000000000000000000
1184  0000000000000000000000000000000000000000000000000000000000000000
1185  800087be000081bf
1186  MetadataInfo:
1187    Version: 3.5
1188    Kernel: kxx1
1189      SymName=
1190      Language=jezorx 0.0
1191      ReqdWorkGroupSize=6 1 1
1192      WorkGroupSizeHint=5 7 1
1193      VecTypeHint=
1194      RuntimeHandle=SomeCodeToExec
1195      KernargSegmentSize=64
1196      KernargSegmentAlign=32
1197      GroupSegmentFixedSize=1121
1198      PrivateSegmentFixedSize=6632
1199      WaveFrontSize=64
1200      SgprsNum=14
1201      VgprsNum=11
1202      SpilledSgprs=4294967294
1203      SpilledVgprs=4294967294
1204      MaxFlatWorkGroupSize=18446744073709551614
1205      FixedWorkGroupSize=3 1 71
1206      Arg name=, type=, size=8, align=8
1207        valuekind=none, valuetype=i64, pointeeAlign=0
1208        addrSpace=none, accQual=default, actAccQual=default
1209        Flags=
1210      Arg name=, type=, size=8, align=8
1211        valuekind=complact, valuetype=i64, pointeeAlign=0
1212        addrSpace=none, accQual=default, actAccQual=default
1213        Flags=
1214      Arg name=, type=, size=8, align=8
1215        valuekind=printfbuf, valuetype=i64, pointeeAlign=0
1216        addrSpace=none, accQual=default, actAccQual=default
1217        Flags=
1218      Arg name=, type=, size=8, align=8
1219        valuekind=defqueue, valuetype=i64, pointeeAlign=0
1220        addrSpace=none, accQual=default, actAccQual=default
1221        Flags=
1222      Arg name=pipe0, type=pipe_t, size=8, align=8
1223        valuekind=pipe, valuetype=struct, pointeeAlign=0
1224        addrSpace=none, accQual=read_write, actAccQual=default
1225        Flags= pipe
1226      Arg name=qx01, type=queue_t, size=8, align=8
1227        valuekind=queue, valuetype=struct, pointeeAlign=0
1228        addrSpace=none, accQual=default, actAccQual=default
1229        Flags=
1230      Arg name=masksamp, type=sampler_t, size=8, align=8
1231        valuekind=sampler, valuetype=struct, pointeeAlign=0
1232        addrSpace=none, accQual=default, actAccQual=default
1233        Flags=
1234      Arg name=vxx1, type=void*, size=8, align=8
1235        valuekind=globalbuf, valuetype=i8, pointeeAlign=0
1236        addrSpace=global, accQual=default, actAccQual=default
1237        Flags= const
1238      Arg name=vx1, type=void*, size=8, align=8
1239        valuekind=globalbuf, valuetype=i8, pointeeAlign=0
1240        addrSpace=global, accQual=default, actAccQual=default
1241        Flags= volatile
1242      Arg name=dx3, type=void*, size=8, align=8
1243        valuekind=globalbuf, valuetype=i8, pointeeAlign=0
1244        addrSpace=global, accQual=default, actAccQual=default
1245        Flags= restrict
1246      Arg name=ex6, type=void*, size=8, align=8
1247        valuekind=globalbuf, valuetype=i8, pointeeAlign=0
1248        addrSpace=global, accQual=default, actAccQual=default
1249        Flags= pipe
1250      Arg name=fx9, type=void*, size=8, align=8
1251        valuekind=globalbuf, valuetype=i8, pointeeAlign=0
1252        addrSpace=global, accQual=default, actAccQual=default
1253        Flags= const restrict volatile
1254  EFlags=3
1255  NewBinFormat
1256)ffDXD",
1257        "", true
1258    },
1259    {   // next metadata info (errors)
1260        R"ffDXD(.rocm
1261        .gpu Fiji
1262        .eflags 3
1263        .newbinfmt
1264        .md_version 3 , 5
1265.kernel kxx1
1266    .config
1267        .dims x
1268        .codeversion 1,0
1269        .call_convention 0x34dac
1270        .debug_private_segment_buffer_sgpr 98
1271        .debug_wavefront_private_segment_offset_sgpr 96
1272        .gds_segment_size 100
1273        .kernarg_segment_align 32
1274    # metadata
1275    .text
1276        .md_language "jezorx"
1277        .reqd_work_group_size 6,
1278        .work_group_size_hint 5,7
1279        .fixed_work_group_size 3,,71
1280        .md_kernarg_segment_size 64
1281        .md_kernarg_segment_align 32
1282        .md_group_segment_fixed_size 1121
1283        .md_private_segment_fixed_size 6632
1284        .md_wavefront_size 64
1285        .md_sgprsnum 14
1286        .md_vgprsnum 11
1287        .spilledsgprs 34
1288        .spilledvgprs 42
1289        .runtime_handle "SomeCodeToExec"
1290        # arg infos
1291        .arg , "", 8, 8, none, i64
1292        .arg , "", 8, 8, complact, i64
1293.kernel kxx1
1294        .arg , "", 8, 8, printfbuxf, i64
1295        .arg vx1, "void*", 8, 8, globalbuf, xi8, global, default volatile
1296        .arg vx1, "void*", 8, 8, globalbuf, xi8, global, default volxx
1297        .arg vx1, "void*", 8, 8, globalbuf, i8, global, volatile
1298        .arg vx1, "void*", 8, 8, globalbuf, i8, global :: xx
1299        .md_language "jezorx", ,
1300        .md_vgprsnum 
1301        .runtime_handle 144
1302.main
1303        .printf 22,,,"aa"
1304.text
1305kxx1:   .skip 256
1306        s_mov_b32 s7, 0
1307        s_endpgm
1308)ffDXD",
1309        "",
1310        R"ffDXD(test.s:17:9: Error: Illegal place of configuration pseudo-op
1311test.s:18:9: Error: Illegal place of configuration pseudo-op
1312test.s:19:9: Error: Illegal place of configuration pseudo-op
1313test.s:20:9: Error: Illegal place of configuration pseudo-op
1314test.s:21:9: Error: Illegal place of configuration pseudo-op
1315test.s:22:9: Error: Illegal place of configuration pseudo-op
1316test.s:23:9: Error: Illegal place of configuration pseudo-op
1317test.s:24:9: Error: Illegal place of configuration pseudo-op
1318test.s:25:9: Error: Illegal place of configuration pseudo-op
1319test.s:26:9: Error: Illegal place of configuration pseudo-op
1320test.s:27:9: Error: Illegal place of configuration pseudo-op
1321test.s:28:9: Error: Illegal place of configuration pseudo-op
1322test.s:29:9: Error: Illegal place of configuration pseudo-op
1323test.s:30:9: Error: Illegal place of configuration pseudo-op
1324test.s:32:9: Error: Illegal place of configuration pseudo-op
1325test.s:33:9: Error: Illegal place of configuration pseudo-op
1326test.s:35:26: Error: Unknown value kind
1327test.s:36:45: Error: Unknown value type
1328test.s:37:45: Error: Unknown value type
1329test.s:37:66: Error: Unknown argument flag
1330test.s:38:57: Error: Unknown access qualifier
1331test.s:39:56: Error: Some garbages at argument flag place
1332test.s:40:32: Error: Expected expression
1333test.s:40:33: Error: Expected expression
1334test.s:41:23: Error: Expected expression
1335test.s:42:25: Error: Expected string
1336test.s:44:20: Error: Expected expression
1337test.s:44:21: Error: Expected expression
1338)ffDXD", false
1339    },
1340};
1341
1342static void testAssembler(cxuint testId, const AsmTestCase& testCase)
1343{
1344    std::istringstream input(testCase.input);
1345    std::ostringstream errorStream;
1346    std::ostringstream printStream;
1347   
1348    // create assembler with testcase's input and with ASM_TESTRUN flag
1349    Assembler assembler("test.s", input, (ASM_ALL|ASM_TESTRUN)&~ASM_ALTMACRO,
1350            BinaryFormat::AMD, GPUDeviceType::CAPE_VERDE, errorStream, printStream);
1351    bool good = assembler.assemble();
1352   
1353    std::ostringstream dumpOss;
1354    if (good && assembler.getFormatHandler()!=nullptr)
1355        // get format handler and their output
1356        printROCmOutput(dumpOss, static_cast<const AsmROCmHandler*>(
1357                    assembler.getFormatHandler())->getOutput());
1358    /* compare results dump with expected dump */
1359    char testName[30];
1360    snprintf(testName, 30, "Test #%u", testId);
1361   
1362    assertValue(testName, "good", int(testCase.good), int(good));
1363    assertString(testName, "dump", testCase.dump, dumpOss.str());
1364    assertString(testName, "errorMessages", testCase.errors, errorStream.str());
1365}
1366
1367int main(int argc, const char** argv)
1368{
1369    int retVal = 0;
1370    for (size_t i = 0; i < sizeof(asmTestCases1Tbl)/sizeof(AsmTestCase); i++)
1371        try
1372        { testAssembler(i, asmTestCases1Tbl[i]); }
1373        catch(const std::exception& ex)
1374        {
1375            std::cerr << ex.what() << std::endl;
1376            retVal = 1;
1377        }
1378    return retVal;
1379}
Note: See TracBrowser for help on using the repository browser.