source: CLRX/CLRadeonExtender/trunk/tests/amdasm/AsmROCmFormat.cpp @ 3575

Last change on this file since 3575 was 3575, checked in by matszpk, 3 years ago

CLRadeonExtender: Change Copyright dates.

File size: 18.7 KB
Line 
1/*
2 *  CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3 *  Copyright (C) 2014-2018 Mateusz Szpakowski
4 *
5 *  This library is free software; you can redistribute it and/or
6 *  modify it under the terms of the GNU Lesser General Public
7 *  License as published by the Free Software Foundation; either
8 *  version 2.1 of the License, or (at your option) any later version.
9 *
10 *  This library is distributed in the hope that it will be useful,
11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 *  Lesser General Public License for more details.
14 *
15 *  You should have received a copy of the GNU Lesser General Public
16 *  License along with this library; if not, write to the Free Software
17 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18 */
19
20#include <CLRX/Config.h>
21#include <iostream>
22#include <cstdio>
23#include <sstream>
24#include <algorithm>
25#include <memory>
26#include <CLRX/amdasm/Assembler.h>
27#include "../TestUtils.h"
28
29using namespace CLRX;
30
31static void printHexData(std::ostream& os, cxuint indentLevel, size_t size,
32             const cxbyte* data)
33{
34    if (data==nullptr)
35    {
36        for (cxuint j = 0; j < indentLevel; j++)
37            os << "  ";
38        os << "nullptr\n";
39        return;
40    }
41    for (size_t i = 0; i < size; i++)
42    {
43        if ((i&31)==0)
44            for (cxuint j = 0; j < indentLevel; j++)
45                os << "  ";
46        char buf[10];
47        snprintf(buf, 10, "%02x", cxuint(data[i]));
48        os << buf;
49        if ((i&31)==31 || i+1 == size)
50            os << '\n';
51    }
52}
53
// Printable names of ROCm region types; printROCmOutput indexes this table
// with the numeric value of a symbol's ROCmRegionType.
static const char* rocmRegionTypeNames[3] =
{
    "data",
    "fkernel",
    "kernel"
};
56
57// print dump of ROCm output to stream for comparing with testcase
58static void printROCmOutput(std::ostream& os, const ROCmInput* output)
59{
60    os << "ROCmBinDump:" << std::endl;
61    for (const ROCmSymbolInput& symbol: output->symbols)
62    {
63        os << "  ROCmSymbol: name=" << symbol.symbolName << ", " <<
64                "offset=" << symbol.offset << ", size=" << symbol.size << ", type=" <<
65                rocmRegionTypeNames[cxuint(symbol.type)] << "\n";
66        if (symbol.type == ROCmRegionType::DATA)
67            continue;
68        if (symbol.offset+sizeof(ROCmKernelConfig) > output->codeSize)
69            continue;
70        const ROCmKernelConfig& config = *reinterpret_cast<const ROCmKernelConfig*>(
71                            output->code + symbol.offset);
72       
73        // print kernel configuration
74        os << "    Config:\n"
75            "      amdCodeVersion=" << ULEV(config.amdCodeVersionMajor) << "." <<
76                ULEV(config.amdCodeVersionMajor) << "\n"
77            "      amdMachine=" << ULEV(config.amdMachineKind) << ":" <<
78                ULEV(config.amdMachineMajor) << ":" <<
79                ULEV(config.amdMachineMinor) << ":" <<
80                ULEV(config.amdMachineStepping) << "\n"
81            "      kernelCodeEntryOffset=" << ULEV(config.kernelCodeEntryOffset) << "\n"
82            "      kernelCodePrefetchOffset=" <<
83                ULEV(config.kernelCodePrefetchOffset) << "\n"
84            "      kernelCodePrefetchSize=" << ULEV(config.kernelCodePrefetchSize) << "\n"
85            "      maxScrachBackingMemorySize=" <<
86                ULEV(config.maxScrachBackingMemorySize) << "\n"
87            "      computePgmRsrc1=0x" << std::hex << ULEV(config.computePgmRsrc1) << "\n"
88            "      computePgmRsrc2=0x" << ULEV(config.computePgmRsrc2) << "\n"
89            "      enableSgprRegisterFlags=0x" <<
90                ULEV(config.enableSgprRegisterFlags) << "\n"
91            "      enableFeatureFlags=0x" <<
92                ULEV(config.enableFeatureFlags) << std::dec << "\n"
93            "      workitemPrivateSegmentSize=" <<
94                ULEV(config.workitemPrivateSegmentSize) << "\n"
95            "      workgroupGroupSegmentSize=" <<
96                ULEV(config.workgroupGroupSegmentSize) << "\n"
97            "      gdsSegmentSize=" << ULEV(config.gdsSegmentSize) << "\n"
98            "      kernargSegmentSize=" << ULEV(config.kernargSegmentSize) << "\n"
99            "      workgroupFbarrierCount=" << ULEV(config.workgroupFbarrierCount) << "\n"
100            "      wavefrontSgprCount=" << ULEV(config.wavefrontSgprCount) << "\n"
101            "      workitemVgprCount=" << ULEV(config.workitemVgprCount) << "\n"
102            "      reservedVgprFirst=" << ULEV(config.reservedVgprFirst) << "\n"
103            "      reservedVgprCount=" << ULEV(config.reservedVgprCount) << "\n"
104            "      reservedSgprFirst=" << ULEV(config.reservedSgprFirst) << "\n"
105            "      reservedSgprCount=" << ULEV(config.reservedSgprCount) << "\n"
106            "      debugWavefrontPrivateSegmentOffsetSgpr=" <<
107                ULEV(config.debugWavefrontPrivateSegmentOffsetSgpr) << "\n"
108            "      debugPrivateSegmentBufferSgpr=" <<
109                ULEV(config.debugPrivateSegmentBufferSgpr) << "\n"
110            "      kernargSegmentAlignment=" << 
111                cxuint(config.kernargSegmentAlignment) << "\n"
112            "      groupSegmentAlignment=" <<
113                cxuint(config.groupSegmentAlignment) << "\n"
114            "      privateSegmentAlignment=" <<
115                cxuint(config.privateSegmentAlignment) << "\n"
116            "      wavefrontSize=" << cxuint(config.wavefrontSize) << "\n"
117            "      callConvention=0x" << std::hex << ULEV(config.callConvention) << "\n"
118            "      runtimeLoaderKernelSymbol=0x" <<
119                ULEV(config.runtimeLoaderKernelSymbol) << std::dec << "\n";
120        os << "      ControlDirective:\n";
121        printHexData(os, 3, 128, config.controlDirective);
122    }
123    // print comment and code
124    os << "  Comment:\n";
125    printHexData(os, 1, output->commentSize, (const cxbyte*)output->comment);
126    os << "  Code:\n";
127    printHexData(os, 1, output->codeSize, output->code);
128   
129    // print extra sections if supplied
130    for (BinSection section: output->extraSections)
131    {
132        os << "  Section " << section.name << ", type=" << section.type <<
133                        ", flags=" << section.flags << ":\n";
134        printHexData(os, 1, section.size, section.data);
135    }
136    // print extra symbols if supplied
137    for (BinSymbol symbol: output->extraSymbols)
138        os << "  Symbol: name=" << symbol.name << ", value=" << symbol.value <<
139                ", size=" << symbol.size << ", section=" << symbol.sectionId << "\n";
140    os.flush();
141}
142
143
144struct AsmTestCase
145{
146    const char* input;
147    const char* dump;
148    const char* errors;
149    bool good;
150};
151
152static const AsmTestCase asmTestCases1Tbl[] =
153{
154    {
155        R"ffDXD(        .rocm
156        .gpu Fiji
157.kernel kxx1
158    .fkernel
159    .config
160        .dims x
161        .codeversion 1,0
162        .call_convention 0x34dac
163        .debug_private_segment_buffer_sgpr 98
164        .debug_wavefront_private_segment_offset_sgpr 96
165        .gds_segment_size 100
166        .kernarg_segment_align 32
167        .workgroup_group_segment_size 22
168        .workgroup_fbarrier_count 3324
169        .dx10clamp
170        .exceptions 10
171        .private_segment_align 128
172        .privmode
173        .reserved_sgprs 5,14
174        .runtime_loader_kernel_symbol 0x4dc98b3a
175        .scratchbuffer 77222
176        .reserved_sgprs 9,12
177        .reserved_vgprs 7,17
178        .private_elem_size 16
179    .control_directive
180        .int 1,2,3
181        .fill 116,1,0
182.kernel kxx2
183    .config
184        .dims x
185        .codeversion 1,0
186        .call_convention 0x112223
187.kernel kxx1
188    .config
189        .scratchbuffer 111
190.text
191kxx1:
192        .skip 256
193        s_mov_b32 s7, 0
194        s_endpgm
195       
196.align 256
197kxx2:
198        .skip 256
199        s_endpgm
200.section .comment
201        .ascii "some comment for you"
202.kernel kxx2
203    .control_directive
204        .fill 124,1,0xde
205    .config
206        .use_kernarg_segment_ptr
207    .control_directive
208        .int 0xaadd66cc
209    .config
210.kernel kxx1
211.kernel kxx2
212        .call_convention 0x1112234
213       
214)ffDXD",
215        /* dump */
216        R"ffDXD(ROCmBinDump:
217  ROCmSymbol: name=kxx1, offset=0, size=0, type=fkernel
218    Config:
219      amdCodeVersion=1.1
220      amdMachine=1:8:0:3
221      kernelCodeEntryOffset=256
222      kernelCodePrefetchOffset=0
223      kernelCodePrefetchSize=0
224      maxScrachBackingMemorySize=0
225      computePgmRsrc1=0x3c0040
226      computePgmRsrc2=0xa008081
227      enableSgprRegisterFlags=0x0
228      enableFeatureFlags=0x6
229      workitemPrivateSegmentSize=111
230      workgroupGroupSegmentSize=22
231      gdsSegmentSize=100
232      kernargSegmentSize=0
233      workgroupFbarrierCount=3324
234      wavefrontSgprCount=10
235      workitemVgprCount=1
236      reservedVgprFirst=7
237      reservedVgprCount=11
238      reservedSgprFirst=9
239      reservedSgprCount=4
240      debugWavefrontPrivateSegmentOffsetSgpr=96
241      debugPrivateSegmentBufferSgpr=98
242      kernargSegmentAlignment=5
243      groupSegmentAlignment=4
244      privateSegmentAlignment=7
245      wavefrontSize=6
246      callConvention=0x34dac
247      runtimeLoaderKernelSymbol=0x4dc98b3a
248      ControlDirective:
249      0100000002000000030000000000000000000000000000000000000000000000
250      0000000000000000000000000000000000000000000000000000000000000000
251      0000000000000000000000000000000000000000000000000000000000000000
252      0000000000000000000000000000000000000000000000000000000000000000
253  ROCmSymbol: name=kxx2, offset=512, size=0, type=kernel
254    Config:
255      amdCodeVersion=1.1
256      amdMachine=1:8:0:3
257      kernelCodeEntryOffset=256
258      kernelCodePrefetchOffset=0
259      kernelCodePrefetchSize=0
260      maxScrachBackingMemorySize=0
261      computePgmRsrc1=0xc0000
262      computePgmRsrc2=0x84
263      enableSgprRegisterFlags=0x8
264      enableFeatureFlags=0x0
265      workitemPrivateSegmentSize=0
266      workgroupGroupSegmentSize=0
267      gdsSegmentSize=0
268      kernargSegmentSize=0
269      workgroupFbarrierCount=0
270      wavefrontSgprCount=5
271      workitemVgprCount=1
272      reservedVgprFirst=0
273      reservedVgprCount=0
274      reservedSgprFirst=0
275      reservedSgprCount=0
276      debugWavefrontPrivateSegmentOffsetSgpr=0
277      debugPrivateSegmentBufferSgpr=0
278      kernargSegmentAlignment=4
279      groupSegmentAlignment=4
280      privateSegmentAlignment=4
281      wavefrontSize=6
282      callConvention=0x1112234
283      runtimeLoaderKernelSymbol=0x0
284      ControlDirective:
285      dededededededededededededededededededededededededededededededede
286      dededededededededededededededededededededededededededededededede
287      dededededededededededededededededededededededededededededededede
288      dedededededededededededededededededededededededededededecc66ddaa
289  Comment:
290  736f6d6520636f6d6d656e7420666f7220796f75
291  Code:
292  0100000000000000010008000000030000010000000000000000000000000000
293  0000000000000000000000000000000040003c008180000a000006006f000000
294  16000000640000000000000000000000fc0c00000a00010007000b0009000400
295  6000620005040706ac4d03000000000000000000000000003a8bc94d00000000
296  0100000002000000030000000000000000000000000000000000000000000000
297  0000000000000000000000000000000000000000000000000000000000000000
298  0000000000000000000000000000000000000000000000000000000000000000
299  0000000000000000000000000000000000000000000000000000000000000000
300  800087be000081bf000080bf000080bf000080bf000080bf000080bf000080bf
301  000080bf000080bf000080bf000080bf000080bf000080bf000080bf000080bf
302  000080bf000080bf000080bf000080bf000080bf000080bf000080bf000080bf
303  000080bf000080bf000080bf000080bf000080bf000080bf000080bf000080bf
304  000080bf000080bf000080bf000080bf000080bf000080bf000080bf000080bf
305  000080bf000080bf000080bf000080bf000080bf000080bf000080bf000080bf
306  000080bf000080bf000080bf000080bf000080bf000080bf000080bf000080bf
307  000080bf000080bf000080bf000080bf000080bf000080bf000080bf000080bf
308  0100000000000000010008000000030000010000000000000000000000000000
309  0000000000000000000000000000000000000c00840000000800000000000000
310  0000000000000000000000000000000000000000050001000000000000000000
311  0000000004040406342211010000000000000000000000000000000000000000
312  dededededededededededededededededededededededededededededededede
313  dededededededededededededededededededededededededededededededede
314  dededededededededededededededededededededededededededededededede
315  dedededededededededededededededededededededededededededecc66ddaa
316  000081bf
317)ffDXD",
318        /* warning/errors */
319        "",
320        true
321    },
322    {
323        R"ffDXD(        .rocm
324        .gpu Fiji
325.kernel someKernelX
326    .config
327        .dims xz
328        .call_convention 331
329        .codeversion 1,0
330        .machine 8,0,1,2
331        .debug_private_segment_buffer_sgpr 10
332        .debug_wavefront_private_segment_offset_sgpr 31
333        .exceptions 0x3e
334        .floatmode 0xc3
335        .gds_segment_size 105
336        .group_segment_align 128
337        .kernarg_segment_align 64
338        .kernarg_segment_size 228
339        .kernel_code_entry_offset 256
340        .kernel_code_prefetch_offset 1002
341        .kernel_code_prefetch_size 13431
342        .max_scratch_backing_memory 4212
343        .pgmrsrc1 0xa0000000
344        .pgmrsrc2 0xd00000
345        .priority 2
346        .private_elem_size 8
347        .private_segment_align 32
348        .reserved_sgprs 12,19
349        .reserved_vgprs 26,48
350        .runtime_loader_kernel_symbol 0x3eda1
351        .scratchbuffer 2330
352        .use_debug_enabled
353        .use_flat_scratch_init
354        .use_grid_workgroup_count xz
355        .use_private_segment_buffer
356        .use_ptr64
357        .use_xnack_enabled
358        .wavefront_size 256
359        .workgroup_fbarrier_count 69
360        .workgroup_group_segment_size 324
361        .workitem_private_segment_size 33
362        .vgprsnum 211
363        .sgprsnum 85
364.text
365someKernelX:
366        .skip 256
367        s_endpgm)ffDXD",
368        R"ffDXD(ROCmBinDump:
369  ROCmSymbol: name=someKernelX, offset=0, size=0, type=kernel
370    Config:
371      amdCodeVersion=1.1
372      amdMachine=8:0:1:2
373      kernelCodeEntryOffset=256
374      kernelCodePrefetchOffset=1002
375      kernelCodePrefetchSize=13431
376      maxScrachBackingMemorySize=4212
377      computePgmRsrc1=0xa00c3ab4
378      computePgmRsrc2=0x3ed09291
379      enableSgprRegisterFlags=0x2a1
380      enableFeatureFlags=0x6c
381      workitemPrivateSegmentSize=33
382      workgroupGroupSegmentSize=324
383      gdsSegmentSize=105
384      kernargSegmentSize=228
385      workgroupFbarrierCount=69
386      wavefrontSgprCount=85
387      workitemVgprCount=211
388      reservedVgprFirst=26
389      reservedVgprCount=23
390      reservedSgprFirst=12
391      reservedSgprCount=8
392      debugWavefrontPrivateSegmentOffsetSgpr=31
393      debugPrivateSegmentBufferSgpr=10
394      kernargSegmentAlignment=6
395      groupSegmentAlignment=7
396      privateSegmentAlignment=5
397      wavefrontSize=8
398      callConvention=0x14b
399      runtimeLoaderKernelSymbol=0x3eda1
400      ControlDirective:
401      0000000000000000000000000000000000000000000000000000000000000000
402      0000000000000000000000000000000000000000000000000000000000000000
403      0000000000000000000000000000000000000000000000000000000000000000
404      0000000000000000000000000000000000000000000000000000000000000000
405  Comment:
406  nullptr
407  Code:
408  010000000000000008000000010002000001000000000000ea03000000000000
409  77340000000000007410000000000000b43a0ca09192d03ea1026c0021000000
410  4401000069000000e400000000000000450000005500d3001a0017000c000800
411  1f000a00060705084b010000000000000000000000000000a1ed030000000000
412  0000000000000000000000000000000000000000000000000000000000000000
413  0000000000000000000000000000000000000000000000000000000000000000
414  0000000000000000000000000000000000000000000000000000000000000000
415  0000000000000000000000000000000000000000000000000000000000000000
416  000081bf
417)ffDXD",
418        /* warning/errors */
419        "",
420        true
421    },
422    {
423        R"ffDXD(        .rocm
424        .gpu Fiji
425.kernel someKernelX
426    .config
427        .dims xz
428        .reserved_vgprs 0, 11
429.text
430someKernelX:
431        s_endpgm)ffDXD",
432        "", "test.s:3:1: Error: "
433        "Code for kernel 'someKernelX' is too small for configuration\n", false
434    },
435    {
436        R"ffDXD(        .rocm
437        .gpu Fiji
438.kernel someKernelX
439    .config
440        .dims xz
441        .reserved_vgprs 12,11
442        .reserved_sgprs 17,11
443        .reserved_vgprs 256,257
444        .reserved_sgprs 112,113
445        .debug_private_segment_buffer_sgpr 123
446        .debug_wavefront_private_segment_offset_sgpr 108
447        .private_elem_size 6
448        .private_elem_size 1
449        .private_elem_size 32
450        .kernarg_segment_align 56
451        .kernarg_segment_align 8
452        .private_segment_align 56
453        .private_segment_align 8
454        .wavefront_size 157
455        .wavefront_size 512
456        .pgmrsrc2 0xaa1fd3da2313
457.text
458someKernelX:
459        .skip 256
460        s_endpgm)ffDXD",
461        "", R"ffDXD(test.s:6:28: Error: Wrong register range
462test.s:7:28: Error: Wrong register range
463test.s:8:25: Error: First reserved VGPR register out of range (0-255)
464test.s:8:29: Error: Last reserved VGPR register out of range (0-255)
465test.s:9:25: Error: First reserved SGPR register out of range (0-101)
466test.s:9:29: Error: Last reserved SGPR register out of range (0-101)
467test.s:10:44: Error: SGPR register out of range
468test.s:11:54: Error: SGPR register out of range
469test.s:12:28: Error: Private element size must be power of two
470test.s:13:28: Error: Private element size out of range
471test.s:14:28: Error: Private element size out of range
472test.s:15:32: Error: Alignment must be power of two
473test.s:16:32: Error: Alignment must be not smaller than 16
474test.s:17:32: Error: Alignment must be power of two
475test.s:18:32: Error: Alignment must be not smaller than 16
476test.s:19:25: Error: Wavefront size must be power of two
477test.s:20:25: Error: Wavefront size must be not greater than 256
478test.s:21:19: Warning: Value 0xaa1fd3da2313 truncated to 0xd3da2313
479)ffDXD", false
480    }
481};
482
483static void testAssembler(cxuint testId, const AsmTestCase& testCase)
484{
485    std::istringstream input(testCase.input);
486    std::ostringstream errorStream;
487    std::ostringstream printStream;
488   
489    // create assembler with testcase's input and with ASM_TESTRUN flag
490    Assembler assembler("test.s", input, (ASM_ALL|ASM_TESTRUN)&~ASM_ALTMACRO,
491            BinaryFormat::AMD, GPUDeviceType::CAPE_VERDE, errorStream, printStream);
492    bool good = assembler.assemble();
493   
494    std::ostringstream dumpOss;
495    if (good && assembler.getFormatHandler()!=nullptr)
496        // get format handler and their output
497        printROCmOutput(dumpOss, static_cast<const AsmROCmHandler*>(
498                    assembler.getFormatHandler())->getOutput());
499    /* compare results dump with expected dump */
500    char testName[30];
501    snprintf(testName, 30, "Test #%u", testId);
502   
503    assertValue(testName, "good", int(testCase.good), int(good));
504    assertString(testName, "dump", testCase.dump, dumpOss.str());
505    assertString(testName, "errorMessages", testCase.errors, errorStream.str());
506}
507
508int main(int argc, const char** argv)
509{
510    int retVal = 0;
511    for (size_t i = 0; i < sizeof(asmTestCases1Tbl)/sizeof(AsmTestCase); i++)
512        try
513        { testAssembler(i, asmTestCases1Tbl[i]); }
514        catch(const std::exception& ex)
515        {
516            std::cerr << ex.what() << std::endl;
517            retVal = 1;
518        }
519    return retVal;
520}
Note: See TracBrowser for help on using the repository browser.