Changeset 3751 in CLRX


Ignore:
Timestamp:
Feb 7, 2018, 8:59:05 PM (17 months ago)
Author:
matszpk
Message:

CLRadeonExtender: AsmROCm: Calculate the kernarg segment size from the metadata kernel arguments if the kernarg segment size is not specified.
CLRXDocs: Add sample code for ROCm with metadata info pseudo-ops.

Location:
CLRadeonExtender/trunk
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • CLRadeonExtender/trunk/amdasm/AsmROCmFormat.cpp

    r3745 r3751  
    22062206    }
    22072207    return true;
     2208}
     2209
     2210static uint64_t calculateKernelArgSize(const std::vector<ROCmKernelArgInfo>& argInfos)
     2211{
     2212    uint64_t size = 0;
     2213    for (const ROCmKernelArgInfo& argInfo: argInfos)
     2214    {
     2215        // alignment
     2216        size = (size + argInfo.align-1) & ~(argInfo.align-1);
     2217        size += argInfo.size;
     2218    }
     2219    return size;
    22082220}
    22092221
     
    23412353            config.gdsSegmentSize = 0;
    23422354        if (config.kernargSegmentSize == BINGEN64_DEFAULT)
    2343             config.kernargSegmentSize = 0;
     2355        {
     2356            if (output.useMetadataInfo)
     2357                // calculate kernel arg size
     2358                config.kernargSegmentSize = calculateKernelArgSize(
     2359                        output.metadataInfo.kernels[i].argInfos);
     2360            else
     2361                config.kernargSegmentSize = 0;
     2362        }
    23442363        if (config.workgroupFbarrierCount == BINGEN_DEFAULT)
    23452364            config.workgroupFbarrierCount = 0;
  • CLRadeonExtender/trunk/doc/ClrxAsmRocm.md

    r3750 r3751  
    710710...
    711711```
     712
     713The sample with metadata info:
     714
     715```
     716.rocm
     717.gpu Fiji
     718.arch_minor 0
     719.arch_stepping 4
     720.eflags 2
     721.newbinfmt
     722.tripple "amdgcn-amd-amdhsa-amdgizcl"
     723.md_version 1, 0
     724.kernel vectorAdd
     725    .config
     726        .dims x
     727        .codeversion 1, 1
     728        .use_private_segment_buffer
     729        .use_dispatch_ptr
     730        .use_kernarg_segment_ptr
     731        .private_elem_size 4
     732        .use_ptr64
     733        .kernarg_segment_align 16
     734        .group_segment_align 16
     735        .private_segment_align 16
     736    .control_directive
     737        .fill 128, 1, 0x00
     738    .config
     739        .md_language "OpenCL", 1, 2
     740        .arg n, "uint", 4, , value, u32
     741        .arg a, "float*", 8, , globalbuf, f32, global, default const volatile
     742        .arg b, "float*", 8, , globalbuf, f32, global, default const
     743        .arg c, "float*", 8, , globalbuf, f32, global, default
     744        .arg , "", 8, , gox, i64
     745        .arg , "", 8, , goy, i64
     746        .arg , "", 8, , goz, i64
     747        .arg , "", 8, , printfbuf, i8
     748.text
     749vectorAdd:
     750.skip 256           # skip ROCm kernel configuration (required)
     751...
     752```
  • CLRadeonExtender/trunk/tests/amdasm/AsmROCmFormat.cpp

    r3747 r3751  
    860860      workgroupGroupSegmentSize=0
    861861      gdsSegmentSize=100
    862       kernargSegmentSize=0
     862      kernargSegmentSize=520
    863863      workgroupFbarrierCount=0
    864864      wavefrontSgprCount=10
     
    886886  0100000000000000010008000000030000010000000000000000000000000000
    887887  0000000000000000000000000000000040000c00800000000000000000000000
    888   00000000640000000000000000000000000000000a0001000000000000000000
     888  00000000640000000802000000000000000000000a0001000000000000000000
    889889  6000620005040406ac4d03000000000000000000000000000000000000000000
    890890  0000000000000000000000000000000000000000000000000000000000000000
     
    11511151      workgroupGroupSegmentSize=0
    11521152      gdsSegmentSize=100
    1153       kernargSegmentSize=0
     1153      kernargSegmentSize=96
    11541154      workgroupFbarrierCount=0
    11551155      wavefrontSgprCount=10
     
    11771177  0100000000000000010008000000030000010000000000000000000000000000
    11781178  0000000000000000000000000000000040000c00800000000000000000000000
    1179   00000000640000000000000000000000000000000a0001000000000000000000
     1179  00000000640000006000000000000000000000000a0001000000000000000000
    11801180  6000620005040406ac4d03000000000000000000000000000000000000000000
    11811181  0000000000000000000000000000000000000000000000000000000000000000
Note: See TracChangeset for help on using the changeset viewer.