Changeset 3308 in CLRX


Ignore:
Timestamp:
Sep 5, 2017, 12:34:56 PM (4 months ago)
Author:
matszpk
Message:

CLRadeonExtender: AsmGallium: Get the extra register allocation flags from the special AMDHSA feature flags.
CLRXDocs: Fixed info about SGPR register allocation in the ROCm documentation.
Added extra info about the allocation of extra SGPR registers to the GalliumCompute doc.

Location:
CLRadeonExtender/trunk
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • CLRadeonExtender/trunk/amdasm/AsmGalliumFormat.cpp

    r3306 r3308  
    18001800    }
    18011801   
     1802    cxuint llvmVersion = assembler.llvmVersion;
     1803    if (llvmVersion == 0 && (assembler.flags&ASM_TESTRUN)==0)
     1804        llvmVersion = detectedLLVMVersion;
     1805   
    18021806    GPUArchitecture arch = getGPUArchitectureFromDeviceType(assembler.deviceType);
    18031807    // set up number of the allocated SGPRs and VGPRs for kernel
     
    18311835        if (config.usedSGPRsNum==BINGEN_DEFAULT)
    18321836        {
     1837            cxuint allocFlags = kernelStates[i]->allocRegFlags;
     1838            if (llvmVersion >= 40000U)
     1839            {   // fix alloc reg flags for AMD HSA (such as ROCm)
     1840                const AmdHsaKernelConfig& hsaConfig  = *kernelStates[i]->config.get();
     1841                allocFlags = kernelStates[i]->allocRegFlags |
     1842                    // flat_scratch_init
     1843                    ((hsaConfig.enableSgprRegisterFlags&
     1844                            AMDHSAFLAG_USE_FLAT_SCRATCH_INIT)!=0? GCN_FLAT : 0) |
     1845                    // enable_xnack
     1846                    ((hsaConfig.enableFeatureFlags&AMDHSAFLAG_USE_XNACK_ENABLED)!=0 ?
     1847                                GCN_XNACK : 0);
     1848            }
     1849           
    18331850            config.usedSGPRsNum = std::min(
    18341851                std::max(minRegsNum[0], kernelStates[i]->allocRegs[0]) +
    1835                     getGPUExtraRegsNum(arch, REGTYPE_SGPR,
    1836                         kernelStates[i]->allocRegFlags|GCN_VCC),
     1852                    getGPUExtraRegsNum(arch, REGTYPE_SGPR, allocFlags|GCN_VCC),
    18371853                    maxSGPRsNum); // include all extra sgprs
    18381854        }
     
    18771893    AsmSection& asmCSection = assembler.sections[codeSection];
    18781894    const AsmSymbolMap& symbolMap = assembler.getSymbolMap();
    1879    
    1880     cxuint llvmVersion = assembler.llvmVersion;
    1881     if (llvmVersion == 0 && (assembler.flags&ASM_TESTRUN)==0)
    1882         llvmVersion = detectedLLVMVersion;
    18831895   
    18841896    const cxuint ldsShift = arch<GPUArchitecture::GCN1_1 ? 8 : 9;
  • CLRadeonExtender/trunk/doc/ClrxAsmGallium.md

    r3296 r3308  
    3030(VCC, FLAT_SCRATCH, XNACK_MASK) if any used to register pool.
    3131 The VCC register is included by default.
     32In an AMDHSA configuration (LLVM >= 4.0.0), special fields determine
     33which extra SGPR registers are added.
    3234
    3335## List of the specific pseudo-operations
  • CLRadeonExtender/trunk/doc/ClrxAsmRocm.md

    r3287 r3308  
    2828
    2929Assembler for ROCm format counts all SGPR registers and add extra registers
    30 (FLAT_SCRATCH, XNACK_MASK) if any used to register pool. Special fields determines
     30(FLAT_SCRATCH, XNACK_MASK). Special fields determines
    3131what extra SGPR extra has been added. The VCC register is included by default.
    3232
  • CLRadeonExtender/trunk/tests/amdasm/AsmGalliumFormat.cpp

    r3286 r3308  
    428428    },
    429429    /* AMD HSA */
    430     /* 1 - gallium (configured proginfo) */
     430    /* 3 - gallium (configured proginfo and AMDHSA) */
    431431    { R"ffDXD(            .gallium
    432432        .llvm_version 40000
     
    584584)ffDXD", "", true
    585585    },
     586    /* 3 - gallium - alloc reg flags (extra SGPR registers) */
     587    { R"ffDXD(            .gallium
     588        .gpu Fiji
     589        .llvm_version 40000
     590            .kernel aa22
     591            .args
     592            .arg scalar, 8,,,SEXT,griddim
     593            .config
     594            .priority 1
     595            .floatmode 43
     596            .ieeemode
     597            .vgprsnum 139
     598            .pgmrsrc2 523243
     599            .scratchbuffer 230
     600            .use_flat_scratch_init
     601           
     602            .call_convention 0x34dac
     603            .debug_private_segment_buffer_sgpr 98
     604            .debug_wavefront_private_segment_offset_sgpr 96
     605            .gds_segment_size 100
     606            .kernarg_segment_align 32
     607            .workgroup_group_segment_size 22
     608            .workgroup_fbarrier_count 3324
     609    .text
     610aa22:
     611    .skip 256
     612    s_mov_b32 s54, 455
     613)ffDXD", R"ffDXD(GalliumBinDump:
     614  Kernel: name=aa22, offset=0
     615    Config:
     616      dims=default, SGPRS=61, VGPRS=139, pgmRSRC2=0x7fbeb, ieeeMode=0x1
     617      floatMode=0x2b, priority=1, localSize=0, scratchBuffer=230
     618    AMD HSA Config:
     619      amdCodeVersion=1.1
     620      amdMachine=1:8:0:3
     621      kernelCodeEntryOffset=256
     622      kernelCodePrefetchOffset=0
     623      kernelCodePrefetchSize=0
     624      maxScrachBackingMemorySize=0
     625      computePgmRsrc1=0x8eb5e2
     626      computePgmRsrc2=0x7fbd1
     627      enableSgprRegisterFlags=0x20
     628      enableFeatureFlags=0xa
     629      workitemPrivateSegmentSize=230
     630      workgroupGroupSegmentSize=22
     631      gdsSegmentSize=100
     632      kernargSegmentSize=16
     633      workgroupFbarrierCount=3324
     634      wavefrontSgprCount=61
     635      workitemVgprCount=139
     636      reservedVgprFirst=0
     637      reservedVgprCount=0
     638      reservedSgprFirst=0
     639      reservedSgprCount=0
     640      debugWavefrontPrivateSegmentOffsetSgpr=96
     641      debugPrivateSegmentBufferSgpr=98
     642      kernargSegmentAlignment=5
     643      groupSegmentAlignment=4
     644      privateSegmentAlignment=4
     645      wavefrontSize=6
     646      callConvention=0x34dac
     647      runtimeLoaderKernelSymbol=0x0
     648      ControlDirective:
     649      0000000000000000000000000000000000000000000000000000000000000000
     650      0000000000000000000000000000000000000000000000000000000000000000
     651      0000000000000000000000000000000000000000000000000000000000000000
     652      0000000000000000000000000000000000000000000000000000000000000000
     653    Arg: scalar, true, griddim, size=8, tgtSize=8, tgtAlign=8
     654  Comment:
     655  nullptr
     656  GlobalData:
     657  nullptr
     658  Code:
     659  0100000000000000010008000000030000010000000000000000000000000000
     660  00000000000000000000000000000000e2b58e00d1fb070020000a00e6000000
     661  16000000640000001000000000000000fc0c00003d008b000000000000000000
     662  6000620005040406ac4d03000000000000000000000000000000000000000000
     663  0000000000000000000000000000000000000000000000000000000000000000
     664  0000000000000000000000000000000000000000000000000000000000000000
     665  0000000000000000000000000000000000000000000000000000000000000000
     666  0000000000000000000000000000000000000000000000000000000000000000
     667  ff00b6bec7010000
     668)ffDXD", "", true
     669    }
    586670};
    587671
Note: See TracChangeset for help on using the changeset viewer.