Changeset 3712 in CLRX


Ignore:
Timestamp:
Feb 3, 2018, 1:41:12 PM (18 months ago)
Author:
matszpk
Message:

CLRadeonExtender: AmdCL2: Handle vectypehint and work_group_size_hint in kernel metadata. Add '.vectypehint' and '.work_group_size_hint' pseudo-ops
to AmdCL2 format handling. Add the new pseudo-ops to the editors' syntax files.

Location:
CLRadeonExtender/trunk
Files:
13 edited

Legend:

Unmodified
Added
Removed
  • CLRadeonExtender/trunk/CLRX/amdbin/AmdCL2BinGen.h

    r3575 r3712  
    8080    bool useEnqueue; ///< this kernel enqueues other kernel
    8181    bool useGeneric;    ///< use generic pointer addresses (for flat instrs)
     82    CString vecTypeHint; ///< vectypehint
     83    uint32_t workGroupSizeHint[3];  ///< workGroupSizeHint
    8284   
    8385    size_t calculateKernelArgSize(bool is64Bit, bool newBinaries) const;
  • CLRadeonExtender/trunk/CLRX/amdbin/AmdCL2Binaries.h

    r3575 r3712  
    282282};
    283283
     284struct AmdCL2GPUMetadataHeaderEnd32
     285{
     286    uint32_t workGroupSizeHint[3];
     287    uint32_t vecTypeHintLength;
     288    uint32_t unused;
     289};
     290
    284291/// header for metadata
    285292struct AmdCL2GPUMetadataHeader64
     
    301308    uint64_t unknown5[2];
    302309    uint64_t argsNum;       ///< number of arguments
     310};
     311
     312struct AmdCL2GPUMetadataHeaderEnd64
     313{
     314    uint64_t workGroupSizeHint[3];
     315    uint64_t vecTypeHintLength;
     316    uint64_t unused;
    303317};
    304318
  • CLRadeonExtender/trunk/amdasm/AsmAmdCL2Format.cpp

    r3676 r3712  
    6565    "use_ptr64", "use_queue_ptr", "use_xnack_enabled",
    6666    "useargs", "useenqueue", "usegeneric",
    67     "userdatanum", "usesetup", "vgprsnum",
    68     "wavefront_sgpr_count", "wavefront_size",  "workgroup_fbarrier_count",
     67    "userdatanum", "usesetup", "vectypehint", "vgprsnum",
     68    "wavefront_sgpr_count", "wavefront_size",
     69    "work_group_size_hint", "workgroup_fbarrier_count",
    6970    "workgroup_group_segment_size", "workitem_private_segment_size",
    7071    "workitem_vgpr_count"
     
    105106    AMDCL2OP_USE_PTR64, AMDCL2OP_USE_QUEUE_PTR, AMDCL2OP_USE_XNACK_ENABLED,
    106107    AMDCL2OP_USEARGS, AMDCL2OP_USEENQUEUE, AMDCL2OP_USEGENERIC,
    107     AMDCL2OP_USERDATANUM, AMDCL2OP_USESETUP, AMDCL2OP_VGPRSNUM,
     108    AMDCL2OP_USERDATANUM, AMDCL2OP_USESETUP, AMDCL2OP_VECTYPEHINT, AMDCL2OP_VGPRSNUM,
    108109    AMDCL2OP_WAVEFRONT_SGPR_COUNT, AMDCL2OP_WAVEFRONT_SIZE,
     110    AMDCL2OP_WORK_GROUP_SIZE_HINT,
    109111    AMDCL2OP_WORKGROUP_FBARRIER_COUNT, AMDCL2OP_WORKGROUP_GROUP_SEGMENT_SIZE,
    110112    AMDCL2OP_WORKITEM_PRIVATE_SEGMENT_SIZE, AMDCL2OP_WORKITEM_VGPR_COUNT
     
    11491151        return;
    11501152    AmdCL2KernelConfig& config = handler.output.kernels[asmr.currentKernel].config;
     1153    // reqd_work_group_size
    11511154    config.reqdWorkGroupSize[0] = out[0];
    11521155    config.reqdWorkGroupSize[1] = out[1];
    11531156    config.reqdWorkGroupSize[2] = out[2];
     1157}
     1158
     1159void AsmAmdCL2PseudoOps::setWorkGroupSizeHint(AsmAmdCL2Handler& handler,
     1160                    const char* pseudoOpPlace, const char* linePtr)
     1161{
     1162    Assembler& asmr = handler.assembler;
     1163    const char* end = asmr.line + asmr.lineSize;
     1164    if (asmr.currentKernel==ASMKERN_GLOBAL || asmr.currentKernel==ASMKERN_INNER ||
     1165        asmr.sections[asmr.currentSection].type != AsmSectionType::CONFIG)
     1166        PSEUDOOP_RETURN_BY_ERROR("Illegal place of configuration pseudo-op")
     1167   
     1168    skipSpacesToEnd(linePtr, end);
     1169    uint64_t out[3] = { 0, 0, 0 };
     1170    // parse CWS (1-3 values)
     1171    if (!AsmAmdPseudoOps::parseCWS(asmr, pseudoOpPlace, linePtr, out))
     1172        return;
     1173    AmdCL2KernelConfig& config = handler.output.kernels[asmr.currentKernel].config;
     1174    // work group size hint
     1175    config.workGroupSizeHint[0] = out[0];
     1176    config.workGroupSizeHint[1] = out[1];
     1177    config.workGroupSizeHint[2] = out[2];
     1178}
     1179
     1180void AsmAmdCL2PseudoOps::setVecTypeHint(AsmAmdCL2Handler& handler,
     1181                    const char* pseudoOpPlace, const char* linePtr)
     1182{
     1183    Assembler& asmr = handler.assembler;
     1184    const char* end = asmr.line + asmr.lineSize;
     1185    if (asmr.currentKernel==ASMKERN_GLOBAL || asmr.currentKernel==ASMKERN_INNER ||
     1186        asmr.sections[asmr.currentSection].type != AsmSectionType::CONFIG)
     1187        PSEUDOOP_RETURN_BY_ERROR("Illegal place of configuration pseudo-op")
     1188   
     1189    CString vecTypeHint;
     1190    skipSpacesToEnd(linePtr, end);
     1191    bool good = getNameArg(asmr, vecTypeHint, linePtr, "vectypehint", true);
     1192    if (!good || !checkGarbagesAtEnd(asmr, linePtr))
     1193        return;
     1194   
     1195    AmdCL2KernelConfig& config = handler.output.kernels[asmr.currentKernel].config;
     1196    config.vecTypeHint = vecTypeHint;
    11541197}
    11551198
     
    16701713                       AMDCL2CVAL_USERDATANUM);
    16711714            break;
     1715        case AMDCL2OP_VECTYPEHINT:
     1716            AsmAmdCL2PseudoOps::setVecTypeHint(*this, stmtPlace, linePtr);
     1717            break;
    16721718        case AMDCL2OP_VGPRSNUM:
    16731719            AsmAmdCL2PseudoOps::setConfigValue(*this, stmtPlace, linePtr,
     
    16811727            AsmAmdCL2PseudoOps::setConfigValue(*this, stmtPlace, linePtr,
    16821728                             AMDCL2CVAL_WAVEFRONT_SIZE);
     1729            break;
     1730        case AMDCL2OP_WORK_GROUP_SIZE_HINT:
     1731            AsmAmdCL2PseudoOps::setWorkGroupSizeHint(*this, stmtPlace, linePtr);
    16831732            break;
    16841733        case AMDCL2OP_WORKITEM_VGPR_COUNT:
  • CLRadeonExtender/trunk/amdasm/AsmAmdCL2Internals.h

    r3575 r3712  
    174174    static void setUseGridWorkGroupCount(AsmAmdCL2Handler& handler,
    175175                      const char* pseudoOpPlace, const char* linePtr);
    176     // .cws (set reqd_work_group_size)
     176    // .cws (set reqd_work_group_size or workgroupsizehint)
    177177    static void setCWS(AsmAmdCL2Handler& handler, const char* pseudoOpPlace,
     178                      const char* linePtr);
     179    // .work_group_size_hint (set work_group_size_hint)
     180    static void setWorkGroupSizeHint(AsmAmdCL2Handler& handler, const char* pseudoOpPlace,
     181                      const char* linePtr);
     182    // .vectypehint
     183    static void setVecTypeHint(AsmAmdCL2Handler& handler, const char* pseudoOpPlace,
    178184                      const char* linePtr);
    179185    // .arg (kernel argument)
  • CLRadeonExtender/trunk/amdasm/DisasmAmdCL2.cpp

    r3575 r3712  
    4141    typedef AmdCL2MainGPUBinary32 AmdCL2MainBinary;
    4242    typedef AmdCL2GPUMetadataHeader32 MetadataHeader;
     43    typedef AmdCL2GPUMetadataHeaderEnd32 MetadataHeaderEnd;
    4344    typedef AmdCL2GPUKernelArgEntry32 KernelArgEntry;
     45    static const size_t newMetadataHeaderSize = 0xa4;
    4446};
    4547
     
    4850    typedef AmdCL2MainGPUBinary64 AmdCL2MainBinary;
    4951    typedef AmdCL2GPUMetadataHeader64 MetadataHeader;
     52    typedef AmdCL2GPUMetadataHeaderEnd64 MetadataHeaderEnd;
    5053    typedef AmdCL2GPUKernelArgEntry64 KernelArgEntry;
     54    static const size_t newMetadataHeaderSize = 0x110;
    5155};
    5256
     
    384388    for (size_t i = 0; i < 3; i++)
    385389        config.reqdWorkGroupSize[i] = ULEV(mdHdr->reqdWorkGroupSize[i]);
     390    for (size_t i = 0; i < 3; i++)
     391        config.workGroupSizeHint[i] = 0;
    386392   
    387393    if (setup != nullptr)
     
    432438    config.samplers.resize(std::unique(config.samplers.begin(), config.samplers.end()) -
    433439                config.samplers.begin());
     440   
     441    size_t vecTypeHintLength = 0;
     442    if (headerSize >= Types::newMetadataHeaderSize)
     443    {
     444        const typename Types::MetadataHeaderEnd* hdrEnd =
     445            reinterpret_cast<const typename Types::MetadataHeaderEnd*>(
     446                metadata +  Types::newMetadataHeaderSize -
     447                        sizeof(typename Types::MetadataHeaderEnd));
     448        for (cxuint k = 0; k < 3; k++)
     449            config.workGroupSizeHint[k] = ULEV(hdrEnd->workGroupSizeHint[k]);
     450        vecTypeHintLength = ULEV(hdrEnd->vecTypeHintLength);
     451    }
    434452    // get kernel args
    435453    size_t argOffset = headerSize + ULEV(mdHdr->firstNameLength) +
    436454            ULEV(mdHdr->secondNameLength)+2;
    437     if (ULEV(*(const uint32_t*)(metadata+argOffset)) ==
     455    if (vecTypeHintLength!=0 || ULEV(*(const uint32_t*)(metadata+argOffset)) ==
    438456                (sizeof(typename Types::KernelArgEntry)<<8))
    439         argOffset++;    // fix for AMD GPUPRO driver (2036.03) */
     457    {
     458        config.vecTypeHint.assign((const char*)metadata + argOffset, vecTypeHintLength);
     459        argOffset += vecTypeHintLength + 1;    // fix for AMD GPUPRO driver (2036.03)
     460    }
     461   
    440462    const typename Types::KernelArgEntry* argPtr = reinterpret_cast<
    441463            const typename Types::KernelArgEntry*>(metadata + argOffset);
     
    687709        output.write(buf, bufSize);
    688710   
     711    bufSize = 0;
     712    // work group size hint
     713    if (config.workGroupSizeHint[2] != 0)
     714        bufSize = snprintf(buf, 100, "        .work_group_size_hint %u, %u, %u\n",
     715               config.workGroupSizeHint[0], config.workGroupSizeHint[1],
     716               config.workGroupSizeHint[2]);
     717    else if (config.workGroupSizeHint[1] != 0)
     718        bufSize = snprintf(buf, 100, "        .work_group_size_hint %u, %u\n",
     719               config.workGroupSizeHint[0], config.workGroupSizeHint[1]);
     720    else if (config.workGroupSizeHint[0] != 0)
     721        bufSize = snprintf(buf, 100, "        .work_group_size_hint %u\n",
     722                config.workGroupSizeHint[0]);
     723    if (bufSize != 0) // if we have a work group size hint
     724        output.write(buf, bufSize);
     725    if (!config.vecTypeHint.empty())
     726    {
     727        output.write("        .vectypehint ", 21);
     728        output.write(config.vecTypeHint.c_str(), config.vecTypeHint.size());
     729        output.write("\n", 1);
     730    }
     731   
    689732    if (!hsaConfig)
    690733    {
  • CLRadeonExtender/trunk/amdbin/AmdCL2BinGen.cpp

    r3575 r3712  
    4242    kernel.kernelName = kernelName;
    4343   
     44    for (size_t i = 0; i < 3; i++)
     45    {
     46        kernel.config.workGroupSizeHint[i] = 0;
     47        kernel.config.reqdWorkGroupSize[i] = 0;
     48    }
    4449    kernel.config.usedSGPRsNum = kernel.config.usedVGPRsNum = BINGEN_DEFAULT;
    4550    kernel.config.floatMode = 0xc0;
     
    602607            if (input->driverVersion >= 223600U)
    603608                out += ::strlen(amdcl2GPUArchNameWordTable[cxuint(arch)]) - 7;
     609            if (is16_3Ver && !kernel.config.vecTypeHint.empty())
     610                out += kernel.config.vecTypeHint.size();
    604611           
    605612            /// if kernels uses locals
     
    987994{
    988995    typedef AmdCL2GPUMetadataHeader32 MetadataHeader;
     996    typedef AmdCL2GPUMetadataHeaderEnd32 MetadataHeaderEnd;
    989997    typedef AmdCL2GPUKernelArgEntry32 KernelArgEntry;
    990998    static const size_t headerSize16_3Ver = 0xa4;
     
    10001008{
    10011009    typedef AmdCL2GPUMetadataHeader64 MetadataHeader;
     1010    typedef AmdCL2GPUMetadataHeaderEnd64 MetadataHeaderEnd;
    10021011    typedef AmdCL2GPUKernelArgEntry64 KernelArgEntry;
    10031012    static const size_t headerSize16_3Ver = 0x110;
     
    10951104        }
    10961105        if (is16_3Ver)
    1097             fob.fill(Types::headerEndSize, 0);
     1106        {
     1107            fob.fill(Types::headerEndSize - sizeof(typename Types::MetadataHeaderEnd), 0);
     1108            typename Types::MetadataHeaderEnd mthdrEnd;
     1109            mthdrEnd.vecTypeHintLength = config.vecTypeHint.size();
     1110            for (size_t i = 0; i < 3; i++)
     1111                SLEV(mthdrEnd.workGroupSizeHint[i], config.workGroupSizeHint[i]);
     1112            mthdrEnd.unused = 0;
     1113            fob.writeObject(mthdrEnd);
     1114        }
    10981115        // two null terminated strings
    10991116        fob.writeArray(22, "__OpenCL_dummy_kernel");
     
    11041121                           amdcl2GPUArchNameWordTable[cxuint(arch)]);
    11051122        if (is16_3Ver)
    1106             fob.writeObject<cxbyte>(0);
     1123            fob.writeArray(config.vecTypeHint.size()+1, config.vecTypeHint.c_str());
    11071124       
    11081125        // put argument entries
  • CLRadeonExtender/trunk/amdbin/AmdCL2Binaries.cpp

    r3575 r3712  
    351351    kernelHeader.data = metadata;
    352352    const uint32_t argsNum = ULEV(hdrStruc->argsNum);
     353    size_t vecTypeHintLength = 0;
     354    if (kernelHeader.size >= Types::newMetadataHeaderSize)
     355    {
     356        const typename Types::MetadataHeaderEnd* hdrEnd =
     357            reinterpret_cast<const typename Types::MetadataHeaderEnd*>(
     358                metadata +  Types::newMetadataHeaderSize -
     359                        sizeof(typename Types::MetadataHeaderEnd));
     360        vecTypeHintLength = ULEV(hdrEnd->vecTypeHintLength);
     361    }
    353362   
    354363    if (usumGt(ULEV(hdrStruc->firstNameLength), ULEV(hdrStruc->secondNameLength),
     
    359368            ULEV(hdrStruc->firstNameLength)+ULEV(hdrStruc->secondNameLength)+2;
    360369    // fix for latest Crimson drivers
    361     if (ULEV(*(const uint32_t*)(metadata+argOffset)) ==
     370    if (vecTypeHintLength!=0 || ULEV(*(const uint32_t*)(metadata+argOffset)) ==
    362371                (sizeof(typename Types::KernelArgEntry)<<8))
    363372    {
    364373        crimson16 = true;
    365         argOffset++;
     374        argOffset += vecTypeHintLength + 1;
    366375    }
    367376    const typename Types::KernelArgEntry* argPtr = reinterpret_cast<
     
    509518    typedef ElfBinary32 ElfBinary;
    510519    typedef AmdCL2GPUMetadataHeader32 MetadataHeader;
     520    typedef AmdCL2GPUMetadataHeaderEnd32 MetadataHeaderEnd;
    511521    typedef AmdCL2GPUKernelArgEntry32 KernelArgEntry;
     522    static const size_t newMetadataHeaderSize = 0xa4;
    512523};
    513524
     
    516527    typedef ElfBinary64 ElfBinary;
    517528    typedef AmdCL2GPUMetadataHeader64 MetadataHeader;
     529    typedef AmdCL2GPUMetadataHeaderEnd64 MetadataHeaderEnd;
    518530    typedef AmdCL2GPUKernelArgEntry64 KernelArgEntry;
     531    static const size_t newMetadataHeaderSize = 0x110;
    519532};
    520533
  • CLRadeonExtender/trunk/doc/ClrxAsmAmdCl2.md

    r3702 r3712  
    591591registers for USERDATA.
    592592
     593### .vectypehint
     594
     595Syntax: .vectypehint OPENCLTYPE
     596
     597This pseudo-operation must be inside any kernel configuration.
     598Set vectypehint for the kernel. The argument is an OpenCL type.
     599
    593600### .vgprsnum
    594601
     
    611618This pseudo-op must be inside kernel HSA configuration (`.hsaconfig`).
    612619Set `wavefront_size` field in kernel configuration. Value must be a power of two.
     620
     621### .work_group_size_hint
     622
     623Syntax: .work_group_size_hint SIZEHINT[, SIZEHINT[, SIZEHINT]]
     624
     625This pseudo-operation must be inside any kernel configuration.
     626Set work_group_size_hint for this kernel.
    613627
    614628### .workgroup_fbarrier_count
  • CLRadeonExtender/trunk/editors/clrx.vim

    r3676 r3712  
    313313syntax match asmPseudoOps "\.usesetup"
    314314syntax match asmPseudoOps "\.using"
     315syntax match asmPseudoOps "\.vectypehint"
    315316syntax match asmPseudoOps "\.version"
    316317syntax match asmPseudoOps "\.vgprsnum"
     
    321322syntax match asmPseudoOps "\.while"
    322323syntax match asmPseudoOps "\.word"
     324syntax match asmPseudoOps "\.work_group_size_hint"
    323325syntax match asmPseudoOps "\.workgroup_fbarrier_count"
    324326syntax match asmPseudoOps "\.workgroup_group_segment_size"
  • CLRadeonExtender/trunk/editors/gedit.lang

    r3676 r3712  
    354354            <keyword>usesetup</keyword>
    355355            <keyword>using</keyword>
     356            <keyword>vectypehint</keyword>
    356357            <keyword>version</keyword>
    357358            <keyword>vgprsnum</keyword>
     
    362363            <keyword>while</keyword>
    363364            <keyword>word</keyword>
     365            <keyword>work_group_size_hint</keyword>
    364366            <keyword>workgroup_fbarrier_count</keyword>
    365367            <keyword>workgroup_group_segment_size</keyword>
  • CLRadeonExtender/trunk/editors/kate.xml

    r3676 r3712  
    20622062            <item>.usesetup</item>
    20632063            <item>.using</item>
     2064            <item>.vectypehint</item>
    20642065            <item>.version</item>
    20652066            <item>.vgprsnum</item>
     
    20702071            <item>.while</item>
    20712072            <item>.word</item>
     2073            <item>.work_group_size_hint</item>
    20722074            <item>.workgroup_fbarrier_count</item>
    20732075            <item>.workgroup_group_segment_size</item>
  • CLRadeonExtender/trunk/editors/notepad++.xml

    r3676 r3712  
    16821682.usesetup
    16831683.using
     1684.vectypehint
    16841685.version
    16851686.vgprsnum
     
    16901691.while
    16911692.word
     1693.work_group_size_hint
    16921694.workgroup_fbarrier_count
    16931695.workgroup_group_segment_size
  • CLRadeonExtender/trunk/tests/amdbin/AmdCL2BinGen.cpp

    r3575 r3712  
    221221    typedef AmdCL2MainGPUBinary32 MainBinary;
    222222    typedef AmdCL2GPUMetadataHeader32 MetadataHeader;
     223    typedef AmdCL2GPUMetadataHeaderEnd32 MetadataHeaderEnd;
    223224    typedef AmdCL2GPUKernelArgEntry32 KernelArgEntry;
    224225    static const KernelArgType wordType = KernelArgType::INT;
     226    static const size_t newMetadataHeaderSize = 0xa4;
    225227};
    226228
     
    229231    typedef AmdCL2MainGPUBinary64 MainBinary;
    230232    typedef AmdCL2GPUMetadataHeader64 MetadataHeader;
     233    typedef AmdCL2GPUMetadataHeaderEnd64 MetadataHeaderEnd;
    231234    typedef AmdCL2GPUKernelArgEntry64 KernelArgEntry;
    232235    static const KernelArgType wordType = KernelArgType::LONG;
     236    static const size_t newMetadataHeaderSize = 0x110;
    233237};
    234238
     
    245249    for (size_t i = 0; i < 3; i++)
    246250        config.reqdWorkGroupSize[i] = ULEV(mdHdr->reqdWorkGroupSize[i]);
     251    for (size_t i = 0; i < 3; i++)
     252        config.workGroupSizeHint[i] = 0;
    247253    const IntAmdCL2SetupData* setupData =
    248254            reinterpret_cast<const IntAmdCL2SetupData*>(setup + 48);
     
    286292    config.samplers.resize(std::unique(config.samplers.begin(), config.samplers.end()) -
    287293                config.samplers.begin());
     294   
     295    size_t vecTypeHintLength = 0;
     296    if (headerSize >= Types::newMetadataHeaderSize)
     297    {
     298        const typename Types::MetadataHeaderEnd* hdrEnd =
     299            reinterpret_cast<const typename Types::MetadataHeaderEnd*>(
     300                metadata +  Types::newMetadataHeaderSize -
     301                        sizeof(typename Types::MetadataHeaderEnd));
     302        for (cxuint k = 0; k < 3; k++)
     303            config.workGroupSizeHint[k] = ULEV(hdrEnd->workGroupSizeHint[k]);
     304        vecTypeHintLength = ULEV(hdrEnd->vecTypeHintLength);
     305    }
    288306    // get kernel args
    289307    size_t argOffset = headerSize + ULEV(mdHdr->firstNameLength) +
    290308            ULEV(mdHdr->secondNameLength)+2;
    291     if (ULEV(*((const uint32_t*)(metadata+argOffset))) ==
     309    if (vecTypeHintLength!=0 || ULEV(*((const uint32_t*)(metadata+argOffset))) ==
    292310            (sizeof(typename Types::KernelArgEntry)<<8))
    293         argOffset++;
     311    {
     312        config.vecTypeHint.assign((const char*)metadata + argOffset, vecTypeHintLength);
     313        argOffset += vecTypeHintLength+1;
     314    }
    294315    const typename Types::KernelArgEntry* argPtr = reinterpret_cast<
    295316            const typename Types::KernelArgEntry*>(metadata + argOffset);
Note: See TracChangeset for help on using the changeset viewer.