Changeset 3312 in CLRX


Ignore:
Timestamp:
Sep 5, 2017, 7:24:42 PM (4 months ago)
Author:
matszpk
Message:

CLRadeonExtender: DisasmAmdCL2: Add dumping a kernel setup as AMD HSA configuration. Add '-H' '--HSAConfig' option to clrxdisasm.
Docs: Add missing descriptions of options to clrxdisasm.

Location:
CLRadeonExtender/trunk
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • CLRadeonExtender/trunk/CLRX/amdasm/Disassembler.h

    r3274 r3312  
    5656    DISASM_SETUP = 64,
    5757    DISASM_CONFIG = 128,    ///< print kernel configuration instead raw data
    58     DISASM_BUGGYFPLIT = 256,
    59     DISASM_CODEPOS = 512,   ///< print code position
     58    DISASM_BUGGYFPLIT = 0x100,
     59    DISASM_CODEPOS = 0x200,   ///< print code position
     60    DISASM_HSACONFIG = 0x400,  ///< print HSA configuration
    6061   
    6162    ///< all disassembler flags (without config)
    62     DISASM_ALL = FLAGS_ALL&(~(DISASM_CONFIG|DISASM_BUGGYFPLIT))
     63    DISASM_ALL = FLAGS_ALL&(~(DISASM_CONFIG|DISASM_BUGGYFPLIT|DISASM_HSACONFIG))
    6364};
    6465
  • CLRadeonExtender/trunk/amdasm/DisasmAmdCL2.cpp

    r3121 r3312  
    367367    for (size_t i = 0; i < 3; i++)
    368368        config.reqdWorkGroupSize[i] = ULEV(mdHdr->reqdWorkGroupSize[i]);
    369     const IntAmdCL2SetupData* setupData =
    370             reinterpret_cast<const IntAmdCL2SetupData*>(setup + 48);
    371     uint32_t pgmRSRC1 = ULEV(setupData->pgmRSRC1);
    372     uint32_t pgmRSRC2 = ULEV(setupData->pgmRSRC2);
    373     /* initializing fields from PGM_RSRC1 and PGM_RSRC2 */
    374     config.dimMask = (pgmRSRC2>>7)&7;
    375     config.ieeeMode = (pgmRSRC1>>23)&1;
    376     config.exceptions = (pgmRSRC2>>24)&0xff;
    377     config.floatMode = (pgmRSRC1>>12)&0xff;
    378     config.priority = (pgmRSRC1>>10)&3;
    379     config.tgSize = (pgmRSRC2>>10)&1;
    380     config.privilegedMode = (pgmRSRC1>>20)&1;
    381     config.dx10Clamp = (pgmRSRC1>>21)&1;
    382     config.debugMode = (pgmRSRC1>>22)&1;
    383     config.pgmRSRC2 = pgmRSRC2;
    384     config.pgmRSRC1 = pgmRSRC1;
    385     config.usedVGPRsNum = ULEV(setupData->vgprsNum);
    386     config.usedSGPRsNum = ULEV(setupData->sgprsNum);
    387     config.scratchBufferSize = ULEV(setupData->scratchBufferSize);
    388     config.localSize = ULEV(setupData->localSize);
    389     config.gdsSize = ULEV(setupData->gdsSize);
    390     uint16_t ksetup1 = ULEV(setupData->setup1);
    391     config.useSetup = (ksetup1&2)!=0;
    392     config.useArgs = (ksetup1&8)!=0;
    393     config.useGeneric = config.useEnqueue = false;
    394     if (ksetup1==0x2f) // if generic pointer support
    395         config.useGeneric = true;
    396     else if (!isGCN14)
    397         config.useEnqueue = (ksetup1&0x20)!=0;
    398     else // for GFX9 - check number of all registers must be 6+
    399         config.useEnqueue = (setupData->sgprsNum+6 == setupData->sgprsNumAll);
     369   
     370    if (setup != nullptr)
     371    {   // if passed to this function
     372        const IntAmdCL2SetupData* setupData =
     373                reinterpret_cast<const IntAmdCL2SetupData*>(setup + 48);
     374        uint32_t pgmRSRC1 = ULEV(setupData->pgmRSRC1);
     375        uint32_t pgmRSRC2 = ULEV(setupData->pgmRSRC2);
     376        /* initializing fields from PGM_RSRC1 and PGM_RSRC2 */
     377        config.dimMask = (pgmRSRC2>>7)&7;
     378        config.ieeeMode = (pgmRSRC1>>23)&1;
     379        config.exceptions = (pgmRSRC2>>24)&0xff;
     380        config.floatMode = (pgmRSRC1>>12)&0xff;
     381        config.priority = (pgmRSRC1>>10)&3;
     382        config.tgSize = (pgmRSRC2>>10)&1;
     383        config.privilegedMode = (pgmRSRC1>>20)&1;
     384        config.dx10Clamp = (pgmRSRC1>>21)&1;
     385        config.debugMode = (pgmRSRC1>>22)&1;
     386        config.pgmRSRC2 = pgmRSRC2;
     387        config.pgmRSRC1 = pgmRSRC1;
     388        config.usedVGPRsNum = ULEV(setupData->vgprsNum);
     389        config.usedSGPRsNum = ULEV(setupData->sgprsNum);
     390        config.scratchBufferSize = ULEV(setupData->scratchBufferSize);
     391        config.localSize = ULEV(setupData->localSize);
     392        config.gdsSize = ULEV(setupData->gdsSize);
     393        uint16_t ksetup1 = ULEV(setupData->setup1);
     394        config.useSetup = (ksetup1&2)!=0;
     395        config.useArgs = (ksetup1&8)!=0;
     396        config.useGeneric = config.useEnqueue = false;
     397        if (ksetup1==0x2f) // if generic pointer support
     398            config.useGeneric = true;
     399        else if (!isGCN14)
     400            config.useEnqueue = (ksetup1&0x20)!=0;
     401        else // for GFX9 - check number of all registers must be 6+
     402            config.useEnqueue = (setupData->sgprsNum+6 == setupData->sgprsNumAll);
     403    }
    400404   
    401405    // get samplers
     
    614618}
    615619
    616 static void dumpAmdCL2KernelConfig(std::ostream& output, const AmdCL2KernelConfig& config)
     620static void dumpAmdCL2KernelConfig(std::ostream& output,
     621                    const AmdCL2KernelConfig& config, bool hsaConfig)
    617622{
    618623    size_t bufSize;
    619624    char buf[100];
    620     output.write("    .config\n", 12);
    621     if (config.dimMask != BINGEN_DEFAULT)
    622     {
    623         strcpy(buf, "        .dims ");
    624         bufSize = 14;
    625         if ((config.dimMask & 1) != 0)
    626             buf[bufSize++] = 'x';
    627         if ((config.dimMask & 2) != 0)
    628             buf[bufSize++] = 'y';
    629         if ((config.dimMask & 4) != 0)
    630             buf[bufSize++] = 'z';
    631         buf[bufSize++] = '\n';
    632         output.write(buf, bufSize);
     625    if (hsaConfig)
     626        output.write("    .hsaconfig\n", 15);
     627    else
     628        output.write("    .config\n", 12);
     629   
     630    if (!hsaConfig)
     631    {   // do not print old-config style params if HSA config enabled
     632        if (config.dimMask != BINGEN_DEFAULT)
     633        {
     634            strcpy(buf, "        .dims ");
     635            bufSize = 14;
     636            if ((config.dimMask & 1) != 0)
     637                buf[bufSize++] = 'x';
     638            if ((config.dimMask & 2) != 0)
     639                buf[bufSize++] = 'y';
     640            if ((config.dimMask & 4) != 0)
     641                buf[bufSize++] = 'z';
     642            buf[bufSize++] = '\n';
     643            output.write(buf, bufSize);
     644        }
    633645    }
    634646    bufSize = 0;
     
    645657        output.write(buf, bufSize);
    646658   
    647     bufSize = snprintf(buf, 100, "        .sgprsnum %u\n", config.usedSGPRsNum);
    648     output.write(buf, bufSize);
    649     bufSize = snprintf(buf, 100, "        .vgprsnum %u\n", config.usedVGPRsNum);
    650     output.write(buf, bufSize);
    651    
    652     if (config.localSize!=0)
    653     {
    654         bufSize = snprintf(buf, 100, "        .localsize %" PRIu64 "\n",
    655                        uint64_t(config.localSize));
     659    if (!hsaConfig)
     660    {   // do not print old-config style params if HSA config enabled
     661        bufSize = snprintf(buf, 100, "        .sgprsnum %u\n", config.usedSGPRsNum);
    656662        output.write(buf, bufSize);
    657     }
    658     if (config.gdsSize!=0)
    659     {
    660         bufSize = snprintf(buf, 100, "        .gdssize %u\n", config.gdsSize);
     663        bufSize = snprintf(buf, 100, "        .vgprsnum %u\n", config.usedVGPRsNum);
    661664        output.write(buf, bufSize);
    662     }
    663     bufSize = snprintf(buf, 100, "        .floatmode 0x%02x\n", config.floatMode);
    664     output.write(buf, bufSize);
    665     if (config.scratchBufferSize!=0)
    666     {
    667         bufSize = snprintf(buf, 100, "        .scratchbuffer %u\n",
    668                            config.scratchBufferSize);
     665       
     666        if (config.localSize!=0)
     667        {
     668            bufSize = snprintf(buf, 100, "        .localsize %" PRIu64 "\n",
     669                        uint64_t(config.localSize));
     670            output.write(buf, bufSize);
     671        }
     672        if (config.gdsSize!=0)
     673        {
     674            bufSize = snprintf(buf, 100, "        .gdssize %u\n", config.gdsSize);
     675            output.write(buf, bufSize);
     676        }
     677        bufSize = snprintf(buf, 100, "        .floatmode 0x%02x\n", config.floatMode);
    669678        output.write(buf, bufSize);
    670     }
    671     bufSize = snprintf(buf, 100, "        .pgmrsrc1 0x%08x\n", config.pgmRSRC1);
    672     output.write(buf, bufSize);
    673     bufSize = snprintf(buf, 100, "        .pgmrsrc2 0x%08x\n", config.pgmRSRC2);
    674     output.write(buf, bufSize);
    675     if (config.privilegedMode)
    676         output.write("        .privmode\n", 18);
    677     if (config.debugMode)
    678         output.write("        .debugmode\n", 19);
    679     if (config.dx10Clamp)
    680         output.write("        .dx10clamp\n", 19);
    681     if (config.ieeeMode)
    682         output.write("        .ieeemode\n", 18);
    683     if (config.tgSize)
    684         output.write("        .tgsize\n", 16);
    685     if ((config.exceptions & 0x7f) != 0)
    686     {
    687         bufSize = snprintf(buf, 100, "        .exceptions 0x%02x\n",
    688                    cxuint(config.exceptions));
     679        if (config.scratchBufferSize!=0)
     680        {
     681            bufSize = snprintf(buf, 100, "        .scratchbuffer %u\n",
     682                            config.scratchBufferSize);
     683            output.write(buf, bufSize);
     684        }
     685        bufSize = snprintf(buf, 100, "        .pgmrsrc1 0x%08x\n", config.pgmRSRC1);
    689686        output.write(buf, bufSize);
    690     }
    691     if (config.useArgs)
    692         output.write("        .useargs\n", 17);
    693     if (config.useSetup)
    694         output.write("        .usesetup\n", 18);
    695     if (config.useEnqueue)
    696         output.write("        .useenqueue\n", 20);
    697     if (config.useGeneric)
    698         output.write("        .usegeneric\n", 20);
    699     bufSize = snprintf(buf, 100, "        .priority %u\n", config.priority);
    700     output.write(buf, bufSize);
     687        bufSize = snprintf(buf, 100, "        .pgmrsrc2 0x%08x\n", config.pgmRSRC2);
     688        output.write(buf, bufSize);
     689        if (config.privilegedMode)
     690            output.write("        .privmode\n", 18);
     691        if (config.debugMode)
     692            output.write("        .debugmode\n", 19);
     693        if (config.dx10Clamp)
     694            output.write("        .dx10clamp\n", 19);
     695        if (config.ieeeMode)
     696            output.write("        .ieeemode\n", 18);
     697        if (config.tgSize)
     698            output.write("        .tgsize\n", 16);
     699        if ((config.exceptions & 0x7f) != 0)
     700        {
     701            bufSize = snprintf(buf, 100, "        .exceptions 0x%02x\n",
     702                    cxuint(config.exceptions));
     703            output.write(buf, bufSize);
     704        }
     705        if (config.useArgs)
     706            output.write("        .useargs\n", 17);
     707        if (config.useSetup)
     708            output.write("        .usesetup\n", 18);
     709        if (config.useEnqueue)
     710            output.write("        .useenqueue\n", 20);
     711        if (config.useGeneric)
     712            output.write("        .usegeneric\n", 20);
     713        bufSize = snprintf(buf, 100, "        .priority %u\n", config.priority);
     714        output.write(buf, bufSize);
     715    }
     716}
     717
     718static void dumpAmdCL2ArgsAndSamplers(std::ostream& output,
     719                    const AmdCL2KernelConfig& config)
     720{
     721    size_t bufSize;
     722    char buf[100];
    701723    // arguments
    702724    for (const AmdKernelArgInput& arg: config.args)
     
    718740    const bool doDumpConfig = ((flags & DISASM_CONFIG) != 0);
    719741    const bool doSetup = ((flags & DISASM_SETUP) != 0);
     742    const bool doHSAConfig = ((flags & DISASM_HSACONFIG) != 0);
    720743   
    721744    if (amdCL2Input->is64BitMode)
     
    820843    }
    821844   
     845    const GPUArchitecture arch = getGPUArchitectureFromDeviceType(amdCL2Input->deviceType);
     846    const cxuint maxSgprsNum = getGPUMaxRegistersNum(arch, REGTYPE_SGPR, 0);
     847   
    822848    for (const AmdCL2DisasmKernelInput& kinput: amdCL2Input->kernels)
    823849    {
     
    856882            const bool isGCN14 = getGPUArchitectureFromDeviceType(
    857883                        amdCL2Input->deviceType) >= GPUArchitecture::GCN1_4;
    858             AmdCL2KernelConfig config;
     884            AmdCL2KernelConfig config{};
     885            // get kernel config
    859886            if (amdCL2Input->is64BitMode)
    860887                config = genKernelConfig<AmdCL2Types64>(kinput.metadataSize,
    861                         kinput.metadata, kinput.setupSize, kinput.setup, samplerOffsets,
     888                        kinput.metadata, kinput.setupSize,
     889                        (doHSAConfig ? nullptr : kinput.setup), samplerOffsets,
    862890                        kinput.textRelocs, isGCN14);
    863891            else
    864892                config = genKernelConfig<AmdCL2Types32>(kinput.metadataSize,
    865                         kinput.metadata, kinput.setupSize, kinput.setup, samplerOffsets,
     893                        kinput.metadata, kinput.setupSize,
     894                        (doHSAConfig ? nullptr : kinput.setup), samplerOffsets,
    866895                        kinput.textRelocs, isGCN14);
    867             dumpAmdCL2KernelConfig(output, config);
     896           
     897            dumpAmdCL2KernelConfig(output, config, doHSAConfig);
     898            if (doHSAConfig)
     899            {   // print as HSA config
     900                dumpAMDHSAConfig(output, maxSgprsNum, arch,
     901                     *reinterpret_cast<const AmdHsaKernelConfig*>(kinput.setup));
     902                output.write("    .hsaconfig\n", 15);
     903            }
     904           
     905            dumpAmdCL2ArgsAndSamplers(output, config);
    868906        }
    869907       
  • CLRadeonExtender/trunk/doc/ClrxDisasm.md

    r3272 r3312  
    1414The `clrxdisasm` can be invoked in following way:
    1515
    16 clrxdisasm [-mdcCfhar?] [-g GPUDEVICE] [-a ARCH] [-t VERSION] [--metadata] [--data]
    17 [--calNotes] [--config] [--floats] [--hexcode] [--all] [--raw] [--gpuType=GPUDEVICE]
    18 [--arch=ARCH] [--driverVersion=VERSION] [--llvmVersion=VERSION] [--buggyFPLit]
    19 [--help] [--usage] [--version] [file...]
     16clrxdisasm [-mdcCfsHhar?] [-g GPUDEVICE] [-a ARCH] [-t VERSION] [--metadata] [--data]
     17[--calNotes] [--config] [--floats] [--hexcode] [--setup] [--HSAConfig] [--all]
     18[--raw] [--gpuType=GPUDEVICE] [--arch=ARCH] [--driverVersion=VERSION]
     19[--llvmVersion=VERSION] [--buggyFPLit] [--help] [--usage] [--version] [file...]
    2020
    2121### Program Options
     
    5252    Print hexadecimal code before disassembled instruction in comment. Hexadecimal code
    5353will be printed in 4-byte words.
     54
     55* **-s**, **--setup**
     56
     57    Print AMD OpenCL 2.0 kernel setup data.
     58
     59* **-H**, **--HSAConfig**
     60
     61    Print AMD OpenCL 2.0 kernel setup configuration as AMD HSA configuration.
    5462
    5563* **-a**, **--all**
  • CLRadeonExtender/trunk/programs/clrxdisasm.cpp

    r3273 r3312  
    3838    { "config", 'C', CLIArgType::NONE, false, false, "dump kernel configuration", nullptr },
    3939    { "setup", 's', CLIArgType::NONE, false, false, "dump kernel setup", nullptr },
     40    { "HSAConfig", 'H', CLIArgType::NONE, false, false, "dump kernel HSA config", nullptr },
    4041    { "floats", 'f', CLIArgType::NONE, false, false, "display float literals", nullptr },
    4142    { "hexcode", 'h', CLIArgType::NONE, false, false,
     
    8384            (cli.hasShortOption('h')?DISASM_HEXCODE:0);
    8485     disasmFlags |= (cli.hasShortOption('C')?DISASM_CONFIG:0) |
    85              (cli.hasLongOption("buggyFPLit")?DISASM_BUGGYFPLIT:0);
     86             (cli.hasLongOption("buggyFPLit")?DISASM_BUGGYFPLIT:0) |
     87             (cli.hasShortOption('H')?DISASM_HSACONFIG:0);
    8688   
    8789    GPUDeviceType gpuDeviceType = GPUDeviceType::CAPE_VERDE;
  • CLRadeonExtender/trunk/programs/clrxdisasm.pod

    r3272 r3312  
    77=head1 SYNOPSIS
    88
    9 clrxdisasm [-mdcCfhar?] [-g GPUDEVICE] [-a ARCH] [-t VERSION] [--metadata] [--data]
    10 [--calNotes] [--config] [--floats] [--hexcode] [--all] [--raw] [--gpuType=GPUDEVICE]
    11 [--arch=ARCH] [--driverVersion=VERSION] [--llvmVersion=VERSION] [--buggyFPLit]
    12 [--help] [--usage] [--version] [file...]
     9clrxdisasm [-mdcCfsHhar?] [-g GPUDEVICE] [-a ARCH] [-t VERSION] [--metadata] [--data]
     10[--calNotes] [--config] [--floats] [--hexcode] [--all] [--setup] [--HSAConfig]
     11[--raw] [--gpuType=GPUDEVICE] [--arch=ARCH] [--driverVersion=VERSION]
     12[--llvmVersion=VERSION] [--buggyFPLit] [--help] [--usage] [--version] [file...]
    1313
    1414=head1 DESCRIPTION
     
    4848
    4949Print human-readable configuration instead of metadatas, headers and ATI CAL notes.
     50
     51=item B<-s>, B<--setup>
     52
     53Print AMD OpenCL 2.0 kernel setup data.
     54
     55=item B<-H>, B<--HSAConfig>
     56
     57Print AMD OpenCL 2.0 kernel setup configuration as AMD HSA configuration.
    5058
    5159=item B<-f>, B<--floats>
Note: See TracChangeset for help on using the changeset viewer.