Changeset 2520 in CLRX


Ignore:
Timestamp:
Nov 1, 2016, 5:59:32 PM (4 years ago)
Author:
matszpk
Message:

CLRadeonExtender: Tentative version of the ROCm disassembler (doesn't work for almost all samples).

Location:
CLRadeonExtender/trunk
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • CLRadeonExtender/trunk/CLRX/amdasm/Disassembler.h

    r2519 r2520  
    8787   
    8888    Disassembler& disassembler; ///< disassembler instance
     89    size_t startOffset; ///< start offset
    8990    size_t inputSize;   ///< size of input
    9091    const cxbyte* input;    ///< input code
     92    bool dontPrintLabelsAfterCode;
    9193    std::vector<size_t> labels; ///< list of local labels
    9294    std::vector<std::pair<size_t, CString> > namedLabels;   ///< named labels
     
    111113   
    112114    /// set input code
    113     void setInput(size_t inputSize, const cxbyte* input)
     115    void setInput(size_t inputSize, const cxbyte* input, size_t startOffset = 0)
    114116    {
    115117        this->inputSize = inputSize;
    116118        this->input = input;
     119        this->startOffset = startOffset;
    117120    }
    118 
     121   
     122    void setDontPrintLabels(bool after)
     123    { dontPrintLabelsAfterCode = after; }
     124   
    119125    /// makes some things before disassembling
    120126    virtual void beforeDisassemble() = 0;
  • CLRadeonExtender/trunk/amdasm/DisasmROCm.cpp

    r2519 r2520  
    4141    uint32_t minor;
    4242    uint32_t stepping;
     43    GPUDeviceType deviceType;
    4344};
    4445
    4546static const AMDGPUArchValues amdGpuArchValuesTbl[] =
    4647{
    47     { 0, 0, 0 }, // GPUDeviceType::CAPE_VERDE
    48     { 0, 0, 0 }, // GPUDeviceType::PITCAIRN
    49     { 0, 0, 0 }, // GPUDeviceType::TAHITI
    50     { 0, 0, 0 }, // GPUDeviceType::OLAND
    51     { 7, 0, 0 }, // GPUDeviceType::BONAIRE
    52     { 7, 0, 0 }, // GPUDeviceType::SPECTRE
    53     { 7, 0, 0 }, // GPUDeviceType::SPOOKY
    54     { 7, 0, 0 }, // GPUDeviceType::KALINDI
    55     { 0, 0, 0 }, // GPUDeviceType::HAINAN
    56     { 7, 0, 1 }, // GPUDeviceType::HAWAII
    57     { 8, 0, 0 }, // GPUDeviceType::ICELAND
    58     { 8, 0, 0 }, // GPUDeviceType::TONGA
    59     { 7, 0, 0 }, // GPUDeviceType::MULLINS
    60     { 8, 0, 4 }, // GPUDeviceType::FIJI
    61     { 8, 0, 1 }, // GPUDeviceType::CARRIZO
    62     { 0, 0, 0 }, // GPUDeviceType::DUMMY
    63     { 0, 0, 0 }, // GPUDeviceType::GOOSE
    64     { 0, 0, 0 }, // GPUDeviceType::HORSE
    65     { 8, 1, 0 }, // GPUDeviceType::STONEY
    66     { 8, 0, 4 }, // GPUDeviceType::ELLESMERE
    67     { 8, 0, 4 } // GPUDeviceType::BAFFIN
     48    { 0, 0, 0, GPUDeviceType::CAPE_VERDE },
     49    { 7, 0, 0, GPUDeviceType::BONAIRE },
     50    { 7, 0, 1, GPUDeviceType::HAWAII },
     51    { 8, 0, 0, GPUDeviceType::ICELAND },
     52    { 8, 0, 1, GPUDeviceType::CARRIZO },
     53    { 8, 0, 2, GPUDeviceType::ICELAND },
     54    { 8, 0, 3, GPUDeviceType::FIJI },
     55    { 8, 0, 4, GPUDeviceType::FIJI },
     56    { 8, 1, 0, GPUDeviceType::STONEY }
    6857};
     58
     59static const size_t amdGpuArchValuesNum = sizeof(amdGpuArchValuesTbl) /
     60                sizeof(AMDGPUArchValues);
    6961
    7062ROCmDisasmInput* CLRX::getROCmDisasmInputFromBinary(const ROCmBinary& binary)
     
    7264    std::unique_ptr<ROCmDisasmInput> input(new ROCmDisasmInput);
    7365    uint32_t archMajor = 0;
     66    input->archMinor = 0;
     67    input->archStepping = 0;
    7468   
    7569    {
     
    9892    }
    9993    // determine device type
    100     std::cout << "archmajor: " << archMajor << ", archminor: " << input->archMinor <<
    101             ", archstepping: " << input->archStepping << "\n";
    102     cxuint deviceNumber = 0;
    103     for (deviceNumber = 0; deviceNumber <= cxuint(GPUDeviceType::GPUDEVICE_MAX);
    104                  deviceNumber++)
    105         if (amdGpuArchValuesTbl[deviceNumber].major==archMajor &&
    106             amdGpuArchValuesTbl[deviceNumber].minor==input->archMinor &&
    107             amdGpuArchValuesTbl[deviceNumber].stepping==input->archStepping)
     94    input->deviceType = GPUDeviceType::CAPE_VERDE;
     95    if (archMajor==0)
     96        input->deviceType = GPUDeviceType::CAPE_VERDE;
     97    else if (archMajor==7)
     98        input->deviceType = GPUDeviceType::BONAIRE;
     99    else if (archMajor==8)
     100        input->deviceType = GPUDeviceType::ICELAND;
     101   
     102    for (cxuint i = 0; i < amdGpuArchValuesNum; i++)
     103        if (amdGpuArchValuesTbl[i].major==archMajor &&
     104            amdGpuArchValuesTbl[i].minor==input->archMinor &&
     105            amdGpuArchValuesTbl[i].stepping==input->archStepping)
     106        {
     107            input->deviceType = amdGpuArchValuesTbl[i].deviceType;
    108108            break;
    109     if (deviceNumber > cxuint(GPUDeviceType::GPUDEVICE_MAX))
    110         throw Exception("Can't determine device type from arch values!");
    111     input->deviceType = GPUDeviceType(deviceNumber);
     109        }
    112110   
    113111    const size_t regionsNum = binary.getRegionsNum();
     
    134132    const bool doDumpConfig = ((flags & DISASM_CONFIG) != 0);
    135133   
    136     const GPUArchitecture arch = getGPUArchitectureFromDeviceType(rocmInput->deviceType);
    137     const cxuint maxSgprsNum = getGPUMaxRegistersNum(arch, REGTYPE_SGPR, 0);
    138    
    139134    for (cxuint i = 0; i < rocmInput->regions.size(); i++)
    140135    {
     
    147142        }
    148143    }
    149     if (doDumpCode && rocmInput->code != nullptr && rocmInput->codeSize != 0)
     144   
     145    const size_t regionsNum = rocmInput->regions.size();
     146    typedef std::pair<size_t, size_t> SortEntry;
     147    std::unique_ptr<SortEntry[]> sorted(new SortEntry[regionsNum]);
     148    for (size_t i = 0; i < regionsNum; i++)
     149        sorted[i] = std::make_pair(rocmInput->regions[i].offset, i);
     150    mapSort(sorted.get(), sorted.get() + regionsNum);
     151   
     152    if (rocmInput->code != nullptr && rocmInput->codeSize != 0)
    150153    {
     154        const cxbyte* code = rocmInput->code;
     155        output.write(".text\n", 6);
     156        for (size_t i = 0; i < regionsNum; i++)
     157        {
     158            const ROCmDisasmRegionInput& region = rocmInput->regions[sorted[i].second];
     159            output.write(region.regionName.c_str(), region.regionName.size());
     160            output.write(":\n", 2);
     161            if (region.isKernel)
     162            {
     163                if (doMetadata && !doDumpConfig)
     164                    printDisasmData(0x100, code + region.offset, output, true);
     165                if (doDumpCode)
     166                {
     167                    isaDisassembler->setInput(region.size-256, code + region.offset+256,
     168                                    region.offset+256);
     169                    isaDisassembler->setDontPrintLabels(i+1<regionsNum);
     170                    isaDisassembler->beforeDisassemble();
     171                    isaDisassembler->disassemble();
     172                }
     173            }
     174            else if (doDumpData)
     175            {
     176                output.write(".global ", 8);
     177                output.write(region.regionName.c_str(), region.regionName.size());
     178                output.write("\n", 1);
     179                printDisasmData(region.size, code + region.offset, output, true);
     180            }
     181        }
    151182    }
    152183}
  • CLRadeonExtender/trunk/amdasm/Disassembler.cpp

    r2517 r2520  
    3737
    3838ISADisassembler::ISADisassembler(Disassembler& _disassembler, cxuint outBufSize)
    39         : disassembler(_disassembler), output(outBufSize, _disassembler.getOutput())
     39        : disassembler(_disassembler), startOffset(0),
     40          dontPrintLabelsAfterCode(true), output(outBufSize, _disassembler.getOutput())
    4041{ }
    4142
     
    4647              NamedLabelIter& namedLabelIter)
    4748{
     49    pos += startOffset; // fix
    4850    if ((namedLabelIter != namedLabels.end() && namedLabelIter->first <= pos) ||
    4951            (labelIter != labels.end() && *labelIter <= pos))
     
    133135                   NamedLabelIter namedLabelIter)
    134136{
    135     size_t pos = start;
     137    size_t pos = startOffset + start;
    136138    while (namedLabelIter != namedLabels.end() || labelIter != labels.end())
    137139    {
  • CLRadeonExtender/trunk/amdasm/GCNDisasm.cpp

    r2469 r2520  
    541541          RelocIter& relocIter, uint32_t literal, FloatLitType floatLit, bool optional)
    542542{
    543     if (dasm.writeRelocation((codePos<<2)-4, relocIter))
     543    if (dasm.writeRelocation(dasm.startOffset + (codePos<<2)-4, relocIter))
    544544        return;
    545545    FastOutputBuffer& output = dasm.output;
     
    816816        case GCN_IMM_REL:
    817817        {
    818             const size_t branchPos = (pos + int16_t(imm16))<<2;
     818            const size_t branchPos = dasm.startOffset + ((pos + int16_t(imm16))<<2);
    819819            addSpaces(bufPtr, spacesToAdd);
    820820            output.forward(bufPtr-bufStart);
     
    10301030    if ((gcnInsn.mode&GCN_MASK1) == GCN_IMM_REL)
    10311031    {
    1032         const size_t branchPos = (codePos + int16_t(imm16))<<2;
     1032        const size_t branchPos = dasm.startOffset + ((codePos + int16_t(imm16))<<2);
    10331033        output.forward(bufPtr-bufStart);
    10341034        dasm.writeLocation(branchPos);
     
    24352435void GCNDisassembler::disassemble()
    24362436{
    2437     LabelIter curLabel = labels.begin();
    2438     RelocIter curReloc = relocations.begin();
    2439     NamedLabelIter curNamedLabel = namedLabels.begin();
     2437    LabelIter curLabel = std::lower_bound(labels.begin(), labels.end(), startOffset);
     2438    RelocIter curReloc = std::lower_bound(relocations.begin(), relocations.end(),
     2439        std::make_pair(startOffset, Relocation()),
     2440          [](const std::pair<size_t,Relocation>& a, const std::pair<size_t, Relocation>& b)
     2441          { return a.first < b.first; });
     2442    NamedLabelIter curNamedLabel = std::lower_bound(namedLabels.begin(), namedLabels.end(),
     2443        std::make_pair(startOffset, CString()),
     2444          [](const std::pair<size_t,CString>& a, const std::pair<size_t, CString>& b)
     2445          { return a.first < b.first; });
     2446   
    24402447    const uint32_t* codeWords = reinterpret_cast<const uint32_t*>(input);
    24412448
     
    27782785        output.put('\n');
    27792786    }
    2780     writeLabelsToEnd(codeWordsNum<<2, curLabel, curNamedLabel);
    2781     output.flush();
     2787    if (dontPrintLabelsAfterCode)
     2788    {
     2789        writeLabelsToEnd(codeWordsNum<<2, curLabel, curNamedLabel);
     2790        output.flush();
     2791    }
    27822792    disassembler.getOutput().flush();
    27832793    labels.clear(); // free labels
Note: See TracChangeset for help on using the changeset viewer.