Changeset 3118 in CLRX


Ignore:
Timestamp:
Jun 2, 2017, 2:46:26 PM (19 months ago)
Author:
matszpk
Message:

CLRadeonExtender: Fixed registering of RegVarUsage in VOPx SDWA. Corrected detection of useenqueue (GFX9). Corrected setting of the setup1 field in the kernel setup for GFX9.
Added missing entry in amdGpuArchValuesTbl for GFX9.

Location:
CLRadeonExtender/trunk
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • CLRadeonExtender/trunk/amdasm/DisasmAmdCL2.cpp

    r2803 r3118  
    359359static AmdCL2KernelConfig genKernelConfig(size_t metadataSize, const cxbyte* metadata,
    360360        size_t setupSize, const cxbyte* setup, const std::vector<size_t> samplerOffsets,
    361         const std::vector<AmdCL2RelaEntry>& textRelocs)
     361        const std::vector<AmdCL2RelaEntry>& textRelocs, bool isGCN14)
    362362{
    363363    AmdCL2KernelConfig config{};
     
    394394    if (ksetup1==0x2f) // if generic pointer support
    395395        config.useGeneric = true;
    396     else
     396    else if (!isGCN14)
    397397        config.useEnqueue = (ksetup1&0x20)!=0;
     398    else // for GFX9 - check number of all registers must be 6+
     399        config.useEnqueue = (setupData->sgprsNum+6 == setupData->sgprsNumAll);
    398400   
    399401    // get samplers
     
    852854        if (doDumpConfig)
    853855        {
     856            const bool isGCN14 = getGPUArchitectureFromDeviceType(
     857                        amdCL2Input->deviceType) >= GPUArchitecture::GCN1_4;
    854858            AmdCL2KernelConfig config;
    855859            if (amdCL2Input->is64BitMode)
    856860                config = genKernelConfig<AmdCL2Types64>(kinput.metadataSize,
    857861                        kinput.metadata, kinput.setupSize, kinput.setup, samplerOffsets,
    858                         kinput.textRelocs);
     862                        kinput.textRelocs, isGCN14);
    859863            else
    860864                config = genKernelConfig<AmdCL2Types32>(kinput.metadataSize,
    861865                        kinput.metadata, kinput.setupSize, kinput.setup, samplerOffsets,
    862                         kinput.textRelocs);
     866                        kinput.textRelocs, isGCN14);
    863867            dumpAmdCL2KernelConfig(output, config);
    864868        }
  • CLRadeonExtender/trunk/amdasm/GCNAssembler.cpp

    r3116 r3118  
    16031603                    gcnVOPEnc, src0Op, extraMods, instrPlace))
    16041604            return false;
    1605         gcnAsm->instrRVUs[2].regField = GCNFIELD_DPPSDWA_SRC0;
     1605        if (gcnAsm->instrRVUs[2].regField != ASMFIELD_NONE)
     1606            gcnAsm->instrRVUs[2].regField = GCNFIELD_DPPSDWA_SRC0;
    16061607       
    16071608        if (extraMods.needSDWA && isGCN14)
     
    18171818                    gcnVOPEnc, src0Op, extraMods, instrPlace))
    18181819            return false;
    1819         gcnAsm->instrRVUs[1].regField = GCNFIELD_DPPSDWA_SRC0;
     1820        if (gcnAsm->instrRVUs[1].regField != ASMFIELD_NONE)
     1821            gcnAsm->instrRVUs[1].regField = GCNFIELD_DPPSDWA_SRC0;
    18201822        if (extraMods.needSDWA && isGCN14)
    18211823        {   // fix for extra type operand from SDWA
    18221824            AsmRegVarUsage* rvus = gcnAsm->instrRVUs;
    1823             if (rvus[2].regField != ASMFIELD_NONE && src0Op.range.isNonVGPR())
    1824                 rvus[2].regField = GCNFIELD_DPPSDWA_SSRC0;
     1825            if (rvus[1].regField != ASMFIELD_NONE && src0Op.range.isNonVGPR())
     1826                rvus[1].regField = GCNFIELD_DPPSDWA_SSRC0;
    18251827        }
    18261828    }
     
    20102012                    gcnVOPEnc, src0Op, extraMods, instrPlace))
    20112013            return false;
    2012         gcnAsm->instrRVUs[1].regField = GCNFIELD_DPPSDWA_SRC0;
     2014        if (gcnAsm->instrRVUs[1].regField != ASMFIELD_NONE)
     2015            gcnAsm->instrRVUs[1].regField = GCNFIELD_DPPSDWA_SRC0;
    20132016       
    20142017        if (extraMods.needSDWA && isGCN14)
    20152018        {   // fix for extra type operand from SDWA
    20162019            AsmRegVarUsage* rvus = gcnAsm->instrRVUs;
    2017             if (rvus[2].regField != ASMFIELD_NONE && src0Op.range.isNonVGPR())
    2018                 rvus[2].regField = GCNFIELD_DPPSDWA_SSRC0;
    2019             if (rvus[3].regField != ASMFIELD_NONE)
    2020                 rvus[3].regField = GCNFIELD_VOP_SSRC1;
     2020            if (rvus[1].regField != ASMFIELD_NONE && src0Op.range.isNonVGPR())
     2021                rvus[1].regField = GCNFIELD_DPPSDWA_SSRC0;
     2022            if (rvus[2].regField != ASMFIELD_NONE)
     2023                rvus[2].regField = GCNFIELD_VOP_SSRC1;
    20212024        }
    20222025    }
  • CLRadeonExtender/trunk/amdbin/AmdCL2BinGen.cpp

    r3117 r3118  
    11531153    fob.writeArray(40, kernelSetupBytesAfter8);
    11541154    IntAmdCL2SetupData setupData;
    1155     const cxuint neededExtraSGPRsNum = arch==GPUArchitecture::GCN1_2 ? 4 : 2;
     1155    const cxuint neededExtraSGPRsNum = arch>=GPUArchitecture::GCN1_2 ? 4 : 2;
    11561156    const cxuint extraSGPRsNum = (config.useEnqueue || config.useGeneric) ?
    11571157                neededExtraSGPRsNum : 0;
     
    11751175    else if (config.useArgs)
    11761176        setup1 = 0x9;
     1177    if (arch==GPUArchitecture::GCN1_4)
     1178        setup1 |= 0x20;
    11771179   
    11781180    SLEV(setupData.pgmRSRC2, calculatePgmRSRC2(config, arch));
     
    18351837    { 8, 0, 4 }, // GPUDeviceType::ELLESMERE
    18361838    { 8, 0, 4 }, // GPUDeviceType::BAFFIN
    1837     { 8, 0, 4 }  // GPUDeviceType::GFX804
     1839    { 8, 0, 4 }, // GPUDeviceType::GFX804
     1840    { 9, 0, 0 }  // GPUDeviceType::GFX900
    18381841};
    18391842
Note: See TracChangeset for help on using the changeset viewer.