Changeset 3118 in CLRX
- Timestamp:
- Jun 2, 2017, 2:46:26 PM (21 months ago)
- Location:
- CLRadeonExtender/trunk
- Files:
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
CLRadeonExtender/trunk/amdasm/DisasmAmdCL2.cpp
r2803 r3118 359 359 static AmdCL2KernelConfig genKernelConfig(size_t metadataSize, const cxbyte* metadata, 360 360 size_t setupSize, const cxbyte* setup, const std::vector<size_t> samplerOffsets, 361 const std::vector<AmdCL2RelaEntry>& textRelocs )361 const std::vector<AmdCL2RelaEntry>& textRelocs, bool isGCN14) 362 362 { 363 363 AmdCL2KernelConfig config{}; … … 394 394 if (ksetup1==0x2f) // if generic pointer support 395 395 config.useGeneric = true; 396 else 396 else if (!isGCN14) 397 397 config.useEnqueue = (ksetup1&0x20)!=0; 398 else // for GFX9 - check number of all registers must be 6+ 399 config.useEnqueue = (setupData->sgprsNum+6 == setupData->sgprsNumAll); 398 400 399 401 // get samplers … … 852 854 if (doDumpConfig) 853 855 { 856 const bool isGCN14 = getGPUArchitectureFromDeviceType( 857 amdCL2Input->deviceType) >= GPUArchitecture::GCN1_4; 854 858 AmdCL2KernelConfig config; 855 859 if (amdCL2Input->is64BitMode) 856 860 config = genKernelConfig<AmdCL2Types64>(kinput.metadataSize, 857 861 kinput.metadata, kinput.setupSize, kinput.setup, samplerOffsets, 858 kinput.textRelocs );862 kinput.textRelocs, isGCN14); 859 863 else 860 864 config = genKernelConfig<AmdCL2Types32>(kinput.metadataSize, 861 865 kinput.metadata, kinput.setupSize, kinput.setup, samplerOffsets, 862 kinput.textRelocs );866 kinput.textRelocs, isGCN14); 863 867 dumpAmdCL2KernelConfig(output, config); 864 868 } -
CLRadeonExtender/trunk/amdasm/GCNAssembler.cpp
r3116 r3118 1603 1603 gcnVOPEnc, src0Op, extraMods, instrPlace)) 1604 1604 return false; 1605 gcnAsm->instrRVUs[2].regField = GCNFIELD_DPPSDWA_SRC0; 1605 if (gcnAsm->instrRVUs[2].regField != ASMFIELD_NONE) 1606 gcnAsm->instrRVUs[2].regField = GCNFIELD_DPPSDWA_SRC0; 1606 1607 1607 1608 if (extraMods.needSDWA && isGCN14) … … 1817 1818 gcnVOPEnc, src0Op, extraMods, instrPlace)) 1818 1819 return false; 1819 gcnAsm->instrRVUs[1].regField = GCNFIELD_DPPSDWA_SRC0; 1820 if (gcnAsm->instrRVUs[1].regField != ASMFIELD_NONE) 1821 gcnAsm->instrRVUs[1].regField = GCNFIELD_DPPSDWA_SRC0; 1820 1822 if (extraMods.needSDWA && isGCN14) 1821 1823 { // fix for extra type operand from SDWA 1822 1824 AsmRegVarUsage* rvus = gcnAsm->instrRVUs; 1823 if (rvus[ 2].regField != ASMFIELD_NONE && src0Op.range.isNonVGPR())1824 rvus[ 2].regField = GCNFIELD_DPPSDWA_SSRC0;1825 if (rvus[1].regField != ASMFIELD_NONE && src0Op.range.isNonVGPR()) 1826 rvus[1].regField = GCNFIELD_DPPSDWA_SSRC0; 1825 1827 } 1826 1828 } … … 2010 2012 gcnVOPEnc, src0Op, extraMods, instrPlace)) 2011 2013 return false; 2012 gcnAsm->instrRVUs[1].regField = GCNFIELD_DPPSDWA_SRC0; 2014 if (gcnAsm->instrRVUs[1].regField != ASMFIELD_NONE) 2015 gcnAsm->instrRVUs[1].regField = GCNFIELD_DPPSDWA_SRC0; 2013 2016 2014 2017 if (extraMods.needSDWA && isGCN14) 2015 2018 { // fix for extra type operand from SDWA 2016 2019 AsmRegVarUsage* rvus = gcnAsm->instrRVUs; 2017 if (rvus[ 2].regField != ASMFIELD_NONE && src0Op.range.isNonVGPR())2018 rvus[ 2].regField = GCNFIELD_DPPSDWA_SSRC0;2019 if (rvus[ 3].regField != ASMFIELD_NONE)2020 rvus[ 3].regField = GCNFIELD_VOP_SSRC1;2020 if (rvus[1].regField != ASMFIELD_NONE && src0Op.range.isNonVGPR()) 2021 rvus[1].regField = GCNFIELD_DPPSDWA_SSRC0; 2022 if (rvus[2].regField != ASMFIELD_NONE) 2023 rvus[2].regField = GCNFIELD_VOP_SSRC1; 2021 2024 } 2022 2025 } -
CLRadeonExtender/trunk/amdbin/AmdCL2BinGen.cpp
r3117 r3118 1153 1153 fob.writeArray(40, kernelSetupBytesAfter8); 1154 1154 IntAmdCL2SetupData setupData; 1155 const cxuint neededExtraSGPRsNum = arch ==GPUArchitecture::GCN1_2 ? 4 : 2;1155 const cxuint neededExtraSGPRsNum = arch>=GPUArchitecture::GCN1_2 ? 4 : 2; 1156 1156 const cxuint extraSGPRsNum = (config.useEnqueue || config.useGeneric) ? 1157 1157 neededExtraSGPRsNum : 0; … … 1175 1175 else if (config.useArgs) 1176 1176 setup1 = 0x9; 1177 if (arch==GPUArchitecture::GCN1_4) 1178 setup1 |= 0x20; 1177 1179 1178 1180 SLEV(setupData.pgmRSRC2, calculatePgmRSRC2(config, arch)); … … 1835 1837 { 8, 0, 4 }, // GPUDeviceType::ELLESMERE 1836 1838 { 8, 0, 4 }, // GPUDeviceType::BAFFIN 1837 { 8, 0, 4 } // GPUDeviceType::GFX804 1839 { 8, 0, 4 }, // GPUDeviceType::GFX804 1840 { 9, 0, 0 } // GPUDeviceType::GFX900 1838 1841 }; 1839 1842
Note: See TracChangeset for help on using the changeset viewer.