Changeset 3099 in CLRX
- Timestamp: May 29, 2017, 5:21:21 PM (21 months ago)
- Location: CLRadeonExtender/trunk
- Files: 5 edited
Legend:
- Unmodified
- Added
- Removed
-
CLRadeonExtender/trunk/CLRX/amdasm/AsmDefs.h
r3083 r3099 484 484 GCNFIELD_FLAT_VDST, 485 485 GCNFIELD_FLAT_VDSTLAST, 486 GCNFIELD_DPPSDWA_SRC0 486 GCNFIELD_DPPSDWA_SRC0, 487 GCNFIELD_DPPSDWA_SSRC0 487 488 }; 488 489 -
CLRadeonExtender/trunk/amdasm/GCNAsmHelpers.cpp
r3098 r3099 2230 2230 good = false; 2231 2231 } 2232 const bool vopSDWA = (haveDstSel || haveDstUnused || haveSrc0Sel || haveSrc1Sel); 2232 const bool vopSDWA = (haveDstSel || haveDstUnused || haveSrc0Sel || haveSrc1Sel || 2233 opMods.sextMod!=0); 2233 2234 const bool vopDPP = (haveDppCtrl || haveBoundCtrl || haveBankMask || haveRowMask); 2234 2235 const bool isGCN14 = (arch & ARCH_RXVEGA) != 0; … … 2429 2430 } 2430 2431 2431 bool GCNAsmUtils::checkGCNVOPExtraModifers(Assembler& asmr, bool needImm, bool sextFlags,2432 bool vop3, GCNVOPEnc gcnVOPEnc, const GCNOperand& src0Op,2432 bool GCNAsmUtils::checkGCNVOPExtraModifers(Assembler& asmr, uint16_t arch, bool needImm, 2433 bool sextFlags, bool vop3, GCNVOPEnc gcnVOPEnc, const GCNOperand& src0Op, 2433 2434 VOPExtraModifiers& extraMods, const char* instrPlace) 2434 2435 { … … 2438 2439 return false; 2439 2440 } 2440 if ( !src0Op.range.isVGPR())2441 if ((arch & ARCH_RXVEGA)==0 && !src0Op.range.isVGPR()) 2441 2442 { 2442 2443 asmr.printError(instrPlace, "SRC0 must be a vector register with " 2443 2444 "SDWA or DPP word"); 2445 return false; 2446 } 2447 if ((arch & ARCH_RXVEGA)!=0 && extraMods.needDPP && !src0Op.range.isVGPR()) 2448 { 2449 asmr.printError(instrPlace, "SRC0 must be a vector register with DPP word"); 2444 2450 return false; 2445 2451 } … … 2449 2455 return false; 2450 2456 } 2451 if (sextFlags & extraMods.needDPP)2457 if (sextFlags && extraMods.needDPP) 2452 2458 { 2453 2459 asmr.printError(instrPlace, "SEXT modifiers is unavailable for DPP word"); -
CLRadeonExtender/trunk/amdasm/GCNAsmInternals.h
r3098 r3099 268 268 static bool checkGCNVOPEncoding(Assembler& asmr, const char* insnPtr, 269 269 GCNVOPEnc vopEnc, const VOPExtraModifiers* modifiers); 270 static bool checkGCNVOPExtraModifers(Assembler& asmr, bool needImm, bool sextFlags, 271 bool vop3, GCNVOPEnc gcnVOPEnc, const GCNOperand& src0Op, 272 VOPExtraModifiers& extraMods, const char* instrPlace); 270 static bool checkGCNVOPExtraModifers(Assembler& asmr, uint16_t arch, 271 bool needImm, bool sextFlags, bool vop3, GCNVOPEnc gcnVOPEnc, 272 const GCNOperand& src0Op, VOPExtraModifiers& extraMods, 273 const char* instrPlace); 273 274 274 275 static bool parseSOP2Encoding(Assembler& asmr, const GCNAsmInstruction& gcnInsn, -
CLRadeonExtender/trunk/amdasm/GCNAssembler.cpp
r3098 r3099 286 286 case GCNFIELD_M_SOFFSET: 287 287 rstart = (code2>>24)&0xff; 288 break; 289 case GCNFIELD_DPPSDWA_SSRC0: 290 rstart = code2&0xff; 288 291 break; 289 292 default: … … 1532 1535 ((opMods.sextMod&2) ? VOPOP_SEXT : 0); 1533 1536 1537 // TODO: to modify or delete 1538 extraMods.needSDWA |= ((src0Op.vopMods | src1Op.vopMods) & VOPOP_SEXT) != 0; 1534 1539 bool vop3 = /* src1=sgprs and not (DS1_SGPR|src1_SGPR) */ 1535 1540 //((src1Op.range.start<256) ^ sgprRegInSrc1) || 1536 (src1Op.range.isNonVGPR() ^ sgprRegInSrc1) || 1541 ((!isGCN14 || !extraMods.needSDWA) && 1542 (src1Op.range.isNonVGPR() ^ sgprRegInSrc1)) || 1537 1543 (!isGCN12 && (src0Op.vopMods!=0 || src1Op.vopMods!=0)) || 1538 (modifiers&~(VOP3_BOUNDCTRL|(extraMods.needSDWA?VOP3_CLAMP:0)))!=0 || 1544 (modifiers&~(VOP3_BOUNDCTRL|(extraMods.needSDWA?VOP3_CLAMP:0)| 1545 /* exclude OMOD if RXVEGA and SDWA used */ 1546 ((isGCN14 && extraMods.needSDWA) ? 3 : 0)))!=0 || 1539 1547 /* srcCC!=VCC or dstCC!=VCC */ 1540 1548 //(haveDstCC && dstCCReg.start!=106) || (haveSrcCC && srcCCReg.start!=106) || … … 1592 1600 gcnVOPEnc!=GCNVOPEnc::NORMAL)) 1593 1601 { /* if VOP_SDWA or VOP_DPP is required */ 1594 if (!checkGCNVOPExtraModifers(asmr, needImm, sextFlags, vop3, gcnVOPEnc, src0Op,1595 extraMods, instrPlace))1602 if (!checkGCNVOPExtraModifers(asmr, arch, needImm, sextFlags, vop3, 1603 gcnVOPEnc, src0Op, extraMods, instrPlace)) 1596 1604 return false; 1597 1605 gcnAsm->instrRVUs[2].regField = GCNFIELD_DPPSDWA_SRC0; 1606 1607 if (extraMods.needSDWA && isGCN14) 1608 { // fix for extra type operand from SDWA 1609 AsmRegVarUsage* rvus = gcnAsm->instrRVUs; 1610 if (rvus[2].regField != ASMFIELD_NONE && src0Op.range.isNonVGPR()) 1611 rvus[2].regField = GCNFIELD_DPPSDWA_SSRC0; 1612 if (rvus[3].regField != ASMFIELD_NONE) 1613 rvus[3].regField = GCNFIELD_VOP_SSRC1; 1614 } 1598 1615 } 1599 1616 else if (isGCN12 && ((src0Op.vopMods|src1Op.vopMods) & ~VOPOP_SEXT)!=0 && !sextFlags) … … 1649 1666 
((src1Op.vopMods&VOPOP_SEXT) ? (1U<<27) : 0) | 1650 1667 ((src1Op.vopMods&VOPOP_NEG) ? (1U<<28) : 0) | 1651 ((src1Op.vopMods&VOPOP_ABS) ? (1U<<29) : 0)); 1668 ((src1Op.vopMods&VOPOP_ABS) ? (1U<<29) : 0) | 1669 (src0Op.range.isNonVGPR() ? (1U<<23) : 0) | 1670 (src1Op.range.isNonVGPR() ? (1U<<31) : 0) | 1671 ((modifiers & 3) << 14)); 1652 1672 else if (extraMods.needDPP) 1653 1673 SLEV(words[wordsNum++], (src0Op.range.bstart()&0xff) | (extraMods.dppCtrl<<8) | … … 1727 1747 const uint16_t mode2 = (gcnInsn.mode & GCN_MASK2); 1728 1748 const bool isGCN12 = (arch & ARCH_GCN_1_2_4)!=0; 1749 const bool isGCN14 = (arch & ARCH_RXVEGA)!=0; 1729 1750 1730 1751 GCNAssembler* gcnAsm = static_cast<GCNAssembler*>(asmr.isaAssembler); … … 1772 1793 1773 1794 bool vop3 = ((!isGCN12 && src0Op.vopMods!=0) || 1774 (modifiers&~(VOP3_BOUNDCTRL|(extraMods.needSDWA?VOP3_CLAMP:0)))!=0) || 1795 (modifiers&~(VOP3_BOUNDCTRL|(extraMods.needSDWA?VOP3_CLAMP:0)| 1796 /* exclude OMOD if RXVEGA and SDWA used */ 1797 ((isGCN14 && extraMods.needSDWA) ? 3 : 0)))!=0) || 1775 1798 (gcnEncSize==GCNEncSize::BIT64); 1776 1799 … … 1790 1813 gcnVOPEnc!=GCNVOPEnc::NORMAL)) 1791 1814 { /* if VOP_SDWA or VOP_DPP is required */ 1792 if (!checkGCNVOPExtraModifers(asmr, needImm, sextFlags, vop3, gcnVOPEnc, src0Op,1793 extraMods, instrPlace))1815 if (!checkGCNVOPExtraModifers(asmr, arch, needImm, sextFlags, vop3, 1816 gcnVOPEnc, src0Op, extraMods, instrPlace)) 1794 1817 return false; 1795 1818 gcnAsm->instrRVUs[1].regField = GCNFIELD_DPPSDWA_SRC0; 1819 if (extraMods.needSDWA && isGCN14) 1820 { // fix for extra type operand from SDWA 1821 AsmRegVarUsage* rvus = gcnAsm->instrRVUs; 1822 if (rvus[2].regField != ASMFIELD_NONE && src0Op.range.isNonVGPR()) 1823 rvus[2].regField = GCNFIELD_DPPSDWA_SSRC0; 1824 } 1796 1825 } 1797 1826 else if (isGCN12 && (src0Op.vopMods & ~VOPOP_SEXT)!=0 && !sextFlags) … … 1832 1861 ((src0Op.vopMods&VOPOP_SEXT) ? (1U<<19) : 0) | 1833 1862 ((src0Op.vopMods&VOPOP_NEG) ? 
(1U<<20) : 0) | 1834 ((src0Op.vopMods&VOPOP_ABS) ? (1U<<21) : 0)); 1863 ((src0Op.vopMods&VOPOP_ABS) ? (1U<<21) : 0) | 1864 (src0Op.range.isNonVGPR() ? (1U<<23) : 0) | 1865 ((modifiers & 3) << 14)); 1835 1866 else if (extraMods.needDPP) 1836 1867 SLEV(words[wordsNum++], (src0Op.range.bstart()&0xff) | (extraMods.dppCtrl<<8) | … … 1970 2001 gcnVOPEnc!=GCNVOPEnc::NORMAL)) 1971 2002 { /* if VOP_SDWA or VOP_DPP is required */ 1972 if (!checkGCNVOPExtraModifers(asmr, needImm, sextFlags, vop3, gcnVOPEnc, src0Op,1973 extraMods, instrPlace))2003 if (!checkGCNVOPExtraModifers(asmr, arch, needImm, sextFlags, vop3, 2004 gcnVOPEnc, src0Op, extraMods, instrPlace)) 1974 2005 return false; 1975 2006 gcnAsm->instrRVUs[1].regField = GCNFIELD_DPPSDWA_SRC0; -
CLRadeonExtender/trunk/tests/amdasm/GCNAsmOpc14.cpp
r3092 r3099 264 264 { " s_atomic_dec_x2 s[50:51], s[60:61], 0x5b\n", 265 265 0xc2b20c9eU, 0x5b, true, true, "" }, 266 /* SDWA encoding */ 267 { " v_cndmask_b32 v154, v0, v107, vcc dst_sel:byte0 src0_sel:byte0 src1_sel:byte0", 268 0x0134d6f9U, 0, true, true, "" }, 269 { " v_cndmask_b32 v154, v0, v107, vcc " 270 "mul:4 dst_sel:byte0 src0_sel:byte0 src1_sel:byte0", 271 0x0134d6f9U, 0x8000, true, true, "" }, 272 { "v_add_f32 v154, v61, v107 dst_sel:byte0 src0_sel:byte0 src1_sel:byte0\n", 273 0x0334d6f9U, 0x0000003dU, true, true, "" }, 274 { "v_add_f32 v154, v61, vcc_hi dst_sel:byte0 src0_sel:byte0 src1_sel:byte0\n", 275 0x0334d6f9U, 0x8000003dU, true, true, "" }, 276 { "v_add_f32 v154, s61, v107 dst_sel:byte0 src0_sel:byte0 src1_sel:byte0\n", 277 0x0334d6f9U, 0x0080003dU, true, true, "" }, 278 { "v_cndmask_b32 v154, sext(-abs(v65)), v107, vcc mul:2", 279 0x0134d6f9U, 0x063e4641U, true, true, "" }, 280 { "v_add_f32 v154, sext(-abs(v65)), vcc_hi", 281 0x0334d6f9U, 0x863e0641U, true, true, "" }, 266 282 { nullptr, 0, 0, false, false, 0 } 267 283 };
Note: See TracChangeset for help on using the changeset viewer.