Changeset 3075 in CLRX


Ignore:
Timestamp:
May 23, 2017, 3:36:14 PM (2 years ago)
Author:
matszpk
Message:

CLRadeonExtender: GCNDisasm: Add new FLAT instructions and the NV flag for RX VEGA. Ignore bit 7 (the eighth bit) of the opcode in the FLAT encoding.

Location:
CLRadeonExtender/trunk
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • CLRadeonExtender/trunk/amdasm/GCNDisasm.cpp

    r3074 r3075  
    135135};
    136136
    137 static const GCNEncodingSpace gcnInstrTableByCodeSpaces[2*(GCNENC_MAXVAL+1)+2+3] =
     137static const GCNEncodingSpace gcnInstrTableByCodeSpaces[2*(GCNENC_MAXVAL+1)+2+3+2] =
    138138{
    139139    { 0, 0 },
     
    155155    { 0x08ac, 0x80 }, /* GCNENC_MIMG, opcode = (7bit)<<18 */
    156156    { 0x092c, 0x1 }, /* GCNENC_EXP, opcode = none */
    157     { 0x092d, 0x100 }, /* GCNENC_FLAT, opcode = (8bit)<<18 (???8bit) */
    158     { 0x0a2d, 0x200 }, /* GCNENC_VOP3A, opcode = (9bit)<<17 (GCN1.1) */
    159     { 0x0a2d, 0x200 },  /* GCNENC_VOP3B, opcode = (9bit)<<17 (GCN1.1) */
    160     { 0x0c2d, 0x0 },
    161     { 0x0c2d, 0x80 }, /* GCNENC_SOPC, opcode = (7bit)<<16 (GCN1.2) */
    162     { 0x0cad, 0x80 }, /* GCNENC_SOPP, opcode = (7bit)<<16 (GCN1.2) */
    163     { 0x0d2d, 0x100 }, /* GCNENC_SOP1, opcode = (8bit)<<8 (GCN1.2) */
    164     { 0x0e2d, 0x80 }, /* GCNENC_SOP2, opcode = (7bit)<<23 (GCN1.2) */
    165     { 0x0ead, 0x20 }, /* GCNENC_SOPK, opcode = (5bit)<<23 (GCN1.2) */
    166     { 0x0ecd, 0x100 }, /* GCNENC_SMEM, opcode = (8bit)<<18 (GCN1.2) */
    167     { 0x0fcd, 0x100 }, /* GCNENC_VOPC, opcode = (8bit)<<27 (GCN1.2) */
    168     { 0x10cd, 0x100 }, /* GCNENC_VOP1, opcode = (8bit)<<9 (GCN1.2) */
    169     { 0x11cd, 0x40 }, /* GCNENC_VOP2, opcode = (6bit)<<25 (GCN1.2) */
    170     { 0x120d, 0x400 }, /* GCNENC_VOP3A, opcode = (10bit)<<16 (GCN1.2) */
    171     { 0x120d, 0x400 }, /* GCNENC_VOP3B, opcode = (10bit)<<16 (GCN1.2) */
    172     { 0x160d, 0x4 }, /* GCNENC_VINTRP, opcode = (2bit)<<16 (GCN1.2) */
    173     { 0x1611, 0x100 }, /* GCNENC_DS, opcode = (8bit)<<18 (GCN1.2) */
    174     { 0x1711, 0x80 }, /* GCNENC_MUBUF, opcode = (7bit)<<18 (GCN1.2) */
    175     { 0x1791, 0x10 }, /* GCNENC_MTBUF, opcode = (4bit)<<16 (GCN1.2) */
    176     { 0x17a1, 0x80 }, /* GCNENC_MIMG, opcode = (7bit)<<18 (GCN1.2) */
    177     { 0x1821, 0x1 }, /* GCNENC_EXP, opcode = none (GCN1.2) */
    178     { 0x1822, 0x100 }, /* GCNENC_FLAT, opcode = (8bit)<<18 (???8bit) */
    179     { 0x1922, 0x40 }, /* GCNENC_VOP2, opcode = (6bit)<<25 (RXVEGA) */
    180     { 0x1962, 0x400 }, /* GCNENC_VOP3B, opcode = (10bit)<<17  (RXVEGA) */
    181     { 0x1d62, 0x100 } /* GCNENC_VOP1, opcode = (8bit)<<9 (RXVEGA) */
     157    { 0x092d, 0x80 }, /* GCNENC_FLAT, opcode = (8bit)<<18 (???8bit) */
     158    { 0x09ad, 0x200 }, /* GCNENC_VOP3A, opcode = (9bit)<<17 (GCN1.1) */
     159    { 0x09ad, 0x200 },  /* GCNENC_VOP3B, opcode = (9bit)<<17 (GCN1.1) */
     160    { 0x0bad, 0x0 },
     161    { 0x0bad, 0x80 }, /* GCNENC_SOPC, opcode = (7bit)<<16 (GCN1.2) */
     162    { 0x0c2d, 0x80 }, /* GCNENC_SOPP, opcode = (7bit)<<16 (GCN1.2) */
     163    { 0x0cad, 0x100 }, /* GCNENC_SOP1, opcode = (8bit)<<8 (GCN1.2) */
     164    { 0x0dad, 0x80 }, /* GCNENC_SOP2, opcode = (7bit)<<23 (GCN1.2) */
     165    { 0x0e2d, 0x20 }, /* GCNENC_SOPK, opcode = (5bit)<<23 (GCN1.2) */
     166    { 0x0e4d, 0x100 }, /* GCNENC_SMEM, opcode = (8bit)<<18 (GCN1.2) */
     167    { 0x0f4d, 0x100 }, /* GCNENC_VOPC, opcode = (8bit)<<27 (GCN1.2) */
     168    { 0x104d, 0x100 }, /* GCNENC_VOP1, opcode = (8bit)<<9 (GCN1.2) */
     169    { 0x114d, 0x40 }, /* GCNENC_VOP2, opcode = (6bit)<<25 (GCN1.2) */
     170    { 0x118d, 0x400 }, /* GCNENC_VOP3A, opcode = (10bit)<<16 (GCN1.2) */
     171    { 0x118d, 0x400 }, /* GCNENC_VOP3B, opcode = (10bit)<<16 (GCN1.2) */
     172    { 0x158d, 0x4 }, /* GCNENC_VINTRP, opcode = (2bit)<<16 (GCN1.2) */
     173    { 0x1591, 0x100 }, /* GCNENC_DS, opcode = (8bit)<<18 (GCN1.2) */
     174    { 0x1691, 0x80 }, /* GCNENC_MUBUF, opcode = (7bit)<<18 (GCN1.2) */
     175    { 0x1711, 0x10 }, /* GCNENC_MTBUF, opcode = (4bit)<<16 (GCN1.2) */
     176    { 0x1721, 0x80 }, /* GCNENC_MIMG, opcode = (7bit)<<18 (GCN1.2) */
     177    { 0x17a1, 0x1 }, /* GCNENC_EXP, opcode = none (GCN1.2) */
     178    { 0x17a2, 0x80 }, /* GCNENC_FLAT, opcode = (8bit)<<18 (???8bit) */
     179    { 0x1822, 0x40 }, /* GCNENC_VOP2, opcode = (6bit)<<25 (RXVEGA) */
     180    { 0x1862, 0x400 }, /* GCNENC_VOP3B, opcode = (10bit)<<17  (RXVEGA) */
     181    { 0x1c62, 0x100 }, /* GCNENC_VOP1, opcode = (8bit)<<9 (RXVEGA) */
     182    { 0x1d62, 0x80 }, /* GCNENC_FLAT_SCRATCH, opcode = (8bit)<<18 (???8bit) RXVEGA */
     183    { 0x1de2, 0x80 }  /* GCNENC_FLAT_GLOBAL, opcode = (8bit)<<18 (???8bit) RXVEGA */
    182184};
    183185
     
    225227                const GCNEncodingSpace& encSpace4 =
    226228                    gcnInstrTableByCodeSpaces[2*GCNENC_MAXVAL+4 + encNoVOP2 + encVOP1];
     229                gcnInstrTableByCode[encSpace4.offset + instr.code] = instr;
     230            }
     231            else if((instr.archMask & ARCH_RXVEGA) != 0 &&
     232                instr.encoding == GCNENC_FLAT && (instr.mode & GCN_FLAT_MODEMASK) != 0)
     233            {   /* FLAT SCRATCH and GLOBAL instructions */
     234                const cxuint encFlatMode = (instr.mode & GCN_FLAT_MODEMASK)-1;
     235                const GCNEncodingSpace& encSpace4 =
     236                    gcnInstrTableByCodeSpaces[2*(GCNENC_MAXVAL+1)+2+3 + encFlatMode];
    227237                gcnInstrTableByCode[encSpace4.offset + instr.code] = instr;
    228238            }
     
    451461    { 18, 7 }, /* GCNENC_MIMG, opcode = (7bit)<<18 */
    452462    { 0, 0 }, /* GCNENC_EXP, opcode = none */
    453     { 18, 8 } /* GCNENC_FLAT, opcode = (8bit)<<18 (???8bit) */
     463    { 18, 7 } /* GCNENC_FLAT, opcode = (8bit)<<18 (???8bit) */
    454464};
    455465
     
    474484    { 18, 7 }, /* GCNENC_MIMG, opcode = (7bit)<<18 */
    475485    { 0, 0 }, /* GCNENC_EXP, opcode = none */
    476     { 18, 8 } /* GCNENC_FLAT, opcode = (8bit)<<18 (???8bit) */
     486    { 18, 7 } /* GCNENC_FLAT, opcode = (8bit)<<18 (???8bit) */
    477487};
    478488
     
    25752585            uint32_t insnCode2)
    25762586{
     2587    const bool isGCN14 = ((arch&ARCH_RXVEGA)!=0);
    25772588    FastOutputBuffer& output = dasm.output;
    25782589    char* bufStart = output.reserve(150);
     
    25852596    cxuint dstRegsNum = ((gcnInsn.mode & GCN_CMPSWAP)!=0) ? (dregsNum>>1) :  dregsNum;
    25862597    // tfe
    2587     dstRegsNum = (insnCode2 & 0x800000U)?dstRegsNum+1:dstRegsNum;
     2598    dstRegsNum = (!isGCN14 && (insnCode2 & 0x800000U))?dstRegsNum+1:dstRegsNum;
    25882599   
    25892600    if ((gcnInsn.mode & GCN_FLAT_ADST) == 0)
     
    26142625        decodeGCNVRegOperand((insnCode2>>8)&0xff, dregsNum, bufPtr);
    26152626    }
    2616    
     2627    // get inst_offset, with sign if FLAT_SCRATCH, FLAT_GLOBAL
     2628    const cxint instOffset = ((gcnInsn.mode & GCN_FLAT_MODEMASK) != 0 &&
     2629            (insnCode&0x1000) != 0) ? (-insnCode&0xfff) : insnCode&0xfff;
     2630    if (isGCN14 && instOffset != 0)
     2631    {
     2632        putChars(bufPtr, " inst_offset:", 13);
     2633        bufPtr += itocstrCStyle(instOffset, bufPtr, 7, 10);
     2634    }
     2635   
     2636    if (isGCN14 && (insnCode & 0x2000U))
     2637        putChars(bufPtr, " lds", 4);
    26172638    if (insnCode & 0x10000U)
    26182639        putChars(bufPtr, " glc", 4);
     
    26202641        putChars(bufPtr, " slc", 4);
    26212642    if (insnCode2 & 0x800000U)
    2622         putChars(bufPtr, " tfe", 4);
     2643    {
     2644        if (!isGCN14)
     2645            putChars(bufPtr, " tfe", 4);
     2646        else
     2647            putChars(bufPtr, " nv", 3);
     2648    }
    26232649   
    26242650    // print the values of fields that are not used but are not set to their defaults
     
    28872913                                (gcnEncoding != GCNENC_VOP2) +
    28882914                                (gcnEncoding == GCNENC_VOP1)];
     2915                gcnInsn = gcnInstrTableByCode.get() + encSpace4.offset + opcode;
     2916                if (gcnInsn->mnemonic == nullptr ||
     2917                        (curArchMask & gcnInsn->archMask) == 0)
     2918                    isIllegal = true; // illegal
     2919            }
     2920            else if (isGCN14 && (curArchMask & gcnInsn->archMask) == 0 &&
     2921                gcnEncoding == GCNENC_FLAT && ((insnCode>>14)&3)!=0)
     2922            {
     2923                const GCNEncodingSpace& encSpace4 =
     2924                    gcnInstrTableByCodeSpaces[2*(GCNENC_MAXVAL+1)+2+3 +
     2925                        ((insnCode>>14)&3)-1];
    28892926                gcnInsn = gcnInstrTableByCode.get() + encSpace4.offset + opcode;
    28902927                if (gcnInsn->mnemonic == nullptr ||
  • CLRadeonExtender/trunk/amdasm/GCNInstructions.cpp

    r3073 r3075  
    24192419    { "flat_load_dwordx3",   GCNENC_FLAT,   GCN_MUBUF_MX3|GCN_FLAT_NODATA, 15,   ARCH_RX2X0  },
    24202420    { "flat_store_byte",     GCNENC_FLAT,   GCN_MUBUF_MX1|GCN_FLAT_STORE, 24,   ARCH_GCN_1_1_2_4  },
     2421    { "flat_store_byte_d16_hi", GCNENC_FLAT,   GCN_MUBUF_MX1|GCN_FLAT_STORE, 25,   ARCH_RXVEGA  },
    24212422    { "flat_store_short",    GCNENC_FLAT,   GCN_MUBUF_MX1|GCN_FLAT_STORE, 26,   ARCH_GCN_1_1_2_4  },
     2423    { "flat_store_short_d16_hi", GCNENC_FLAT,   GCN_MUBUF_MX1|GCN_FLAT_STORE, 27,   ARCH_RXVEGA  },
    24222424    { "flat_store_dword",    GCNENC_FLAT,   GCN_MUBUF_MX1|GCN_FLAT_STORE, 28,   ARCH_GCN_1_1_2_4  },
    24232425    { "flat_store_dwordx2",  GCNENC_FLAT,   GCN_MUBUF_MX2|GCN_FLAT_STORE, 29,   ARCH_GCN_1_1_2_4  },
     
    24262428    { "flat_store_dwordx3",  GCNENC_FLAT,   GCN_MUBUF_MX3|GCN_FLAT_STORE, 30,   ARCH_GCN_1_2_4  },
    24272429    { "flat_store_dwordx4",  GCNENC_FLAT,   GCN_MUBUF_MX4|GCN_FLAT_STORE, 31,   ARCH_GCN_1_2_4  },
     2430    { "flat_load_ubyte_d16", GCNENC_FLAT,   GCN_MUBUF_MX1|GCN_FLAT_NODATA, 32,   ARCH_RXVEGA  },
     2431    { "flat_load_ubyte_d16_hi", GCNENC_FLAT,   GCN_MUBUF_MX1|GCN_FLAT_NODATA, 33,   ARCH_RXVEGA  },
     2432    { "flat_load_sbyte_d16", GCNENC_FLAT,   GCN_MUBUF_MX1|GCN_FLAT_NODATA, 34,   ARCH_RXVEGA  },
     2433    { "flat_load_sbyte_d16_hi", GCNENC_FLAT,   GCN_MUBUF_MX1|GCN_FLAT_NODATA, 35,   ARCH_RXVEGA  },
     2434    { "flat_load_short_d16", GCNENC_FLAT,   GCN_MUBUF_MX1|GCN_FLAT_NODATA, 36,   ARCH_RXVEGA  },
     2435    { "flat_load_short_d16_hi", GCNENC_FLAT,   GCN_MUBUF_MX1|GCN_FLAT_NODATA, 37,   ARCH_RXVEGA  },
    24282436    { "flat_atomic_swap",    GCNENC_FLAT,   GCN_MUBUF_MX1|GCN_MATOMIC, 48,   ARCH_RX2X0  },
    24292437    { "flat_atomic_cmpswap", GCNENC_FLAT,   GCN_MUBUF_MX2|GCN_ACMPSWAP, 49,   ARCH_RX2X0  },
  • CLRadeonExtender/trunk/amdasm/GCNInternals.h

    r3070 r3075  
    198198    GCN_CMPSWAP =  0x80,    /// ???
    199199    GCN_ACMPSWAP =  0x6080,    /// ???
     200    GCN_FLAT_FLAT = 0,
     201    GCN_FLAT_SCRATCH = 1,
     202    GCN_FLAT_GLOBAL = 2,
     203    GCN_FLAT_MODEMASK = 7,
    200204    GCN_MASK1 = 0xf0,
    201205    GCN_MASK2 = 0xf00,
  • CLRadeonExtender/trunk/tests/amdasm/GCNDisasmOpc11.cpp

    r3051 r3075  
    30833083    { 0xdc270000U, 0x2f8000bbU, true, "        flat_load_sbyte "
    30843084                "v[47:48], v[187:188] glc slc tfe\n" },
     3085    { 0xde270000U, 0x2f8000bbU, true, "        flat_load_sbyte "
     3086                "v[47:48], v[187:188] glc slc tfe\n" },
    30853087    { 0xdc2b0000U, 0x2f8000bbU, true, "        flat_load_ushort "
    30863088                "v[47:48], v[187:188] glc slc tfe\n" },
  • CLRadeonExtender/trunk/tests/amdasm/GCNDisasmOpc12.cpp

    r3052 r3075  
    30333033    { 0xdc030000U, 0x2f8041bbU, true, "        FLAT_ill_0      "
    30343034                "v[47:48], v[187:188], v65 glc slc tfe\n" },
     3035    { 0xde030000U, 0x2f8041bbU, true, "        FLAT_ill_0      "
     3036                "v[47:48], v[187:188], v65 glc slc tfe\n" },
    30353037    { 0xdc070000U, 0x2f8041bbU, true, "        FLAT_ill_1      "
    30363038                "v[47:48], v[187:188], v65 glc slc tfe\n" },
     
    30793081    /* FLAT instructions */
    30803082    { 0xdc470000U, 0x2f8000bbU, true, "        flat_load_sbyte "
     3083                "v[47:48], v[187:188] glc slc tfe\n" },
     3084    { 0xde470000U, 0x2f8000bbU, true, "        flat_load_sbyte "
    30813085                "v[47:48], v[187:188] glc slc tfe\n" },
    30823086    { 0xdc4b0000U, 0x2f8000bbU, true, "        flat_load_ushort "
  • CLRadeonExtender/trunk/tests/amdasm/GCNDisasmOpc14.cpp

    r3073 r3075  
    479479    { 0xf12cfb00U, 0x00159d79U, true, "        image_gather8h_pck "
    480480            "v[157:160], v[121:124], s[84:91], s[0:3] dmask:11 unorm glc a16 da\n" },
     481    /* FLAT encoding */
     482    { 0xdc400000U, 0x2f8000bbU, true, "        flat_load_ubyte v47, v[187:188] nv\n" },
     483    { 0xdc400000U, 0x2f0000bbU, true, "        flat_load_ubyte v47, v[187:188]\n" },
     484    { 0xdc402000U, 0x2f0000bbU, true, "        flat_load_ubyte v47, v[187:188] lds\n" },
     485    { 0xdc400211U, 0x2f0000bbU, true,
     486        "        flat_load_ubyte v47, v[187:188] inst_offset:529\n" },
     487    /* FLAT instructions */
     488    { 0xdc670000U, 0x008041bbU, true, "        flat_store_byte_d16_hi "
     489        "v[187:188], v65 glc slc nv\n" },
     490    { 0xdc6f0000U, 0x008041bbU, true, "        flat_store_short_d16_hi "
     491        "v[187:188], v65 glc slc nv\n" },
     492    { 0xdc830000U, 0x2f8000bbU, true, "        flat_load_ubyte_d16 "
     493        "v47, v[187:188] glc slc nv\n" },
     494    { 0xdc870000U, 0x2f8000bbU, true, "        flat_load_ubyte_d16_hi "
     495        "v47, v[187:188] glc slc nv\n" },
     496    { 0xdc8b0000U, 0x2f8000bbU, true, "        flat_load_sbyte_d16 "
     497        "v47, v[187:188] glc slc nv\n" },
     498    { 0xdc8f0000U, 0x2f8000bbU, true, "        flat_load_sbyte_d16_hi "
     499        "v47, v[187:188] glc slc nv\n" },
     500    { 0xdc930000U, 0x2f8000bbU, true, "        flat_load_short_d16 "
     501        "v47, v[187:188] glc slc nv\n" },
     502    { 0xdc970000U, 0x2f8000bbU, true, "        flat_load_short_d16_hi "
     503        "v47, v[187:188] glc slc nv\n" },
    481504    { 0, 0, false, nullptr }
    482505};
Note: See TracChangeset for help on using the changeset viewer.