Changeset 4875 in CLRX


Ignore:
Timestamp:
Jul 28, 2019, 10:44:33 PM (5 months ago)
Author:
matszpk
Message:

CLRadeonExtender: Asm: Add '--wave32' command line option and pseudo-ops '.nowave32' and '.wave32'.
Disasm: Print '.wave32' if DISASM_WAVE32 flag supplied. Docs: Add description of new pseudo-ops and command line options to documentation.

Location:
CLRadeonExtender/trunk
Files:
9 edited

Legend:

Unmodified
Added
Removed
  • CLRadeonExtender/trunk/CLRX/amdasm/Assembler.h

    r4692 r4875  
    6363    ASM_MACRONOCASE = 16, /// disable case-insensitive naming (default)
    6464    ASM_OLDMODPARAM = 32,   ///< use old modifier parametrization (values 0 and 1 only)
    65     ASM_WAVE32 = 0x64, ///< use WAVESIZE32
     65    ASM_WAVE32 = 64, ///< use WAVESIZE32
    6666    ASM_TESTRESOLVE = (1U<<30), ///< enable resolving symbols if ASM_TESTRUN enabled
    6767    ASM_TESTRUN = (1U<<31), ///< only for running tests
    6868    ASM_ALL = FLAGS_ALL&~(ASM_TESTRUN|ASM_TESTRESOLVE|ASM_BUGGYFPLIT|ASM_MACRONOCASE|
    69                     ASM_OLDMODPARAM)  ///< all flags
     69                    ASM_WAVE32|ASM_OLDMODPARAM)  ///< all flags
    7070};
    7171
     
    582582    bool macroCase;
    583583    bool oldModParam;
     584    bool wave32;
    584585   
    585586    cxuint inclusionLevel;
  • CLRadeonExtender/trunk/amdasm/AsmPseudoOps.cpp

    r4182 r4875  
    113113    "line", "ln", "local", "long",
    114114    "macro", "macrocase", "main", "noaltmacro",
    115     "nobuggyfplit", "nomacrocase", "nooldmodparam", "octa",
    116     "offset", "oldmodparam", "org",
     115    "nobuggyfplit", "nomacrocase", "nooldmodparam",
     116    "nowave32", "octa", "offset", "oldmodparam", "org",
    117117    "p2align", "policy", "print", "purgem", "quad",
    118118    "rawcode", "regvar", "rept", "rocm", "rodata",
     
    122122    "string64", "struct", "text", "title",
    123123    "undef", "unusing", "usereg", "using", "version",
    124     "warning", "weak", "while", "word"
     124    "warning", "wave32", "weak", "while", "word"
    125125};
    126126
     
    156156    ASMOP_LINE, ASMOP_LN, ASMOP_LOCAL, ASMOP_LONG,
    157157    ASMOP_MACRO, ASMOP_MACROCASE, ASMOP_MAIN, ASMOP_NOALTMACRO,
    158     ASMOP_NOBUGGYFPLIT, ASMOP_NOMACROCASE, ASMOP_NOOLDMODPARAM, ASMOP_OCTA,
    159     ASMOP_OFFSET, ASMOP_OLDMODPARAM, ASMOP_ORG,
     158    ASMOP_NOBUGGYFPLIT, ASMOP_NOMACROCASE, ASMOP_NOOLDMODPARAM,
     159    ASMOP_NOWAVE32, ASMOP_OCTA, ASMOP_OFFSET, ASMOP_OLDMODPARAM, ASMOP_ORG,
    160160    ASMOP_P2ALIGN, ASMOP_POLICY, ASMOP_PRINT, ASMOP_PURGEM, ASMOP_QUAD,
    161161    ASMOP_RAWCODE, ASMOP_REGVAR, ASMOP_REPT, ASMOP_ROCM, ASMOP_RODATA,
     
    165165    ASMOP_STRING64, ASMOP_STRUCT, ASMOP_TEXT, ASMOP_TITLE,
    166166    ASMOP_UNDEF, ASMOP_UNUSING, ASMOP_USEREG, ASMOP_USING, ASMOP_VERSION,
    167     ASMOP_WARNING, ASMOP_WEAK, ASMOP_WHILE, ASMOP_WORD
     167    ASMOP_WARNING, ASMOP_WAVE32, ASMOP_WEAK, ASMOP_WHILE, ASMOP_WORD
    168168};
    169169
     
    598598                oldModParam = false;
    599599            break;
     600        case ASMOP_NOWAVE32:
     601            if (AsmPseudoOps::checkGarbagesAtEnd(*this, linePtr))
     602                wave32 = false;
     603            break;
    600604        case ASMOP_OCTA:
    601605            AsmPseudoOps::putUInt128s(*this, stmtPlace, linePtr);
     
    697701        case ASMOP_WARNING:
    698702            AsmPseudoOps::doWarning(*this, stmtPlace, linePtr);
     703            break;
     704        case ASMOP_WAVE32:
     705            if (AsmPseudoOps::checkGarbagesAtEnd(*this, linePtr))
     706                wave32 = true;
    699707            break;
    700708        case ASMOP_WEAK:
  • CLRadeonExtender/trunk/amdasm/Assembler.cpp

    r4645 r4875  
    780780    macroCase = (flags & ASM_MACRONOCASE)==0;
    781781    oldModParam = (flags & ASM_OLDMODPARAM)!=0;
     782    wave32 = (flags & ASM_WAVE32)!=0;
    782783    localCount = macroCount = inclusionLevel = 0;
    783784    macroSubstLevel = repetitionLevel = 0;
     
    821822    macroCase = (flags & ASM_MACRONOCASE)==0;
    822823    oldModParam = (flags & ASM_OLDMODPARAM)!=0;
     824    wave32 = (flags & ASM_WAVE32)!=0;
    823825    localCount = macroCount = inclusionLevel = 0;
    824826    macroSubstLevel = repetitionLevel = 0;
  • CLRadeonExtender/trunk/amdasm/Disassembler.cpp

    r4424 r4875  
    622622    output.write(gpuName, ::strlen(gpuName));
    623623    output.put('\n');
     624    if ((flags & DISASM_WAVE32)!=0)
     625        output.write(".wave32\n", 8);
    624626   
    625627    // call main disassembly routine
  • CLRadeonExtender/trunk/amdasm/GCNAsmEncode1.cpp

    r4874 r4875  
    13291329    const bool isGCN14 = (arch & ARCH_GCN_1_4_5)!=0;
    13301330    const bool isGCN15 = (arch & ARCH_GCN_1_5)!=0;
    1331     const bool isWave32 = (asmr.getFlags() & ASM_WAVE32)!=0;
    13321331    GCNAssembler* gcnAsm = static_cast<GCNAssembler*>(asmr.isaAssembler);
    13331332   
     
    13551354    const bool haveSrcCC = mode1 == GCN_DS2_VCC || mode1 == GCN_SRC2_VCC;
    13561355   
    1357     const cxuint waveRegSize = (!isGCN15 || !isWave32 ||
     1356    const cxuint waveRegSize = (!isGCN15 || !asmr.wave32 ||
    13581357                        (gcnInsn.mode&GCN_VOP_NOWVSZ)!=0) ? 2 : 1;
    13591358    if (haveDstCC) /* VOP3b */
     
    17701769    const bool isGCN14 = (arch & ARCH_GCN_1_4_5)!=0;
    17711770    const bool isGCN15 = (arch & ARCH_GCN_1_5)!=0;
    1772     const bool isWave32 = (asmr.getFlags() & ASM_WAVE32)!=0;
    17731771   
    17741772    GCNAssembler* gcnAsm = static_cast<GCNAssembler*>(asmr.isaAssembler);
     
    17841782    {
    17851783        gcnAsm->setCurrentRVU(0);
    1786         const cxuint regSize = (!isGCN15 || !isWave32 ||
     1784        const cxuint regSize = (!isGCN15 || !asmr.wave32 ||
    17871785                        (gcnInsn.mode&GCN_VOP_NOWVSZ)!=0) ? 2 : 1;
    17881786        good &= parseSRegRange(asmr, linePtr, dstReg, arch, regSize, GCNFIELD_VOP3_SDST0,
     
    19611959    const bool isGCN14 = (arch & ARCH_GCN_1_4_5)!=0;
    19621960    const bool isGCN15 = (arch & ARCH_GCN_1_5)!=0;
    1963     const bool isWave32 = (asmr.getFlags() & ASM_WAVE32)!=0;
    19641961    const bool vop3p = (gcnInsn.mode & GCN_VOP3_VOP3P) != 0 ||
    19651962                    (gcnInsn.encoding == GCNENC_VOP3P);
     
    20142011            // SDST (VCC) (2 SGPR's)
    20152012            gcnAsm->setCurrentRVU(1);
    2016             const cxuint regSize = (!isGCN15 || !isWave32 ||
     2013            const cxuint regSize = (!isGCN15 || !asmr.wave32 ||
    20172014                        (gcnInsn.mode&GCN_VOP_NOWVSZ)!=0) ? 2 : 1;
    20182015            good &= parseSRegRange(asmr, linePtr, sdstReg, arch, regSize,
  • CLRadeonExtender/trunk/doc/ClrxAsmInvoke.md

    r4652 r4875  
    1111The `clrxasm` can be invoked in following way:
    1212
    13 clrxasm [-6Swam?] [-D SYM[=VALUE]] [-I PATH] [-o OUTFILE] [-b BINFORMAT]
     13clrxasm [-63Swam?] [-D SYM[=VALUE]] [-I PATH] [-o OUTFILE] [-b BINFORMAT]
    1414[-g GPUDEVICE] [-A ARCH] [-t VERSION] [--defsym=SYM[=VALUE]] [--includePath=PATH]
    1515[--output OUTFILE] [--binaryFormat=BINFORMAT] [--64bit] [--gpuType=GPUDEVICE]
    1616[--arch=ARCH] [--driverVersion=VERSION] [--llvmVersion=VERSION] [--newROCmBinFormat]
    1717[--forceAddSymbols] [--noWarnings] [--alternate] [--buggyFPLit] [--oldModParam]
    18 [--noMacroCase] [--policy=VERSION] [--help] [--usage] [--version] [file...]
     18[--noMacroCase] [--wave32] [--policy=VERSION] [--help] [--usage] [--version] [file...]
    1919
    2020### Input
     
    110110    Do not ignore letter's case in macro names (by default is ignored).
    111111
     112* **-3**, **--wave32**
     113
     114    Set wavefront size to 32 elements (applies only to GFX10 devices).
     115
    112116* **--policy=VERSION**
    113117
  • CLRadeonExtender/trunk/doc/ClrxAsmPseudoOps.md

    r4405 r4875  
    777777Disable old modifier parametrization that accepts only 0 and 1 values (to 0.1.5 version).
    778778
     779### .nowave32
     780
     781Disable the 32-element wavefront size (applies only to GFX10 devices).
     782Use the default 64-element wavefront size.
     783
    779784### .octa
    780785
     
    985990Print warning message specified in first argument.
    986991
     992### .wave32
     993
     994Set wavefront size to 32 elements (applies only to GFX10 devices).
     995
    987996### .weak
    988997
  • CLRadeonExtender/trunk/programs/clrxasm.cpp

    r4132 r4875  
    4646    { "arch", 'A', CLIArgType::TRIMMED_STRING, false, false,
    4747        "set GPU architecture for Gallium/raw binaries", "ARCH" },
     48    { "wave32", '3', CLIArgType::NONE, false, false,
     49        "set wavefront size as 32 elements", nullptr },
    4850    { "driverVersion", 't', CLIArgType::UINT, false, false,
    4951        "set driver version (for Amd/GalliumCompute)", "VERSION" },
     
    138140    if (cli.hasLongOption("oldModParam"))
    139141        flags |= ASM_OLDMODPARAM;
     142    if (cli.hasShortOption('3'))
     143        flags |= ASM_WAVE32;
    140144    if (cli.hasLongOption("newROCmBinFormat"))
    141145        newROCmBinFormat = true;
  • CLRadeonExtender/trunk/programs/clrxasm.pod

    r4652 r4875  
    77=head1 SYNOPSIS
    88
    9 clrxasm [-6Swam?] [-D SYM[=VALUE]] [-I PATH] [-o OUTFILE] [-b BINFORMAT]
     9clrxasm [-63Swam?] [-D SYM[=VALUE]] [-I PATH] [-o OUTFILE] [-b BINFORMAT]
    1010[-g GPUDEVICE] [-A ARCH] [-t VERSION] [--defsym=SYM[=VALUE]] [--includePath=PATH]
    1111[--output OUTFILE] [--binaryFormat=BINFORMAT] [--64bit] [--gpuType=GPUDEVICE]
    1212[--arch=ARCH] [--driverVersion=VERSION] [--llvmVersion=VERSION] [--newROCmBinFormat]
    1313[--forceAddSymbols] [--noWarnings] [--alternate] [--buggyFPLit] [--oldModParam]
    14 [--noMacroCase] [--policy=VERSION] [--help] [--usage] [--version] [file...]
     14[--noMacroCase] [--wave32] [--policy=VERSION] [--help] [--usage] [--version] [file...]
    1515
    1616=head1 DESCRIPTION
     
    114114Do not ignore letter's case in macro names (by default is ignored).
    115115
     116=item B<-3>, B<--wave32>
     117
     118Set wavefront size to 32 elements (applies only to GFX10 devices).
     119
    116120=item B<--policy=VERSION>
    117121
Note: See TracChangeset for help on using the changeset viewer.