Changeset 3256 in CLRX


Ignore:
Timestamp:
Aug 16, 2017, 8:13:55 PM (13 months ago)
Author:
matszpk
Message:

CLRadeonExtender: Gallium/ROCm: Always add the extra VCC register to the SGPR pool.
GPUId: Fixed name of bitmask in getGPUExtraRegsNum. Update CLRXDocs.

Location:
CLRadeonExtender/trunk
Files:
8 edited

Legend:

Unmodified
Added
Removed
  • CLRadeonExtender/trunk/amdasm/AsmGalliumFormat.cpp

    r3230 r3256  
    12691269            config.usedSGPRsNum = std::min(
    12701270                std::max(minRegsNum[0], kernelStates[i].allocRegs[0]) +
    1271                     getGPUExtraRegsNum(arch, REGTYPE_SGPR, kernelStates[i].allocRegFlags),
     1271                    getGPUExtraRegsNum(arch, REGTYPE_SGPR,
     1272                        kernelStates[i].allocRegFlags|GCN_VCC),
    12721273                    maxSGPRsNum); // include all extra sgprs
    12731274        }
  • CLRadeonExtender/trunk/amdasm/AsmROCmFormat.cpp

    r3194 r3256  
    15361536            config.usedSGPRsNum = std::min(
    15371537                std::max(minRegsNum[0], kernelStates[i]->allocRegs[0]) +
    1538                     getGPUExtraRegsNum(arch, REGTYPE_SGPR, flags),
     1538                    getGPUExtraRegsNum(arch, REGTYPE_SGPR, flags|GCN_VCC),
    15391539                    maxSGPRsNum); // include all extra sgprs
    15401540        }
  • CLRadeonExtender/trunk/doc/ClrxAsmGallium.md

    r3192 r3256  
    2828
    2929Assembler for GalliumCompute format counts all SGPR registers and add extra registers
    30 (VCC, FLAT_SCRATCH, XNACK_MASK) if any used to register pool. By default no extra register
    31 is added.
     30(VCC, FLAT_SCRATCH, XNACK_MASK), if any are used, to the register pool.
     31 The VCC register is included by default.
    3232
    3333## List of the specific pseudo-operations
  • CLRadeonExtender/trunk/doc/ClrxAsmRocm.md

    r2895 r3256  
    2828
    2929Assembler for ROCm format counts all SGPR registers and add extra registers
    30 (VCC, FLAT_SCRATCH, XNACK_MASK) if any used to register pool. Special fields determines
    31 what extra SGPR extra has been added.
     30(FLAT_SCRATCH, XNACK_MASK), if any are used, to the register pool. Special fields determine
     31which extra SGPRs have been added. The VCC register is included by default.
    3232
    3333## List of the specific pseudo-operations
  • CLRadeonExtender/trunk/tests/amdasm/AsmGalliumFormat.cpp

    r2682 r3256  
    275275  Kernel: name=aa23, offset=0
    276276    Config:
    277       dims=6, SGPRS=6, VGPRS=3, pgmRSRC2=0x0, ieeeMode=0x1
     277      dims=6, SGPRS=8, VGPRS=3, pgmRSRC2=0x0, ieeeMode=0x1
    278278      floatMode=0xc0, priority=3, localSize=0, scratchBuffer=0
    279279    Arg: scalar, true, griddim, size=8, tgtSize=8, tgtAlign=8
  • CLRadeonExtender/trunk/tests/amdasm/AsmROCmFormat.cpp

    r3194 r3256  
    217217      kernelCodePrefetchSize=0
    218218      maxScrachBackingMemorySize=0
    219       computePgmRsrc1=0x3c0000
     219      computePgmRsrc1=0x3c0040
    220220      computePgmRsrc2=0xa008081
    221221      enableSgprRegisterFlags=0x0
     
    226226      kernargSegmentSize=0
    227227      workgroupFbarrierCount=3324
    228       wavefrontSgprCount=8
     228      wavefrontSgprCount=10
    229229      workitemVgprCount=1
    230230      reservedVgprFirst=7
     
    262262      kernargSegmentSize=0
    263263      workgroupFbarrierCount=0
    264       wavefrontSgprCount=3
     264      wavefrontSgprCount=5
    265265      workitemVgprCount=1
    266266      reservedVgprFirst=0
     
    285285  Code:
    286286  0100000000000000010008000000030000010000000000000000000000000000
    287   0000000000000000000000000000000000003c008180000a000006006f000000
    288   16000000640000000000000000000000fc0c00000800010007000b0009000400
     287  0000000000000000000000000000000040003c008180000a000006006f000000
     288  16000000640000000000000000000000fc0c00000a00010007000b0009000400
    289289  6000620005040706ac4d03000000000000000000000000003a8bc94d00000000
    290290  0100000002000000030000000000000000000000000000000000000000000000
     
    302302  0100000000000000010008000000030000010000000000000000000000000000
    303303  0000000000000000000000000000000000000c00840000000800000000000000
    304   0000000000000000000000000000000000000000030001000000000000000000
     304  0000000000000000000000000000000000000000050001000000000000000000
    305305  0000000004040406342211010000000000000000000000000000000000000000
    306306  dededededededededededededededededededededededededededededededede
  • CLRadeonExtender/trunk/tests/amdasm/AsmRegPool.cpp

    r2822 r3256  
    110110    /* regflags test */
    111111    { ".gallium;.kernel xx;.config;.text;xx:s_xor_b64 "
    112         "s[10:11], s[4:5], s[62:63]", { { "xx", 12, 0 } } },
     112        "s[10:11], s[4:5], s[62:63]", { { "xx", 14, 0 } } },
    113113    { ".gallium;.kernel xx;.config;.text;xx:s_xor_b64 s[10:11], s[4:5], vcc",
    114114        { { "xx", 14, 0 } } },
     
    199199    .kcodeend
    200200.kcodeend)ffDXD",
    201         { { "kx0", 11, 5 }, { "kx1", 15, 15 }, { "kx2", 20, 0 },
    202             { "kx3", 17, 8 }, { "kx4", 9, 8 }, { "kx5", 12, 8 } }
     201        { { "kx0", 13, 5 }, { "kx1", 17, 15 }, { "kx2", 22, 0 },
     202            { "kx3", 19, 8 }, { "kx4", 11, 8 }, { "kx5", 14, 8 } }
    203203    },
    204204    {
     
    238238            v_sub_f32 v4,v1,v2
    239239.kcodeend)ffDXD",
    240         { { "kx0", 11, 5 }, { "kx1", 15, 5 }, { "kx2", 20, 5 },
    241             { "kx3", 17, 5 }, { "kx4", 9, 5 }, { "kx5", 12, 5 } }
     240        { { "kx0", 13, 5 }, { "kx1", 17, 5 }, { "kx2", 22, 5 },
     241            { "kx3", 19, 5 }, { "kx4", 11, 5 }, { "kx5", 14, 5 } }
    242242    },
    243243    {
     
    299299    .kcodeend
    300300.kcodeend)ffDXD",
    301         { { "kx0", 11, 25 }, { "kx1", 15, 42 }, { "kx2", 20, 16 }, { "kx3", 17, 42 },
    302           { "kx4", 9, 19 }, { "kx5", 12, 25 }, { "kx6", 22, 18 }, { "kx7", 24, 35 } }
     301        { { "kx0", 13, 25 }, { "kx1", 17, 42 }, { "kx2", 22, 16 }, { "kx3", 19, 42 },
     302          { "kx4", 11, 19 }, { "kx5", 14, 25 }, { "kx6", 24, 18 }, { "kx7", 26, 35 } }
    303303    },
    304304   
     
    378378    .kcodeend
    379379.kcodeend)ffDXD",
    380         { { "kx0", 11, 25 }, { "kx1", 15, 42 }, { "kx2", 20, 16 }, { "kx3", 17, 42 },
    381           { "kx4", 9, 19 }, { "kx5", 12, 25 }, { "kx6", 22, 18 }, { "kx7", 24, 35 } }
     380        { { "kx0", 13, 25 }, { "kx1", 17, 42 }, { "kx2", 22, 16 }, { "kx3", 19, 42 },
     381          { "kx4", 11, 19 }, { "kx5", 14, 25 }, { "kx6", 24, 18 }, { "kx7", 26, 35 } }
    382382    }
    383383};
  • CLRadeonExtender/trunk/utils/GPUId.cpp

    r3194 r3256  
    240240    else if ((flags & GCN_XNACK)!=0 && (architecture>GPUArchitecture::GCN1_1))
    241241        return 4;
    242     else if ((flags & REGCOUNT_NO_VCC)!=0)
     242    else if ((flags & GCN_VCC)!=0)
    243243        return 2;
    244244    return 0;
Note: See TracChangeset for help on using the changeset viewer.