source: CLRX/CLRadeonExtender/trunk/utils/GPUId.cpp @ 3348

Last change on this file since 3348 was 3348, checked in by matszpk, 13 months ago

CLRadeonExtender: Move routines to calculate PGMRSRC1 and PGMRSRC2 to GPUId code. Apply these function in code.

File size: 8.9 KB
Line 
1/*
2 *  CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3 *  Copyright (C) 2014-2017 Mateusz Szpakowski
4 *
5 *  This library is free software; you can redistribute it and/or
6 *  modify it under the terms of the GNU Lesser General Public
7 *  License as published by the Free Software Foundation; either
8 *  version 2.1 of the License, or (at your option) any later version.
9 *
10 *  This library is distributed in the hope that it will be useful,
11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 *  Lesser General Public License for more details.
14 *
15 *  You should have received a copy of the GNU Lesser General Public
16 *  License along with this library; if not, write to the Free Software
17 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18 */
19
20#include <CLRX/Config.h>
21#include <cstring>
22#include <utility>
23#include <cstdint>
24#include <climits>
25#include <CLRX/utils/Utilities.h>
26#include <CLRX/utils/GPUId.h>
27
28using namespace CLRX;
29
/* TODO: add routines to calculate the pgmRSRCs and the local size,
 * and use them throughout the code */
32
/* Number of entries in each of the per-device tables in this file. */
static const size_t gpuDeviceTableSize = 23;

/* Printable device names, indexed by the numeric value of GPUDeviceType
 * (see getGPUDeviceTypeName). The order must stay in sync with
 * gpuDeviceArchTable, which is indexed the same way.
 * Both the strings and the pointers are constant: the table is read-only. */
static const char* const gpuDeviceNameTable[gpuDeviceTableSize] =
{
    "CapeVerde",
    "Pitcairn",
    "Tahiti",
    "Oland",
    "Bonaire",
    "Spectre",
    "Spooky",
    "Kalindi",
    "Hainan",
    "Hawaii",
    "Iceland",
    "Tonga",
    "Mullins",
    "Fiji",
    "Carrizo",
    "Dummy",
    "Goose",
    "Horse",
    "Stoney",
    "Ellesmere",
    "Baffin",
    "GFX804",
    "GFX900"
};
61
62static std::pair<const char*, GPUDeviceType>
63lowerCaseGpuDeviceEntryTable[gpuDeviceTableSize] =
64{
65    { "baffin", GPUDeviceType::BAFFIN },
66    { "bonaire", GPUDeviceType::BONAIRE },
67    { "capeverde", GPUDeviceType::CAPE_VERDE },
68    { "carrizo", GPUDeviceType::CARRIZO },
69    { "dummy", GPUDeviceType::DUMMY },
70    { "ellesmere", GPUDeviceType::ELLESMERE },
71    { "fiji", GPUDeviceType::FIJI },
72    { "gfx804", GPUDeviceType::GFX804 },
73    { "gfx900", GPUDeviceType::GFX900 },
74    { "goose", GPUDeviceType::GOOSE },
75    { "hainan", GPUDeviceType::HAINAN },
76    { "hawaii", GPUDeviceType::HAWAII },
77    { "horse", GPUDeviceType::HORSE },
78    { "iceland", GPUDeviceType::ICELAND },
79    { "kalindi", GPUDeviceType::KALINDI },
80    { "mullins", GPUDeviceType::MULLINS },
81    { "oland", GPUDeviceType::OLAND },
82    { "pitcairn", GPUDeviceType::PITCAIRN },
83    { "spectre", GPUDeviceType::SPECTRE },
84    { "spooky", GPUDeviceType::SPOOKY },
85    { "stoney", GPUDeviceType::STONEY },
86    { "tahiti", GPUDeviceType::TAHITI },
87    { "tonga", GPUDeviceType::TONGA }
88};
89
90static const GPUArchitecture gpuDeviceArchTable[gpuDeviceTableSize] =
91{
92    GPUArchitecture::GCN1_0, // CapeVerde
93    GPUArchitecture::GCN1_0, // Pitcairn
94    GPUArchitecture::GCN1_0, // Tahiti
95    GPUArchitecture::GCN1_0, // Oland
96    GPUArchitecture::GCN1_1, // Bonaire
97    GPUArchitecture::GCN1_1, // Spectre
98    GPUArchitecture::GCN1_1, // Spooky
99    GPUArchitecture::GCN1_1, // Kalindi
100    GPUArchitecture::GCN1_0, // Hainan
101    GPUArchitecture::GCN1_1, // Hawaii
102    GPUArchitecture::GCN1_2, // Iceland
103    GPUArchitecture::GCN1_2, // Tonga
104    GPUArchitecture::GCN1_1, // Mullins
105    GPUArchitecture::GCN1_2, // Fiji
106    GPUArchitecture::GCN1_2, // Carrizo
107    GPUArchitecture::GCN1_2, // Dummy
108    GPUArchitecture::GCN1_2, // Goose
109    GPUArchitecture::GCN1_2, // Horse
110    GPUArchitecture::GCN1_2, // Stoney
111    GPUArchitecture::GCN1_2, // Ellesmere
112    GPUArchitecture::GCN1_2, // Baffin
113    GPUArchitecture::GCN1_2, // GFX804
114    GPUArchitecture::GCN1_4  // GFX900
115};
116
/* Canonical architecture names, indexed by the numeric value of GPUArchitecture
 * (see getGPUArchitectureName). Pointers are const: the table is read-only. */
static const char* const gpuArchitectureNameTable[4] =
{
    "GCN1.0",
    "GCN1.1",
    "GCN1.2",
    "GCN1.4"
};
124
/* All names accepted by getGPUArchitectureFromName: three aliases per
 * architecture, one architecture per row, so that (index / 3) is the numeric
 * value of the matching GPUArchitecture.
 * Pointers are const: the table is read-only. */
static const char* const gpuArchitectureNameTable2[12] =
{
    "GCN1.0", "GFX6", "SI",
    "GCN1.1", "GFX7", "CI",
    "GCN1.2", "GFX8", "VI",
    "GCN1.4", "GFX9", "Vega"
};
132
133static const GPUDeviceType gpuLowestDeviceFromArchTable[4] =
134{
135    GPUDeviceType::CAPE_VERDE,
136    GPUDeviceType::BONAIRE,
137    GPUDeviceType::TONGA,
138    GPUDeviceType::GFX900
139};
140
141GPUDeviceType CLRX::getGPUDeviceTypeFromName(const char* name)
142{
143    auto it = binaryMapFind(lowerCaseGpuDeviceEntryTable,
144                 lowerCaseGpuDeviceEntryTable+gpuDeviceTableSize, name, CStringCaseLess());
145    if (it == lowerCaseGpuDeviceEntryTable+gpuDeviceTableSize)
146        throw Exception("Unknown GPU device type");
147    return it->second;
148}
149
150GPUArchitecture CLRX::getGPUArchitectureFromName(const char* name)
151{
152    cxuint found = 0;
153    for (; found < sizeof gpuArchitectureNameTable2 /
154                sizeof(const char*); found++)
155        if (::strcasecmp(name, gpuArchitectureNameTable2[found]) == 0)
156            break;
157    if (found == sizeof(gpuArchitectureNameTable2) / sizeof(const char*))
158        throw Exception("Unknown GPU architecture");
159    return GPUArchitecture(found/3);
160}
161
162GPUArchitecture CLRX::getGPUArchitectureFromDeviceType(GPUDeviceType deviceType)
163{
164    if (deviceType > GPUDeviceType::GPUDEVICE_MAX)
165        throw Exception("Unknown GPU device type");
166    return gpuDeviceArchTable[cxuint(deviceType)];
167}
168
169GPUDeviceType CLRX::getLowestGPUDeviceTypeFromArchitecture(GPUArchitecture architecture)
170{
171    if (architecture > GPUArchitecture::GPUARCH_MAX)
172        throw Exception("Unknown GPU architecture");
173    return gpuLowestDeviceFromArchTable[cxuint(architecture)];
174}
175
176const char* CLRX::getGPUDeviceTypeName(GPUDeviceType deviceType)
177{
178    if (deviceType > GPUDeviceType::GPUDEVICE_MAX)
179        throw Exception("Unknown GPU device type");
180    return gpuDeviceNameTable[cxuint(deviceType)];
181}
182
183const char* CLRX::getGPUArchitectureName(GPUArchitecture architecture)
184{
185    if (architecture > GPUArchitecture::GPUARCH_MAX)
186        throw Exception("Unknown GPU architecture");
187    return gpuArchitectureNameTable[cxuint(architecture)];
188}
189
190cxuint CLRX::getGPUMaxRegistersNum(GPUArchitecture architecture, cxuint regType,
191                         Flags flags)
192{
193    if (architecture > GPUArchitecture::GPUARCH_MAX)
194        throw Exception("Unknown GPU architecture");
195    if (regType == REGTYPE_VGPR)
196        return 256; // VGPRS
197    cxuint maxSgprs = (architecture>=GPUArchitecture::GCN1_2) ? 102 : 104;
198    if ((flags & REGCOUNT_NO_FLAT)!=0 && (architecture>GPUArchitecture::GCN1_0))
199        maxSgprs -= (architecture>=GPUArchitecture::GCN1_2) ? 6 : 4;
200    else if ((flags & REGCOUNT_NO_XNACK)!=0 && (architecture>GPUArchitecture::GCN1_1))
201        maxSgprs -= 4;
202    else if ((flags & REGCOUNT_NO_VCC)!=0)
203        maxSgprs -= 2;
204    return maxSgprs;
205}
206
207cxuint CLRX::getGPUMaxRegsNumByArchMask(uint16_t archMask, cxuint regType)
208{
209    if (regType == REGTYPE_VGPR)
210        return 256;
211    else
212        return (archMask&(7U<<int(GPUArchitecture::GCN1_2))) ? 102 : 104;
213}
214
215void CLRX::getGPUSetupMinRegistersNum(GPUArchitecture architecture, cxuint dimMask,
216              cxuint userDataNum, Flags flags, cxuint* gprsOut)
217{
218    /// SGPRs
219    gprsOut[0] = ((dimMask&1)!=0) + ((dimMask&2)!=0) + ((dimMask&4)!=0);
220    /// VGPRS
221    gprsOut[1] = ((dimMask&4) ? 3 : ((dimMask&2) ? 2: (dimMask&1) ? 1 : 0));
222    gprsOut[0] += userDataNum + ((flags & GPUSETUP_TGSIZE_EN)!=0) +
223            ((flags & GPUSETUP_SCRATCH_EN)!=0);
224}
225
226size_t CLRX::getGPUMaxLocalSize(GPUArchitecture architecture)
227{
228    return 32768;
229}
230
231size_t CLRX::getGPUMaxGDSSize(GPUArchitecture architecture)
232{
233    return 65536;
234}
235
236cxuint CLRX::getGPUExtraRegsNum(GPUArchitecture architecture, cxuint regType, Flags flags)
237{
238    if (regType == 1)
239        return 0;
240    if ((flags & GCN_FLAT)!=0 && (architecture>GPUArchitecture::GCN1_0))
241        return (architecture>=GPUArchitecture::GCN1_2) ? 6 : 4;
242    else if ((flags & GCN_XNACK)!=0 && (architecture>GPUArchitecture::GCN1_1))
243        return 4;
244    else if ((flags & GCN_VCC)!=0)
245        return 2;
246    return 0;
247}
248
249uint32_t CLRX::calculatePgmRSrc1(GPUArchitecture arch, cxuint vgprsNum, cxuint sgprsNum,
250            cxuint priority, cxuint floatMode, bool privMode, bool dx10Clamp,
251            bool debugMode, bool ieeeMode)
252{
253    return ((vgprsNum-1)>>2) | (((sgprsNum-1)>>3)<<6) |
254            ((uint32_t(floatMode)&0xff)<<12) |
255            (ieeeMode?1U<<23:0) | (uint32_t(priority&3)<<10) |
256            (privMode?1U<<20:0) | (dx10Clamp?1U<<21:0) |
257            (debugMode?1U<<22:0);
258}
259
260uint32_t CLRX::calculatePgmRSrc2(GPUArchitecture arch, bool scratchEn, cxuint userDataNum,
261            bool trapPresent, cxuint dimMask, cxuint defDimValues, bool tgSizeEn,
262            cxuint ldsSize, cxuint exceptions)
263{
264    const cxuint ldsShift = arch<GPUArchitecture::GCN1_1 ? 8 : 9;
265    const uint32_t ldsMask = (1U<<ldsShift)-1U;
266    uint32_t dimValues = 0;
267    if (dimMask != UINT_MAX)
268        dimValues = ((dimMask&7)<<7) | (((dimMask&4) ? 2 : (dimMask&2) ? 1 : 0)<<11);
269    else //
270        dimValues = defDimValues;
271    return uint32_t(scratchEn ? 1U : 0U) | (uint32_t(userDataNum)<<1) |
272            dimValues | (tgSizeEn ? 0x400U : 0U) | (trapPresent ? 0x40U : 0U) |
273            (((uint32_t(ldsSize+ldsMask)>>ldsShift)&0x1ff)<<15) |
274            ((uint32_t(exceptions)&0x7f)<<24);
275}
276
Note: See TracBrowser for help on using the repository browser.