source: CLRX/CLRadeonExtender/trunk/utils/GPUId.cpp @ 3439

Last change on this file since 3439 was 3439, checked in by matszpk, 21 months ago

CLRadeonExtender: Fixed getGPUDeviceTypeFromName.

File size: 10.1 KB
Line 
1/*
2 *  CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3 *  Copyright (C) 2014-2017 Mateusz Szpakowski
4 *
5 *  This library is free software; you can redistribute it and/or
6 *  modify it under the terms of the GNU Lesser General Public
7 *  License as published by the Free Software Foundation; either
8 *  version 2.1 of the License, or (at your option) any later version.
9 *
10 *  This library is distributed in the hope that it will be useful,
11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 *  Lesser General Public License for more details.
14 *
15 *  You should have received a copy of the GNU Lesser General Public
16 *  License along with this library; if not, write to the Free Software
17 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18 */
19
20#include <CLRX/Config.h>
21#include <cstring>
22#include <utility>
23#include <cstdint>
24#include <climits>
25#include <CLRX/utils/Utilities.h>
26#include <CLRX/utils/GPUId.h>
27
28using namespace CLRX;
29
// Number of recognized GPU devices. Both gpuDeviceNameTable and
// gpuDeviceArchTable are declared with exactly this many entries.
static const size_t gpuDeviceTableSize = 24;

// Printable device names. getGPUDeviceTypeName() indexes this table with the
// numeric value of GPUDeviceType, so the order of entries must not change.
static const char* gpuDeviceNameTable[gpuDeviceTableSize] =
{
    "CapeVerde", "Pitcairn", "Tahiti", "Oland",     //  0 -  3
    "Bonaire", "Spectre", "Spooky", "Kalindi",      //  4 -  7
    "Hainan", "Hawaii", "Iceland", "Tonga",         //  8 - 11
    "Mullins", "Fiji", "Carrizo", "Dummy",          // 12 - 15
    "Goose", "Horse", "Stoney", "Ellesmere",        // 16 - 19
    "Baffin", "GFX804", "GFX900", "GFX901"          // 20 - 23
};
60
61// sorted GPU device names with device types
62static const std::pair<const char*, GPUDeviceType>
63lowerCaseGpuDeviceEntryTable[] =
64{
65    { "baffin", GPUDeviceType::BAFFIN },
66    { "bonaire", GPUDeviceType::BONAIRE },
67    { "capeverde", GPUDeviceType::CAPE_VERDE },
68    { "carrizo", GPUDeviceType::CARRIZO },
69    { "dummy", GPUDeviceType::DUMMY },
70    { "ellesmere", GPUDeviceType::ELLESMERE },
71    { "fiji", GPUDeviceType::FIJI },
72    { "gfx804", GPUDeviceType::GFX804 },
73    { "gfx900", GPUDeviceType::GFX900 },
74    { "gfx901", GPUDeviceType::GFX901 },
75    { "goose", GPUDeviceType::GOOSE },
76    { "hainan", GPUDeviceType::HAINAN },
77    { "hawaii", GPUDeviceType::HAWAII },
78    { "horse", GPUDeviceType::HORSE },
79    { "iceland", GPUDeviceType::ICELAND },
80    { "kalindi", GPUDeviceType::KALINDI },
81    { "mullins", GPUDeviceType::MULLINS },
82    { "oland", GPUDeviceType::OLAND },
83    { "pitcairn", GPUDeviceType::PITCAIRN },
84    { "polaris10", GPUDeviceType::ELLESMERE },
85    { "polaris11", GPUDeviceType::BAFFIN },
86    { "polaris12", GPUDeviceType::GFX804 },
87    { "polaris20", GPUDeviceType::ELLESMERE },
88    { "polaris21", GPUDeviceType::BAFFIN },
89    { "polaris22", GPUDeviceType::GFX804 },
90    { "raven", GPUDeviceType::GFX901 },
91    { "spectre", GPUDeviceType::SPECTRE },
92    { "spooky", GPUDeviceType::SPOOKY },
93    { "stoney", GPUDeviceType::STONEY },
94    { "tahiti", GPUDeviceType::TAHITI },
95    { "tonga", GPUDeviceType::TONGA },
96    { "topaz", GPUDeviceType::ICELAND },
97    { "vega10", GPUDeviceType::GFX900 },
98    { "vega11", GPUDeviceType::GFX901 }
99};
100
101static const size_t lowerCaseGpuDeviceEntryTableSize =
102    sizeof(lowerCaseGpuDeviceEntryTable) / sizeof(std::pair<const char*, GPUDeviceType>);
103
104// table of architectures for specific GPU devices
105static const GPUArchitecture gpuDeviceArchTable[gpuDeviceTableSize] =
106{
107    GPUArchitecture::GCN1_0, // CapeVerde
108    GPUArchitecture::GCN1_0, // Pitcairn
109    GPUArchitecture::GCN1_0, // Tahiti
110    GPUArchitecture::GCN1_0, // Oland
111    GPUArchitecture::GCN1_1, // Bonaire
112    GPUArchitecture::GCN1_1, // Spectre
113    GPUArchitecture::GCN1_1, // Spooky
114    GPUArchitecture::GCN1_1, // Kalindi
115    GPUArchitecture::GCN1_0, // Hainan
116    GPUArchitecture::GCN1_1, // Hawaii
117    GPUArchitecture::GCN1_2, // Iceland
118    GPUArchitecture::GCN1_2, // Tonga
119    GPUArchitecture::GCN1_1, // Mullins
120    GPUArchitecture::GCN1_2, // Fiji
121    GPUArchitecture::GCN1_2, // Carrizo
122    GPUArchitecture::GCN1_2, // Dummy
123    GPUArchitecture::GCN1_2, // Goose
124    GPUArchitecture::GCN1_2, // Horse
125    GPUArchitecture::GCN1_2, // Stoney
126    GPUArchitecture::GCN1_2, // Ellesmere
127    GPUArchitecture::GCN1_2, // Baffin
128    GPUArchitecture::GCN1_2, // GFX804
129    GPUArchitecture::GCN1_4, // GFX900
130    GPUArchitecture::GCN1_4  // GFX901
131};
132
// Printable architecture names. getGPUArchitectureName() indexes this table
// with the numeric value of GPUArchitecture.
static const char* gpuArchitectureNameTable[4] =
{
    "GCN1.0", "GCN1.1", "GCN1.2", "GCN1.4"
};
140
/* Names accepted by getGPUArchitectureFromName(). Every architecture owns
 * three consecutive entries (GCN name, GFX name, shortcut); the lookup divides
 * the index of the matching entry by 3 to obtain the architecture, so each
 * row must keep exactly three names. */
static const char* gpuArchitectureNameTable2[12] =
{
    /* architecture 0 */ "GCN1.0", "GFX6", "SI",
    /* architecture 1 */ "GCN1.1", "GFX7", "CI",
    /* architecture 2 */ "GCN1.2", "GFX8", "VI",
    /* architecture 3 */ "GCN1.4", "GFX9", "Vega"
};
150
151/// lowest device for architecture
152static const GPUDeviceType gpuLowestDeviceFromArchTable[4] =
153{
154    GPUDeviceType::CAPE_VERDE,
155    GPUDeviceType::BONAIRE,
156    GPUDeviceType::ICELAND,
157    GPUDeviceType::GFX900
158};
159
160GPUDeviceType CLRX::getGPUDeviceTypeFromName(const char* name)
161{
162    auto it = binaryMapFind(lowerCaseGpuDeviceEntryTable,
163                 lowerCaseGpuDeviceEntryTable+lowerCaseGpuDeviceEntryTableSize,
164                 name, CStringCaseLess());
165    if (it == lowerCaseGpuDeviceEntryTable+lowerCaseGpuDeviceEntryTableSize)
166        throw Exception("Unknown GPU device type");
167    return it->second;
168}
169
170GPUArchitecture CLRX::getGPUArchitectureFromName(const char* name)
171{
172    cxuint found = 0;
173    for (; found < sizeof gpuArchitectureNameTable2 /
174                sizeof(const char*); found++)
175        if (::strcasecmp(name, gpuArchitectureNameTable2[found]) == 0)
176            break;
177    if (found == sizeof(gpuArchitectureNameTable2) / sizeof(const char*))
178        throw Exception("Unknown GPU architecture");
179    return GPUArchitecture(found/3);
180}
181
182GPUArchitecture CLRX::getGPUArchitectureFromDeviceType(GPUDeviceType deviceType)
183{
184    if (deviceType > GPUDeviceType::GPUDEVICE_MAX)
185        throw Exception("Unknown GPU device type");
186    return gpuDeviceArchTable[cxuint(deviceType)];
187}
188
189GPUDeviceType CLRX::getLowestGPUDeviceTypeFromArchitecture(GPUArchitecture architecture)
190{
191    if (architecture > GPUArchitecture::GPUARCH_MAX)
192        throw Exception("Unknown GPU architecture");
193    return gpuLowestDeviceFromArchTable[cxuint(architecture)];
194}
195
196const char* CLRX::getGPUDeviceTypeName(GPUDeviceType deviceType)
197{
198    if (deviceType > GPUDeviceType::GPUDEVICE_MAX)
199        throw Exception("Unknown GPU device type");
200    return gpuDeviceNameTable[cxuint(deviceType)];
201}
202
203const char* CLRX::getGPUArchitectureName(GPUArchitecture architecture)
204{
205    if (architecture > GPUArchitecture::GPUARCH_MAX)
206        throw Exception("Unknown GPU architecture");
207    return gpuArchitectureNameTable[cxuint(architecture)];
208}
209
210cxuint CLRX::getGPUMaxRegistersNum(GPUArchitecture architecture, cxuint regType,
211                         Flags flags)
212{
213    if (architecture > GPUArchitecture::GPUARCH_MAX)
214        throw Exception("Unknown GPU architecture");
215    if (regType == REGTYPE_VGPR)
216        return 256; // VGPRS
217    cxuint maxSgprs = (architecture>=GPUArchitecture::GCN1_2) ? 102 : 104;
218    // subtract from max SGPRs num number of special registers (VCC, ...)
219    if ((flags & REGCOUNT_NO_FLAT)!=0 && (architecture>GPUArchitecture::GCN1_0))
220        maxSgprs -= (architecture>=GPUArchitecture::GCN1_2) ? 6 : 4;
221    else if ((flags & REGCOUNT_NO_XNACK)!=0 && (architecture>GPUArchitecture::GCN1_1))
222        maxSgprs -= 4;
223    else if ((flags & REGCOUNT_NO_VCC)!=0)
224        maxSgprs -= 2;
225    return maxSgprs;
226}
227
228cxuint CLRX::getGPUMaxRegsNumByArchMask(uint16_t archMask, cxuint regType)
229{
230    if (regType == REGTYPE_VGPR)
231        return 256;
232    else
233        return (archMask&(7U<<int(GPUArchitecture::GCN1_2))) ? 102 : 104;
234}
235
236void CLRX::getGPUSetupMinRegistersNum(GPUArchitecture architecture, cxuint dimMask,
237              cxuint userDataNum, Flags flags, cxuint* gprsOut)
238{
239    /// SGPRs
240    gprsOut[0] = ((dimMask&1)!=0) + ((dimMask&2)!=0) + ((dimMask&4)!=0);
241    /// VGPRS
242    gprsOut[1] = ((dimMask&4) ? 3 : ((dimMask&2) ? 2: (dimMask&1) ? 1 : 0));
243    gprsOut[0] += userDataNum + ((flags & GPUSETUP_TGSIZE_EN)!=0) +
244            ((flags & GPUSETUP_SCRATCH_EN)!=0);
245}
246
247size_t CLRX::getGPUMaxLocalSize(GPUArchitecture architecture)
248{
249    return 32768;
250}
251
252size_t CLRX::getGPUMaxGDSSize(GPUArchitecture architecture)
253{
254    return 65536;
255}
256
257// get extra (special) registers depends on architectures and flags
258cxuint CLRX::getGPUExtraRegsNum(GPUArchitecture architecture, cxuint regType, Flags flags)
259{
260    if (regType == REGTYPE_VGPR)
261        return 0;
262    if ((flags & GCN_FLAT)!=0 && (architecture>GPUArchitecture::GCN1_0))
263        return (architecture>=GPUArchitecture::GCN1_2) ? 6 : 4;
264    else if ((flags & GCN_XNACK)!=0 && (architecture>GPUArchitecture::GCN1_1))
265        return 4;
266    else if ((flags & GCN_VCC)!=0)
267        return 2;
268    return 0;
269}
270
271uint32_t CLRX::calculatePgmRSrc1(GPUArchitecture arch, cxuint vgprsNum, cxuint sgprsNum,
272            cxuint priority, cxuint floatMode, bool privMode, bool dx10Clamp,
273            bool debugMode, bool ieeeMode)
274{
275    return ((vgprsNum-1)>>2) | (((sgprsNum-1)>>3)<<6) |
276            ((uint32_t(floatMode)&0xff)<<12) |
277            (ieeeMode?1U<<23:0) | (uint32_t(priority&3)<<10) |
278            (privMode?1U<<20:0) | (dx10Clamp?1U<<21:0) |
279            (debugMode?1U<<22:0);
280}
281
282uint32_t CLRX::calculatePgmRSrc2(GPUArchitecture arch, bool scratchEn, cxuint userDataNum,
283            bool trapPresent, cxuint dimMask, cxuint defDimValues, bool tgSizeEn,
284            cxuint ldsSize, cxuint exceptions)
285{
286    // GCN1.1 and later have 512 byte banks instead 256
287    const cxuint ldsShift = arch<GPUArchitecture::GCN1_1 ? 8 : 9;
288    const uint32_t ldsMask = (1U<<ldsShift)-1U;
289    uint32_t dimValues = 0;
290    // calculate dimMask (TGID_X_EN, ..., TIDIG_COMP_CNT fields)
291    if (dimMask != UINT_MAX)
292        dimValues = ((dimMask&7)<<7) | (((dimMask&4) ? 2 : (dimMask&2) ? 1 : 0)<<11);
293    else // use default value for dimensions
294        dimValues = defDimValues;
295    return uint32_t(scratchEn ? 1U : 0U) | (uint32_t(userDataNum)<<1) |
296            dimValues | (tgSizeEn ? 0x400U : 0U) | (trapPresent ? 0x40U : 0U) |
297            (((uint32_t(ldsSize+ldsMask)>>ldsShift)&0x1ff)<<15) |
298            ((uint32_t(exceptions)&0x7f)<<24);
299}
300
Note: See TracBrowser for help on using the repository browser.