CLRX  1
Unofficial OpenCL extensions designed for Radeon GPUs
GPUId.h
Go to the documentation of this file.
1 /*
2  * CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3  * Copyright (C) 2014-2018 Mateusz Szpakowski
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18  */
23 #ifndef __CLRX_GPUID_H__
24 #define __CLRX_GPUID_H__
25 
26 #include <CLRX/Config.h>
27 #include <CLRX/utils/Utilities.h>
28 #include <string>
29 
31 namespace CLRX
32 {
33 
36 {
37 public:
39  GPUIdException() = default;
41  explicit GPUIdException(const std::string& message);
43  virtual ~GPUIdException() noexcept = default;
44 };
45 
46 /*
47  * GPU identification utilities
48  */
49 
/// type of GPU device
// The underlying type is cxbyte. Only the first enumerator has an explicit value,
// so the devices are numbered consecutively from 0 in declaration order.
51 enum class GPUDeviceType: cxbyte
52 {
53  CAPE_VERDE = 0,
54  PITCAIRN,
55  TAHITI,
56  OLAND,
57  BONAIRE,
58  SPECTRE,
59  SPOOKY,
60  KALINDI,
61  HAINAN,
62  HAWAII,
63  ICELAND,
64  TONGA,
65  MULLINS,
66  FIJI,
67  CARRIZO,
68  DUMMY,
69  GOOSE,
70  HORSE,
71  STONEY,
72  ELLESMERE,
73  BAFFIN,
74  GFX804,
75  GFX900,
76  GFX901,
77  GFX902,
78  GFX903,
79  GFX904,
80  GFX905,
81  GFX906,
82  GFX907,
83  GFX1000,
84  GFX1010,
85  GFX1011,
86  GPUDEVICE_MAX = GFX1011, // alias of the last device listed above (GFX1011)
87 
93  RADEON_R9_290 = HAWAII // alias: same value as HAWAII
94 };
95 
98 {
99  GCN1_0 = 0,
100  GCN1_1,
101  GCN1_2,
102  GCN1_4,
103  GCN1_4_1,
104  GCN1_5,
105  GCN1_5_1,
106  GPUARCH_MAX = GCN1_5_1
107 };
108 
/// GPU architecture mask (one bit represents single GPU architecture)
110 typedef uint16_t GPUArchMask;
111 
112 // GCN architecture masks (bit represents architecture)
// The first group defines the single-bit values (several names share one bit);
// the ARCH_GCN_* values below are bitwise ORs of those single bits.
113 enum : GPUArchMask
114 {
115  ARCH_SOUTHERN_ISLANDS = 1,
116  ARCH_SEA_ISLANDS = 2,
117  ARCH_VOLCANIC_ISLANDS = 4,
118  ARCH_HD7X00 = 1, // same bit as ARCH_SOUTHERN_ISLANDS
119  ARCH_RX2X0 = 2, // same bit as ARCH_SEA_ISLANDS
120  ARCH_RX3X0 = 4, // same bit as ARCH_VOLCANIC_ISLANDS
121  ARCH_RXVEGA = 8,
122  ARCH_VEGA20 = 16,
123  ARCH_NAVI = 32,
124  ARCH_NAVI_DL = 64,
125  ARCH_GCN_1_0_1 = 0x3, // SOUTHERN_ISLANDS | SEA_ISLANDS
126  ARCH_GCN_1_1_2 = 0x6, // SEA_ISLANDS | VOLCANIC_ISLANDS
127  ARCH_GCN_1_1_5 = 0x62, // SEA_ISLANDS | NAVI | NAVI_DL
128  ARCH_GCN_1_1_2_4 = 0x1e, // SEA_ISLANDS | VOLCANIC_ISLANDS | RXVEGA | VEGA20
129  ARCH_GCN_1_0_1_2_4 = 0x1f, // SOUTHERN_ISLANDS | SEA_ISLANDS | VOLCANIC_ISLANDS | RXVEGA | VEGA20
130  ARCH_GCN_1_2_4 = 0x1c, // VOLCANIC_ISLANDS | RXVEGA | VEGA20
131  ARCH_GCN_1_4 = 0x18, // RXVEGA | VEGA20
132  ARCH_GCN_1_5 = 0x60, // NAVI | NAVI_DL
133  ARCH_GCN_1_5_1 = 0x40, // NAVI_DL only
134  ARCH_GCN_1_1_2_4_5 = 0x7e, // every single-bit value above except SOUTHERN_ISLANDS
135  ARCH_GCN_1_2_4_5 = 0x7c, // VOLCANIC_ISLANDS | RXVEGA | VEGA20 | NAVI | NAVI_DL
136  ARCH_GCN_1_4_5 = 0x78, // RXVEGA | VEGA20 | NAVI | NAVI_DL
137  ARCH_GCN_1_0_1_5 = 0x63, // SOUTHERN_ISLANDS | SEA_ISLANDS | NAVI | NAVI_DL
138  ARCH_GCN_ALL = 0xffff // all 16 bits of GPUArchMask set
139 };
140 
/// get GPU device type from name
142 extern GPUDeviceType getGPUDeviceTypeFromName(const char* name);
143 
/// get GPU device type name
145 extern const char* getGPUDeviceTypeName(GPUDeviceType deviceType);
146 
/// get GPU architecture from name
148 extern GPUArchitecture getGPUArchitectureFromName(const char* name);
149 
152 
155 
/// get GPU architecture name
157 extern const char* getGPUArchitectureName(GPUArchitecture architecture);
158 
/// check whether thisArch is the required GPU architecture
// NOTE(review): the matching rule (exact equality or a broader compatibility test)
// is defined in the implementation, which is not visible here - confirm before relying on it.
160 extern bool isThisGPUArchitecture(GPUArchitecture requiredArch, GPUArchitecture thisArch);
161 
// Register-count flags, stored as Flags bit values.
// NOTE(review): presumably passed as the `flags` argument of getGPUMaxRegistersNum
// to exclude the named extra registers from the count - confirm against the implementation.
162 enum: Flags {
163  REGCOUNT_NO_VCC = 1, // bit 0
164  REGCOUNT_NO_FLAT = 2, // bit 1
165  REGCOUNT_NO_XNACK = 4, // bit 2
166  REGCOUNT_NO_EXTRA = 0xffff // low 16 bits set; includes the three flags above
167 };
168 
// Register type selector values (cxuint).
// NOTE(review): presumably these are the `regType` values of the register-count
// functions in this header (0 - scalar, 1 - vector) - confirm.
169 enum: cxuint {
170  REGTYPE_SGPR = 0,
171  REGTYPE_VGPR // implicitly 1
172 };
173 
// Single-bit Flags values named after GCN register features.
// NOTE(review): presumably the `flags` argument of getGPUExtraRegsNum - confirm against callers.
174 enum : Flags
175 {
176  GCN_VCC = 1, // bit 0
177  GCN_FLAT = 2, // bit 1
178  GCN_XNACK = 4, // bit 2
179  GCN_REG_WAVE32 = 8 // bit 3
180 };
181 
// Single-bit Flags values for GPU setup.
// NOTE(review): presumably the `flags` argument of getGPUSetupMinRegistersNum - confirm.
182 enum: Flags {
183  GPUSETUP_TGSIZE_EN = 1, // bit 0
184  GPUSETUP_SCRATCH_EN = 2 // bit 1
185 };
186 
// Upper bound on the number of register types (cxuint constant).
// NOTE(review): only two REGTYPE_* values are declared in this header; confirm
// what the remaining slots are reserved for.
187 enum: cxuint {
188  MAX_REGTYPES_NUM = 4
189 };
190 
/// get maximum available registers for GPU (type: 0 - scalar, 1 - vector)
// `flags` defaults to 0 when the caller omits it.
192 extern cxuint getGPUMaxRegistersNum(GPUArchitecture architecture, cxuint regType,
193  Flags flags = 0);
194 
/// get maximum available registers for GPU (type: 0 - scalar, 1 - vector)
// Variant that takes a GPUArchMask instead of a single GPUArchitecture.
196 extern cxuint getGPUMaxRegsNumByArchMask(GPUArchMask archMask, cxuint regType);
197 
/// get maximum available addressable registers for GPU (type: 0 - scalar, 1 - vector)
199 extern cxuint getGPUMaxAddrRegsNumByArchMask(GPUArchMask archMask, cxuint regType);
200 
// NOTE(review): presumably reports whether the scalar register `index` is a special
// register for the architectures selected by archMask - confirm against the
// implementation; the criteria are not visible in this header.
202 extern bool isSpecialSGPRRegister(GPUArchMask archMask, cxuint index);
203 
/// get minimal number of required registers
// The result is written through `gprsOut` (the function returns void).
// NOTE(review): presumably gprsOut is an array indexed by register type - confirm its required size.
205 extern void getGPUSetupMinRegistersNum(GPUArchitecture architecture, cxuint dimMask,
206  cxuint userDataNum, Flags flags, cxuint* gprsOut);
207 
/// get default dimMask from PGMRSRC2
209 extern cxuint getDefaultDimMask(GPUArchitecture architecture, uint32_t pgmRSRC2);
210 
/// get maximum local size for GPU architecture
212 extern size_t getGPUMaxLocalSize(GPUArchitecture architecture);
213 
/// get maximum GDS size for GPU architecture
215 extern size_t getGPUMaxGDSSize(GPUArchitecture architecture);
216 
/// get extra registers (like VCC,FLAT_SCRATCH)
218 extern cxuint getGPUExtraRegsNum(GPUArchitecture architecture, cxuint regType,
219  Flags flags);
220 
223 {
224  uint32_t major;
225  uint32_t minor;
226  uint32_t stepping;
227 };
228 
/// calculate PGMRSRC1 register value
// NOTE(review): how each argument is encoded into the returned 32-bit value is
// defined in the implementation, not in this header - confirm there.
230 extern uint32_t calculatePgmRSrc1(GPUArchitecture arch, cxuint vgprsNum, cxuint sgprsNum,
231  cxuint priority, cxuint floatMode, bool privMode, bool dx10clamp,
232  bool debugMode, bool ieeeMode);
233 
/// calculate PGMRSRC2 register value
// NOTE(review): how each argument is encoded into the returned 32-bit value is
// defined in the implementation, not in this header - confirm there.
235 extern uint32_t calculatePgmRSrc2(GPUArchitecture arch, bool scratchEn, cxuint userDataNum,
236  bool trapPresent, cxuint dimMask, cxuint defDimValues, bool tgSizeEn,
237  cxuint ldsSize, cxuint exceptions);
238 
// NOTE(review): presumably calculates the PGMRSRC3 register value, by analogy with
// calculatePgmRSrc1/calculatePgmRSrc2 - confirm against the implementation.
239 extern uint32_t calculatePgmRSrc3(GPUArchitecture arch, cxuint sharedVgprsNum);
240 
243 {
244  AMDCL2 = 0,
245  OPENSOURCE,
246  ROCM
247 };
248 
251  GPUArchVersionTable table);
252 
253 // get GPU device type from architecture version
// The version is passed as three separate numbers: major, minor and stepping.
// NOTE(review): behavior for a version that matches no known device is not visible here - confirm.
254 extern GPUDeviceType getGPUDeviceTypeFromArchVersion(cxuint archMajor, cxuint archMinor,
255  cxuint archStepping);
256 
257 };
258 
259 #endif
uint32_t Flags
type for declaring various flags
Definition: Utilities.h:100
first iteration (Radeon HD7000 series)
virtual ~GPUIdException() noexcept=default
destructor
structure helper for AMDGPU architecture version
Definition: GPUId.h:222
AMD OpenCL 2.0 format.
GPUArchitecture getGPUArchitectureFromDeviceType(GPUDeviceType deviceType)
get GPUArchitecture from GPU device type
uint16_t GPUArchMask
GPU architecture mask (one bit represents single GPU architecture)
Definition: GPUId.h:110
GPUArchitecture getGPUArchitectureFromName(const char *name)
get GPU architecture from name
cxuint getDefaultDimMask(GPUArchitecture architecture, uint32_t pgmRSRC2)
get default dimMask from PGMRSRC2
cxuint getGPUMaxRegsNumByArchMask(GPUArchMask archMask, cxuint regType)
get maximum available registers for GPU (type: 0 - scalar, 1 - vector)
GPUArchitecture
GPU architecture.
Definition: GPUId.h:97
GPUId exception class.
Definition: GPUId.h:35
GPUDeviceType getLowestGPUDeviceTypeFromArchitecture(GPUArchitecture arch)
get lowest GPU device for architecture
GFX9 architecture with NN extensions (AMD VEGA 20)
GPUArchVersionTable
AMDGPUArchVersion table type.
Definition: GPUId.h:242
bool isSpecialSGPRRegister(GPUArchMask archMask, cxuint index)
get maximum number of scalar register + extra scalar reg (VCC, FLAT_SCRATCH, ...) ...
Configuration header.
void getGPUSetupMinRegistersNum(GPUArchitecture architecture, cxuint dimMask, cxuint userDataNum, Flags flags, cxuint *gprsOut)
get minimal number of required registers
bool isThisGPUArchitecture(GPUArchitecture requiredArch, GPUArchitecture thisArch)
check whether this is the required GPU architecture
uint32_t calculatePgmRSrc2(GPUArchitecture arch, bool scratchEn, cxuint userDataNum, bool trapPresent, cxuint dimMask, cxuint defDimValues, bool tgSizeEn, cxuint ldsSize, cxuint exceptions)
calculate PGMRSRC2 register value
const char * getGPUDeviceTypeName(GPUDeviceType deviceType)
get GPU device type name
size_t getGPUMaxLocalSize(GPUArchitecture architecture)
get maximum local size for GPU architecture
cxuint getGPUMaxRegistersNum(GPUArchitecture architecture, cxuint regType, Flags flags=0)
get maximum available registers for GPU (type: 0 - scalar, 1 - vector)
uint32_t major
arch major number
Definition: GPUId.h:224
uint32_t stepping
arch stepping number
Definition: GPUId.h:226
unsigned char cxbyte
unsigned byte
Definition: Config.h:229
uint32_t calculatePgmRSrc1(GPUArchitecture arch, cxuint vgprsNum, cxuint sgprsNum, cxuint priority, cxuint floatMode, bool privMode, bool dx10clamp, bool debugMode, bool ieeeMode)
calculate PGMRSRC1 register value
main namespace
Definition: AsmDefs.h:38
GFX9 architecture (AMD RX VEGA)
unsigned int cxuint
unsigned int
Definition: Config.h:237
GFX1011 architecture (AMD NAVI DLOPS)
size_t getGPUMaxGDSSize(GPUArchitecture architecture)
get maximum GDS size for GPU architecture
uint32_t minor
arch minor number
Definition: GPUId.h:225
cxuint getGPUMaxAddrRegsNumByArchMask(GPUArchMask archMask, cxuint regType)
get maximum available addressable registers for GPU (type: 0 - scalar, 1 - vector) ...
GPUDeviceType
type of GPU device
Definition: GPUId.h:51
third iteration (Radeon Rx 300 series and Tonga)
ROCm (RadeonOpenCompute) format.
GPUIdException()=default
empty constructor
utilities for other libraries and programs
const char * getGPUArchitectureName(GPUArchitecture architecture)
get GPU architecture name
std::string message
message
Definition: Utilities.h:64
GPUDeviceType getGPUDeviceTypeFromName(const char *name)
get GPU device type from name
exception class
Definition: Utilities.h:61
GFX10 architecture (AMD NAVI)
AMDGPUArchVersion getGPUArchVersion(GPUDeviceType deviceType, GPUArchVersionTable table)
get AMD GPU architecture version for specific device type and driver
cxuint getGPUExtraRegsNum(GPUArchitecture architecture, cxuint regType, Flags flags)
get extra registers (like VCC,FLAT_SCRATCH)
second iteration (Radeon Rx 200 series)