CLRX  1
Unofficial OpenCL extensions designed for Radeon GPUs
ROCmBinaries.h
Go to the documentation of this file.
1 /*
2  * CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3  * Copyright (C) 2014-2018 Mateusz Szpakowski
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18  */
23 #ifndef __CLRX_ROCMBINARIES_H__
24 #define __CLRX_ROCMBINARIES_H__
25 
26 #include <CLRX/Config.h>
27 #include <cstddef>
28 #include <cstdint>
29 #include <memory>
30 #include <string>
31 #include <vector>
32 #include <CLRX/amdbin/Elf.h>
34 #include <CLRX/amdbin/Commons.h>
35 #include <CLRX/utils/MemAccess.h>
36 #include <CLRX/utils/Containers.h>
37 #include <CLRX/utils/Utilities.h>
38 #include <CLRX/utils/GPUId.h>
39 #include <CLRX/utils/InputOutput.h>
40 
42 namespace CLRX
43 {
44 
45 enum : Flags {
50 };
51 
53 enum ROCmRegionType: uint8_t
54 {
55  DATA,
58 };
59 
61 struct ROCmRegion
62 {
64  size_t size;
65  size_t offset;
67 };
68 
70 enum class ROCmValueKind : cxbyte
71 {
72  BY_VALUE = 0,
75  SAMPLER,
76  IMAGE,
77  PIPE,
78  QUEUE,
82  HIDDEN_NONE,
86  MAX_VALUE = HIDDEN_COMPLETION_ACTION
87 };
88 
/// ROCm argument's value type (stored in a cxbyte).
/// Enumerators after STRUCTURE take consecutive values; MAX_VALUE aliases the last one.
90 enum class ROCmValueType : cxbyte
91 {
92  STRUCTURE = 0, ///< structure value (NOTE(review): meaning inferred from the name -- confirm)
93  INT8, ///< 8-bit signed integer
94  UINT8, ///< 8-bit unsigned integer
95  INT16, ///< 16-bit signed integer
96  UINT16, ///< 16-bit unsigned integer
97  FLOAT16, ///< 16-bit (half) floating point
98  INT32, ///< 32-bit signed integer
99  UINT32, ///< 32-bit unsigned integer
100  FLOAT32, ///< single floating point
101  INT64, ///< 64-bit signed integer
102  UINT64, ///< 64-bit unsigned integer
103  FLOAT64, ///< double floating point
104  MAX_VALUE = FLOAT64 ///< highest valid enumerator (same value as FLOAT64)
105 };
106 
109 {
110  NONE = 0,
111  PRIVATE,
112  GLOBAL,
113  CONSTANT,
114  LOCAL,
115  GENERIC,
116  REGION,
117  MAX_VALUE = REGION
118 };
119 
122 {
123  DEFAULT = 0,
124  READ_ONLY,
125  WRITE_ONLY,
126  READ_WRITE,
127  MAX_VALUE = READ_WRITE
128 };
129 
132 {
135  uint64_t size;
136  uint64_t align;
137  uint64_t pointeeAlign;
143  bool isConst;
144  bool isRestrict;
145  bool isVolatile;
146  bool isPipe;
147 };
148 
151 {
154  std::vector<ROCmKernelArgInfo> argInfos;
156  cxuint langVersion[2];
157  cxuint reqdWorkGroupSize[3];
158  cxuint workGroupSizeHint[3];
168  uint64_t maxFlatWorkGroupSize;
169  cxuint fixedWorkGroupSize[3];
172 
173  void initialize();
174 };
175 
178 {
179  uint32_t id;
182 };
183 
186 {
187  cxuint version[2];
188  std::vector<ROCmPrintfInfo> printfInfos;
189  std::vector<ROCmKernelMetadata> kernels;
190 
192  void initialize();
194  void parse(size_t metadataSize, const char* metadata);
195 };
196 
198 
202 {
203 public:
206 private:
207  size_t regionsNum;
208  std::unique_ptr<ROCmRegion[]> regions;
209  RegionMap regionsMap;
210  size_t codeSize;
211  cxbyte* code;
212  size_t globalDataSize;
213  cxbyte* globalData;
214  CString target;
215  size_t metadataSize;
216  char* metadata;
217  std::unique_ptr<ROCmMetadata> metadataInfo;
218  RegionMap kernelInfosMap;
219  Array<size_t> gotSymbols;
220  bool newBinFormat;
221 public:
223  ROCmBinary(size_t binaryCodeSize, cxbyte* binaryCode,
224  Flags creationFlags = ROCMBIN_CREATE_ALL);
226  ~ROCmBinary() = default;
227 
229  GPUDeviceType determineGPUDeviceType(uint32_t& archMinor,
230  uint32_t& archStepping) const;
231 
233  size_t getRegionsNum() const
234  { return regionsNum; }
235 
237  const ROCmRegion& getRegion(size_t index) const
238  { return regions[index]; }
239 
241  const ROCmRegion& getRegion(const char* name) const;
242 
244  size_t getCodeSize() const
245  { return codeSize; }
247  const cxbyte* getCode() const
248  { return code; }
251  { return code; }
252 
254  size_t getGlobalDataSize() const
255  { return globalDataSize; }
256 
258  const cxbyte* getGlobalData() const
259  { return globalData; }
262  { return globalData; }
263 
265  size_t getMetadataSize() const
266  { return metadataSize; }
268  const char* getMetadata() const
269  { return metadata; }
271  char* getMetadata()
272  { return metadata; }
273 
275  bool hasMetadataInfo() const
276  { return metadataInfo!=nullptr; }
277 
280  { return *metadataInfo; }
281 
283  size_t getKernelInfosNum() const
284  { return metadataInfo->kernels.size(); }
285 
287  const ROCmKernelMetadata& getKernelInfo(size_t index) const
288  { return metadataInfo->kernels[index]; }
289 
291  const ROCmKernelMetadata& getKernelInfo(const char* name) const;
292 
294  const CString& getTarget() const
295  { return target; }
296 
298  bool isNewBinaryFormat() const
299  { return newBinFormat; }
300 
302  size_t getGotSymbolsNum() const
303  { return gotSymbols.size(); }
304 
307  { return gotSymbols; }
308 
310  size_t getGotSymbol(size_t index) const
311  { return gotSymbols[index]; }
312 
314  bool hasRegionMap() const
315  { return (creationFlags & ROCMBIN_CREATE_REGIONMAP) != 0; }
317  bool hasKernelInfoMap() const
318  { return (creationFlags & ROCMBIN_CREATE_KERNELINFOMAP) != 0; }
319 };
320 
/// ROCm kernel configuration flags.
/// Every ROCMFLAG_* constant is defined as the AMDHSAFLAG_* constant with the same
/// suffix, so the two families are interchangeable value-for-value. The AMDHSAFLAG_*
/// constants are declared outside this file (NOTE(review): presumably in
/// CLRX/amdbin/Commons.h, as the tooltips reference Commons.h -- confirm).
321 enum {
322  ROCMFLAG_USE_PRIVATE_SEGMENT_BUFFER = AMDHSAFLAG_USE_PRIVATE_SEGMENT_BUFFER,
323  ROCMFLAG_USE_DISPATCH_PTR = AMDHSAFLAG_USE_DISPATCH_PTR,
324  ROCMFLAG_USE_QUEUE_PTR = AMDHSAFLAG_USE_QUEUE_PTR,
325  ROCMFLAG_USE_KERNARG_SEGMENT_PTR = AMDHSAFLAG_USE_KERNARG_SEGMENT_PTR,
326  ROCMFLAG_USE_DISPATCH_ID = AMDHSAFLAG_USE_DISPATCH_ID,
327  ROCMFLAG_USE_FLAT_SCRATCH_INIT = AMDHSAFLAG_USE_FLAT_SCRATCH_INIT,
328  ROCMFLAG_USE_PRIVATE_SEGMENT_SIZE = AMDHSAFLAG_USE_PRIVATE_SEGMENT_SIZE,
329  ROCMFLAG_USE_GRID_WORKGROUP_COUNT_BIT = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_BIT,
330  ROCMFLAG_USE_GRID_WORKGROUP_COUNT_X = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_X,
331  ROCMFLAG_USE_GRID_WORKGROUP_COUNT_Y = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_Y,
332  ROCMFLAG_USE_GRID_WORKGROUP_COUNT_Z = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_Z,
333 
334  ROCMFLAG_USE_ORDERED_APPEND_GDS = AMDHSAFLAG_USE_ORDERED_APPEND_GDS,
335  ROCMFLAG_PRIVATE_ELEM_SIZE_BIT = AMDHSAFLAG_PRIVATE_ELEM_SIZE_BIT,
336  ROCMFLAG_USE_PTR64 = AMDHSAFLAG_USE_PTR64,
337  ROCMFLAG_USE_DYNAMIC_CALL_STACK = AMDHSAFLAG_USE_DYNAMIC_CALL_STACK,
338  ROCMFLAG_USE_DEBUG_ENABLED = AMDHSAFLAG_USE_DEBUG_ENABLED,
339  ROCMFLAG_USE_XNACK_ENABLED = AMDHSAFLAG_USE_XNACK_ENABLED
340 };
341 
344 
346 extern bool isROCmBinary(size_t binarySize, const cxbyte* binary);
347 
348 /*
349  * ROCm Binary Generator
350  */
351 
/// Section ids used by the ROCm binary generator (values are cxuint).
/// Numbering starts at ELFSECTID_OTHER_BUILTIN and continues consecutively;
/// ROCMSECTID_MAX equals the last id and is used to size
/// ROCmBinGenerator::mainBuiltinSectTable (ROCMSECTID_MAX-ELFSECTID_START+1 entries).
352 enum: cxuint {
353  ROCMSECTID_HASH = ELFSECTID_OTHER_BUILTIN,
354  ROCMSECTID_DYNAMIC,
355  ROCMSECTID_NOTE,
356  ROCMSECTID_GPUCONFIG,
357  ROCMSECTID_RELADYN,
358  ROCMSECTID_GOT,
359  ROCMSECTID_MAX = ROCMSECTID_GOT ///< highest ROCm section id (same value as ROCMSECTID_GOT)
360 };
361 
364 {
366  size_t offset;
367  size_t size;
369 };
370 
372 struct ROCmInput
373 {
375  uint32_t archMinor;
376  uint32_t archStepping;
377  uint32_t eflags;
379  size_t globalDataSize;
381  std::vector<ROCmSymbolInput> symbols;
382  size_t codeSize;
383  const cxbyte* code;
384  size_t commentSize;
385  const char* comment;
388  size_t metadataSize;
389  const char* metadata;
392 
394 
398  std::vector<size_t> gotSymbols;
399  std::vector<BinSection> extraSections;
400  std::vector<BinSymbol> extraSymbols;
401 
403  void addEmptyKernel(const char* kernelName);
404 };
405 
408 {
409 private:
410  private:
411  bool manageable;
412  const ROCmInput* input;
413  std::unique_ptr<ElfBinaryGen64> elfBinGen64;
414  size_t binarySize;
415  size_t commentSize;
416  const char* comment;
417  std::string target;
418  std::unique_ptr<cxbyte[]> noteBuf;
419  std::string metadataStr;
420  size_t metadataSize;
421  const char* metadata;
422  cxuint mainSectionsNum;
423  uint16_t mainBuiltinSectTable[ROCMSECTID_MAX-ELFSECTID_START+1];
424  void* rocmGotGen;
425  void* rocmRelaDynGen;
426 
427  void generateInternal(std::ostream* osPtr, std::vector<char>* vPtr,
428  Array<cxbyte>* aPtr);
429 public:
433  explicit ROCmBinGenerator(const ROCmInput* rocmInput);
434 
436 
446  ROCmBinGenerator(GPUDeviceType deviceType, uint32_t archMinor, uint32_t archStepping,
447  size_t codeSize, const cxbyte* code,
448  size_t globalDataSize, const cxbyte* globalData,
449  const std::vector<ROCmSymbolInput>& symbols);
451  ROCmBinGenerator(GPUDeviceType deviceType, uint32_t archMinor, uint32_t archStepping,
452  size_t codeSize, const cxbyte* code,
453  size_t globalDataSize, const cxbyte* globalData,
454  std::vector<ROCmSymbolInput>&& symbols);
456  ~ROCmBinGenerator();
457 
459  const ROCmInput* getInput() const
460  { return input; }
462  void setInput(const ROCmInput* input);
463 
465  void prepareBinaryGen();
467  size_t getSectionOffset(cxuint sectionId) const
468  { return elfBinGen64->getRegionOffset(
469  mainBuiltinSectTable[sectionId - ELFSECTID_START]); }
471  void updateSymbols();
472 
474  void generate(Array<cxbyte>& array);
475 
477  void generate(std::ostream& os);
478 
480  void generate(std::vector<char>& vector);
481 };
482 
483 };
484 
485 #endif
ROCmAccessQual
ROCm access qualifier.
Definition: ROCmBinaries.h:121
CString format
printf format
Definition: ROCmBinaries.h:181
cxuint vgprsNum
number of VGPRs
Definition: ROCmBinaries.h:167
ROCmValueKind
ROCm Value kind.
Definition: ROCmBinaries.h:70
use ordered append gds
Definition: Commons.h:56
ROCm data region.
Definition: ROCmBinaries.h:61
pointer to local memory
double floating point
cxuint spilledVgprs
number of spilled VGPRs
Definition: ROCmBinaries.h:171
non copyable and non movable base structure (class)
Definition: Utilities.h:46
CString runtimeHandle
symbol of runtime handle
Definition: ROCmBinaries.h:160
uint32_t Flags
type for declaring various flags
Definition: Utilities.h:100
pointer to global memory
const ROCmInput * getInput() const
get input
Definition: ROCmBinaries.h:459
ROCmRegionType type
type
Definition: ROCmBinaries.h:66
uint64_t pointeeAlign
alignment of the data pointed to by the pointer
Definition: ROCmBinaries.h:137
const char * getMetadata() const
get metadata
Definition: ROCmBinaries.h:268
none operation
size_t size
data size
Definition: ROCmBinaries.h:64
ROCm binary input structure.
Definition: ROCmBinaries.h:372
uint32_t archStepping
GPU arch stepping.
Definition: ROCmBinaries.h:376
CString symbolName
symbol name
Definition: ROCmBinaries.h:153
bool isRestrict
is restrict
Definition: ROCmBinaries.h:144
CString language
language
Definition: ROCmBinaries.h:155
ROCm printf call info.
Definition: ROCmBinaries.h:177
class ElfBinaryTemplate< Elf64Types > ElfBinary64
type for 64-bit ELF binary
Definition: ElfBinaries.h:483
use 64-bit pointers
Definition: Commons.h:57
CString regionName
region name
Definition: ROCmBinaries.h:63
creation flags for ELF binaries
Definition: ElfBinaries.h:73
ROCmValueKind valueKind
value kind
Definition: ROCmBinaries.h:138
AMD HSA kernel configuration structure.
Definition: Commons.h:64
use workgroup count for Y dim
Definition: Commons.h:52
common definitions for binaries
size_t offset
data offset
Definition: ROCmBinaries.h:65
const Array< size_t > getGotSymbols() const
get GOT symbols (indices) (from elfbin dynsymbols)
Definition: ROCmBinaries.h:306
CString target
LLVM target triple with device name.
Definition: ROCmBinaries.h:386
use kernel argument segment pointer
Definition: Commons.h:46
use workgroup count for X dim
Definition: Commons.h:51
size_t getKernelInfosNum() const
get kernel metadata infos number
Definition: ROCmBinaries.h:283
64-bit unsigned integer
const ROCmRegion & getRegion(size_t index) const
get region by index
Definition: ROCmBinaries.h:237
const ROCmKernelMetadata & getKernelInfo(size_t index) const
get kernel metadata info
Definition: ROCmBinaries.h:287
size_t globalDataSize
global data size
Definition: ROCmBinaries.h:379
Array< uint32_t > argSizes
sizes of the printf call arguments (the text "unique id of call" describes the neighbouring id field)
Definition: ROCmBinaries.h:180
Configuration header.
cxuint spilledSgprs
number of spilled SGPRs
Definition: ROCmBinaries.h:170
uint64_t align
argument alignment in bytes
Definition: ROCmBinaries.h:136
ROCmValueType
ROCm argument's value type.
Definition: ROCmBinaries.h:90
size_t getSectionOffset(cxuint sectionId) const
get section offset (from main section)
Definition: ROCmBinaries.h:467
use private segment buffer
Definition: Commons.h:43
size_t getMetadataSize() const
get metadata size
Definition: ROCmBinaries.h:265
uint64_t kernargSegmentSize
kernel argument segment size
Definition: ROCmBinaries.h:161
ROCmAccessQual actualAccessQual
actual access qualifier
Definition: ROCmBinaries.h:142
std::vector< ROCmKernelArgInfo > argInfos
kernel arguments
Definition: ROCmBinaries.h:154
bool newBinFormat
use new binary format for ROCm
Definition: ROCmBinaries.h:378
size_t getCodeSize() const
get code size
Definition: ROCmBinaries.h:244
std::vector< BinSymbol > extraSymbols
extra symbols
Definition: ROCmBinaries.h:400
cxbyte * getGlobalData()
get global data
Definition: ROCmBinaries.h:261
CString name
name
Definition: ROCmBinaries.h:133
single floating point
32-bit signed integer
ROCm main binary for GPU for 64-bit mode.
Definition: ROCmBinaries.h:201
data object
Definition: ROCmBinaries.h:55
std::vector< ROCmKernelMetadata > kernels
kernel metadatas
Definition: ROCmBinaries.h:189
bool isConst
is constant
Definition: ROCmBinaries.h:143
bool isROCmBinary(size_t binarySize, const cxbyte *binary)
check whether the binary is a ROCm binary
ROCmAccessQual accessQual
access qualifier (for images and values)
Definition: ROCmBinaries.h:141
const char * metadata
metadata
Definition: ROCmBinaries.h:389
create metadata info object
Definition: ROCmBinaries.h:47
CString symbolName
symbol name
Definition: ROCmBinaries.h:365
create region map
Definition: ROCmBinaries.h:46
input output utilities
ROCmMetadata metadataInfo
metadata info
Definition: ROCmBinaries.h:391
std::vector< size_t > gotSymbols
list of indices of symbols to GOT section
Definition: ROCmBinaries.h:398
CString vecTypeHint
vector type hint
Definition: ROCmBinaries.h:159
size_t codeSize
code size
Definition: ROCmBinaries.h:382
bool isVolatile
is volatile
Definition: ROCmBinaries.h:145
unsigned char cxbyte
unsigned byte
Definition: Config.h:229
const cxbyte * code
code
Definition: ROCmBinaries.h:383
bool isPipe
is pipe
Definition: ROCmBinaries.h:146
main namespace
Definition: AsmDefs.h:38
size_t getGlobalDataSize() const
get global data size
Definition: ROCmBinaries.h:254
ROCmRegionType
ROCm region/symbol type.
Definition: ROCmBinaries.h:53
Array< std::pair< CString, size_t > > RegionMap
region map type
Definition: ROCmBinaries.h:205
ROCm kernel metadata.
Definition: ROCmBinaries.h:150
ROCm binary metadata.
Definition: ROCmBinaries.h:185
cxbyte * getCode()
get code
Definition: ROCmBinaries.h:250
unsigned int cxuint
unsigned int
Definition: Config.h:237
uint32_t eflags
ELF header e_flags field.
Definition: ROCmBinaries.h:377
use workgroup count for Z dim
Definition: Commons.h:53
create kernel metadata info map
Definition: ROCmBinaries.h:48
const char * comment
comment
Definition: ROCmBinaries.h:385
all ROCm binaries flags
Definition: ROCmBinaries.h:49
cxuint wavefrontSize
wavefront size
Definition: ROCmBinaries.h:165
size_t getGotSymbolsNum() const
get number of GOT symbols
Definition: ROCmBinaries.h:302
passed in global buffer
bool isNewBinaryFormat() const
returns true if the binary uses the new binary format
Definition: ROCmBinaries.h:298
std::vector< ROCmPrintfInfo > printfInfos
printf calls infos
Definition: ROCmBinaries.h:188
ROCm kernel argument.
Definition: ROCmBinaries.h:131
xnack enabled
Definition: Commons.h:60
inlines for accessing memory words in LittleEndian and unaligned
const cxbyte * globalData
global data
Definition: ROCmBinaries.h:380
32-bit unsigned integer
ROCm binary symbol input.
Definition: ROCmBinaries.h:363
GPUDeviceType
type of GPU device
Definition: GPUId.h:51
const cxbyte * getCode() const
get code
Definition: ROCmBinaries.h:247
ROCm binary generator.
Definition: ROCmBinaries.h:407
ROCmValueType valueType
value type
Definition: ROCmBinaries.h:139
std::vector< ROCmSymbolInput > symbols
symbols
Definition: ROCmBinaries.h:381
utilities for other libraries and programs
bool hasKernelInfoMap() const
returns true if object has kernel map
Definition: ROCmBinaries.h:317
debug enabled
Definition: Commons.h:59
passed as dynamic shared pointer
uint64_t privateSegmentFixedSize
private segment size (fixed)
Definition: ROCmBinaries.h:163
const CString & getTarget() const
get target
Definition: ROCmBinaries.h:294
uint32_t archMinor
GPU arch minor.
Definition: ROCmBinaries.h:375
size_t metadataSize
metadata size
Definition: ROCmBinaries.h:388
ROCmRegionType type
type
Definition: ROCmBinaries.h:368
bool hasMetadataInfo() const
has metadata info
Definition: ROCmBinaries.h:275
64-bit signed integer
bool hasRegionMap() const
returns true if region map exists
Definition: ROCmBinaries.h:314
use queue pointer
Definition: Commons.h:45
CString targetTripple
only the LLVM target triple
Definition: ROCmBinaries.h:387
GPU identification utilities.
size_t size() const
returns number of elements
Definition: Containers.h:172
size_t size
size of symbol
Definition: ROCmBinaries.h:367
ROCmAddressSpace
ROCm argument address space.
Definition: ROCmBinaries.h:108
CString name
kernel name
Definition: ROCmBinaries.h:152
GPUDeviceType deviceType
GPU device type.
Definition: ROCmBinaries.h:374
size_t getRegionsNum() const
get regions number
Definition: ROCmBinaries.h:233
cxuint sgprsNum
number of SGPRs
Definition: ROCmBinaries.h:166
value is just value
ROCmAddressSpace addressSpace
pointer address space
Definition: ROCmBinaries.h:140
use private segment size
Definition: Commons.h:49
size_t getGotSymbol(size_t index) const
get GOT symbol index (from elfbin dynsymbols)
Definition: ROCmBinaries.h:310
const ROCmMetadata & getMetadataInfo() const
get metadata info
Definition: ROCmBinaries.h:279
const cxbyte * getGlobalData() const
get global data
Definition: ROCmBinaries.h:258
uint64_t kernargSegmentAlign
alignment of kernel argument segment
Definition: ROCmBinaries.h:164
std::vector< BinSection > extraSections
extra sections
Definition: ROCmBinaries.h:399
bool useMetadataInfo
use metadataInfo instead of the raw metadata
Definition: ROCmBinaries.h:390
OpenCL kernel to call ??
Definition: ROCmBinaries.h:57
size_t offset
offset in code
Definition: ROCmBinaries.h:366
Elf binaries handling.
simple C-string container
Definition: CString.h:38
Elf.h definitions.
containers and other utils for other libraries and programs
function kernel (code)
Definition: ROCmBinaries.h:56
CString typeName
type name
Definition: ROCmBinaries.h:134
uint64_t size
argument size in bytes
Definition: ROCmBinaries.h:135
char * getMetadata()
get metadata
Definition: ROCmBinaries.h:271
size_t commentSize
comment size (can be null)
Definition: ROCmBinaries.h:384
pointer to constant memory
uint64_t groupSegmentFixedSize
group segment size (fixed)
Definition: ROCmBinaries.h:162
AmdHsaKernelConfig ROCmKernelConfig
ROCm kernel configuration structure.
Definition: ROCmBinaries.h:343