CLRX  1
Unofficial OpenCL extensions designed for Radeon GPUs
ROCmBinaries.h
Go to the documentation of this file.
1 /*
2  * CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3  * Copyright (C) 2014-2016 Mateusz Szpakowski
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18  */
23 #ifndef __CLRX_ROCMBINARIES_H__
24 #define __CLRX_ROCMBINARIES_H__
25 
26 #include <CLRX/Config.h>
27 #include <cstddef>
28 #include <cstdint>
29 #include <memory>
30 #include <string>
31 #include <CLRX/amdbin/Elf.h>
33 #include <CLRX/utils/MemAccess.h>
34 #include <CLRX/utils/Containers.h>
35 #include <CLRX/utils/Utilities.h>
36 #include <CLRX/utils/GPUId.h>
37 #include <CLRX/utils/InputOutput.h>
38 
40 namespace CLRX
41 {
42 
43 enum : Flags { // creation flags for ROCm binaries (underlying type: Flags)
45  // NOTE(review): the enumerators on source lines 44 and 46 are not reproduced in this listing; presumably ROCMBIN_CREATE_REGIONMAP ("create region map") and ROCMBIN_CREATE_ALL ("all ROCm binaries flags") - TODO confirm
47 };
48 
50 enum ROCmRegionType: uint8_t // ROCm region/symbol type; stored as a single byte
51 {
52  DATA, // data object
55 }; // NOTE(review): source lines 53-54 are absent from this listing; they are documented as "function kernel (code)" and "OpenCL kernel to call ??"
56 
58 struct ROCmRegion // ROCm data region
59 { // NOTE(review): source line 60 (CString regionName, the region name) is not reproduced in this listing
61  size_t size; // data size
62  size_t offset; // documented only as "data"; presumably the data offset - TODO confirm what it is relative to
63  ROCmRegionType type; // kind of region (see ROCmRegionType)
64 };
65 
67 
71 { // body of ROCmBinary ("ROCm main binary for GPU for 64-bit mode"); the class head on source line 70 is not reproduced in this listing
72 public: // NOTE(review): source line 73 is missing here; presumably it declares the RegionMap type used below - TODO confirm
74 private:
75  size_t regionsNum; // regions number, as returned by getRegionsNum()
76  std::unique_ptr<ROCmRegion[]> regions; // region table owned by this object; indexed by getRegion(size_t)
77  RegionMap regionsMap; // region map; presumably backs the by-name getRegion() - TODO confirm
78  size_t codeSize; // code size, as returned by getCodeSize()
79  cxbyte* code; // code, as returned by getCode()
80 public:
81  ROCmBinary(size_t binaryCodeSize, cxbyte* binaryCode, // construct from a binary image of binaryCodeSize bytes
82  Flags creationFlags = ROCMBIN_CREATE_ALL); // creationFlags defaults to ROCMBIN_CREATE_ALL
83  ~ROCmBinary() = default;
84 
86  size_t getRegionsNum() const // get regions number
87  { return regionsNum; }
88 
90  const ROCmRegion& getRegion(size_t index) const // get region by index; index is not range-checked here
91  { return regions[index]; }
92 
94  const ROCmRegion& getRegion(const char* name) const; // get region by name; defined out of line
95 
97  size_t getCodeSize() const // get code size
98  { return codeSize; }
100  const cxbyte* getCode() const // get code
101  { return code; }
102 
104  bool hasRegionMap() const // true when the ROCMBIN_CREATE_REGIONMAP bit is set in creationFlags
105  { return (creationFlags & ROCMBIN_CREATE_REGIONMAP) != 0; }; // creationFlags is not declared in this listing; presumably inherited - TODO confirm
106 };
107 
108 enum { // ROCMFLAG_* constants: two groups whose values overlap, so they cannot share one flags word
109  ROCMFLAG_USE_PRIVATE_SEGMENT_BUFFER = 1, // first group: presumably for the SGPR-register enable flags of the kernel config - TODO confirm
110  ROCMFLAG_USE_DISPATCH_PTR = 2,
111  ROCMFLAG_USE_QUEUE_PTR = 4,
112  ROCMFLAG_USE_KERNARG_SEGMENT_PTR = 8,
113  ROCMFLAG_USE_DISPATCH_ID = 16,
114  ROCMFLAG_USE_FLAT_SCRATCH_INIT = 32,
115  ROCMFLAG_USE_PRIVATE_SEGMENT_SIZE = 64,
116  ROCMFLAG_USE_GRID_WORKGROUP_COUNT_BIT = 7, // bit index, not a mask: 1<<7 == ROCMFLAG_USE_GRID_WORKGROUP_COUNT_X
117  ROCMFLAG_USE_GRID_WORKGROUP_COUNT_X = 128,
118  ROCMFLAG_USE_GRID_WORKGROUP_COUNT_Y = 256,
119  ROCMFLAG_USE_GRID_WORKGROUP_COUNT_Z = 512,
120 
121  ROCMFLAG_USE_ORDERED_APPEND_GDS = 1, // second group: values restart at 1; presumably for the feature flags of the kernel config - TODO confirm
122  ROCMFLAG_PRIVATE_ELEM_SIZE_BIT = 1, // presumably a bit index rather than a mask (same _BIT suffix as above) - TODO confirm
123  ROCMFLAG_USE_PTR64 = 8,
124  ROCMFLAG_USE_DYNAMIC_CALL_STACK = 16,
125  ROCMFLAG_USE_DEBUG_ENABLED = 32,
126  ROCMFLAG_USE_XNACK_ENABLED = 64
127 };
128 
131 { // body of the ROCm kernel configuration structure; the struct head on source line 130 is not reproduced in this listing
132  uint32_t amdCodeVersionMajor;
133  uint32_t amdCodeVersionMinor;
134  uint16_t amdMachineKind;
135  uint16_t amdMachineMajor;
136  uint16_t amdMachineMinor;
137  uint16_t amdMachineStepping;
138  uint64_t kernelCodeEntryOffset;
139  uint64_t kernelCodePrefetchOffset;
140  uint64_t kernelCodePrefetchSize;
141  uint64_t maxScrachBackingMemorySize; // "Scrach" (sic) - the spelling is part of the public field name
142  uint32_t computePgmRsrc1;
143  uint32_t computePgmRsrc2;
144  uint16_t enableSpgrRegisterFlags; // "Spgr" (sic); presumably holds the first ROCMFLAG_USE_* group - TODO confirm
145  uint16_t enableFeatureFlags; // presumably holds the second ROCMFLAG_* group - TODO confirm
146  uint32_t workitemPrivateSegmentSize;
147  uint32_t workgroupGroupSegmentSize;
148  uint32_t gdsSegmentSize;
149  uint64_t kernargSegmentSize;
150  uint32_t workgroupFbarrierCount;
151  uint16_t wavefrontSgprCount;
152  uint16_t workitemVgprCount;
153  uint16_t reservedVgprFirst;
154  uint16_t reservedVgprCount;
155  uint16_t reservedSgprFirst;
156  uint16_t reservedSgprCount;
157  uint16_t debugWavefrontPrivateSegmentOffsetSgpr;
158  uint16_t debugPrivateSegmentBufferSgpr;
159  cxbyte kernargSegmentAlignment; // NOTE(review): encoding of the three alignment fields (bytes vs. log2) is not visible here - confirm before use
160  cxbyte groupSegmentAlignment;
161  cxbyte privateSegmentAlignment;
162  cxbyte wavefrontSize;
163  uint32_t callConvention;
164  uint32_t reserved1[3]; // three reserved 32-bit words
165  uint64_t runtimeLoaderKernelSymbol;
166  cxbyte controlDirective[128]; // 128-byte block; contents are not described in this file
167 };
168 
170 extern bool isROCmBinary(size_t binarySize, const cxbyte* binary); // presumably reports whether the binarySize-byte buffer at binary is a ROCm binary - TODO confirm against the definition
171 
172 /*
173  * ROCm Binary Generator
174  */
175 
176 enum: cxuint { // ROCm-specific section ids, numbered consecutively from ELFSECTID_OTHER_BUILTIN
177  ROCMSECTID_HASH = ELFSECTID_OTHER_BUILTIN, // first ROCm id
178  ROCMSECTID_DYNAMIC,
179  ROCMSECTID_NOTE,
180  ROCMSECTID_GPUCONFIG,
181  ROCMSECTID_MAX = ROCMSECTID_GPUCONFIG // alias of the last id above, not an extra id
182 };
183 
186 { // body of ROCmSymbolInput (ROCm binary symbol input); the struct head (source line 185) and the members symbolName (187) and type (190) are not reproduced in this listing
188  size_t offset; // offset in code
189  size_t size; // size of symbol
191 };
192 
193 struct ROCmInput // input description consumed by ROCmBinGenerator
194 { // NOTE(review): source line 195 (GPUDeviceType deviceType, the GPU device type) is not reproduced in this listing
196  uint32_t archMinor; // GPU arch minor
197  uint32_t archStepping; // GPU arch stepping
198  std::vector<ROCmSymbolInput> symbols; // symbols
199  size_t codeSize; // code size
200  const cxbyte* code; // code
201  size_t commentSize; // comment size
202  const char* comment; // comment; presumably may be null - TODO confirm
203  std::vector<BinSection> extraSections; // extra sections
204  std::vector<BinSymbol> extraSymbols; // extra symbols
205 
206  void addEmptyKernel(const char* kernelName); // defined elsewhere; presumably adds an entry named kernelName to symbols - TODO confirm
207 };
208 
210 { // body of ROCmBinGenerator; the class head on source line 209 is not reproduced in this listing
211 private:
212  private: // repeats the access specifier above; has no additional effect
213  bool manageable; // presumably true when this object owns *input (the destructor is user-declared) - TODO confirm
214  const ROCmInput* input; // input, as returned by getInput()
215 
216  void generateInternal(std::ostream* osPtr, std::vector<char>* vPtr, // takes one pointer per output kind offered by the public generate() overloads
217  Array<cxbyte>* aPtr) const; // presumably exactly one of the three pointers is non-null - TODO confirm
218 public:
221  ROCmBinGenerator(const ROCmInput* rocmInput); // construct from an existing input description
222 
223  ROCmBinGenerator(GPUDeviceType deviceType, uint32_t archMinor, uint32_t archStepping, // construct from individual fields
224  size_t codeSize, const cxbyte* code,
225  const std::vector<ROCmSymbolInput>& symbols); // symbols passed by const reference
226  ROCmBinGenerator(GPUDeviceType deviceType, uint32_t archMinor, uint32_t archStepping, // same as above
227  size_t codeSize, const cxbyte* code,
228  std::vector<ROCmSymbolInput>&& symbols); // symbols passed by rvalue reference
229  ~ROCmBinGenerator(); // defined out of line
230 
232  const ROCmInput* getInput() const // get input
233  { return input; }
234 
236  void setInput(const ROCmInput* input); // set input; defined out of line
237 
239  void generate(Array<cxbyte>& array) const; // generate the binary into array
240 
242  void generate(std::ostream& os) const; // generate the binary into the output stream os
243 
245  void generate(std::vector<char>& vector) const; // generate the binary into vector
246 };
247 
248 };
249 
250 #endif
ROCm data region.
Definition: ROCmBinaries.h:58
non copyable and non movable base structure (class)
Definition: Utilities.h:43
uint32_t Flags
type for declaring various flags
Definition: Utilities.h:97
const ROCmInput * getInput() const
get input
Definition: ROCmBinaries.h:232
size_t size
data size
Definition: ROCmBinaries.h:61
Definition: ROCmBinaries.h:193
uint32_t archStepping
GPU arch stepping.
Definition: ROCmBinaries.h:197
class ElfBinaryTemplate< Elf64Types > ElfBinary64
type for 64-bit ELF binary
Definition: ElfBinaries.h:432
CString regionName
region name
Definition: ROCmBinaries.h:60
size_t offset
data
Definition: ROCmBinaries.h:62
an array class
Definition: Containers.h:38
const ROCmRegion & getRegion(size_t index) const
get region by index
Definition: ROCmBinaries.h:90
size_t getCodeSize() const
get code size
Definition: ROCmBinaries.h:97
std::vector< BinSymbol > extraSymbols
extra symbols
Definition: ROCmBinaries.h:204
ROCm main binary for GPU for 64-bit mode.
Definition: ROCmBinaries.h:70
data object
Definition: ROCmBinaries.h:52
bool isROCmBinary(size_t binarySize, const cxbyte *binary)
check whether the binary is a ROCm binary
CString symbolName
symbol name
Definition: ROCmBinaries.h:187
input output utilities
size_t codeSize
code size
Definition: ROCmBinaries.h:199
const cxbyte * code
code
Definition: ROCmBinaries.h:200
main namespace
Definition: AsmFormats.h:41
ROCmRegionType
ROCm region/symbol type.
Definition: ROCmBinaries.h:50
const char * comment
comment
Definition: ROCmBinaries.h:202
ROCm kernel configuration structure.
Definition: ROCmBinaries.h:130
creation flags for ELF binaries
Definition: ElfBinaries.h:73
inlines for accessing memory words in LittleEndian and unaligned
ROCm binary symbol input.
Definition: ROCmBinaries.h:185
GPUDeviceType
type of GPU device
Definition: GPUId.h:38
const cxbyte * getCode() const
get code
Definition: ROCmBinaries.h:100
create region map
Definition: ROCmBinaries.h:44
Definition: ROCmBinaries.h:209
std::vector< ROCmSymbolInput > symbols
symbols
Definition: ROCmBinaries.h:198
utilities for other libraries and programs
uint32_t archMinor
GPU arch minor.
Definition: ROCmBinaries.h:196
ROCmRegionType type
type
Definition: ROCmBinaries.h:190
all ROCm binaries flags
Definition: ROCmBinaries.h:46
bool hasRegionMap() const
returns true if region map exists
Definition: ROCmBinaries.h:104
GPU identification utilities.
size_t size
size of symbol
Definition: ROCmBinaries.h:189
GPUDeviceType deviceType
GPU device type.
Definition: ROCmBinaries.h:195
size_t getRegionsNum() const
get regions number
Definition: ROCmBinaries.h:86
std::vector< BinSection > extraSections
extra sections
Definition: ROCmBinaries.h:203
OpenCL kernel to call ??
Definition: ROCmBinaries.h:54
size_t offset
offset in code
Definition: ROCmBinaries.h:188
Elf binaries handling.
simple C-string container
Definition: CString.h:38
containers and other utils for other libraries and programs
function kernel (code)
Definition: ROCmBinaries.h:53
size_t commentSize
comment size (can be null)
Definition: ROCmBinaries.h:201