CLRX  1
Unofficial OpenCL extensions designed for Radeon GPUs
ROCmBinaries.h
Go to the documentation of this file.
1 /*
2  * CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3  * Copyright (C) 2014-2017 Mateusz Szpakowski
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18  */
23 #ifndef __CLRX_ROCMBINARIES_H__
24 #define __CLRX_ROCMBINARIES_H__
25 
26 #include <CLRX/Config.h>
27 #include <cstddef>
28 #include <cstdint>
29 #include <memory>
30 #include <string>
31 #include <CLRX/amdbin/Elf.h>
33 #include <CLRX/amdbin/Commons.h>
34 #include <CLRX/utils/MemAccess.h>
35 #include <CLRX/utils/Containers.h>
36 #include <CLRX/utils/Utilities.h>
37 #include <CLRX/utils/GPUId.h>
38 #include <CLRX/utils/InputOutput.h>
39 
41 namespace CLRX
42 {
43 
// Anonymous enumeration with underlying type Flags.
// NOTE(review): listing lines 45-46 (the enumerators) are absent from this view.
// Presumably they define ROCMBIN_CREATE_REGIONMAP and ROCMBIN_CREATE_ALL, which are
// used further down in this file - confirm against the original header.
44 enum : Flags {
47 };
48 
/// ROCm region/symbol type (stored as uint8_t).
50 enum ROCmRegionType: uint8_t
51 {
52  DATA, ///< data object
// NOTE(review): listing lines 53-54 (two further enumerators) are absent from this
// view; their names cannot be confirmed from here.
55 };
56 
/// ROCm data region.
58 struct ROCmRegion
59 {
// NOTE(review): listing line 60 is absent from this view; presumably it declares
// the region name (CString regionName) - confirm against the original header.
61  size_t size; ///< data size
62  size_t offset; ///< presumably the data offset - confirm against the original header
// NOTE(review): listing line 63 is absent from this view; presumably it declares
// the region type (ROCmRegionType type) - confirm against the original header.
64 };
65 
67 
// Class body for the ROCm main binary (64-bit mode).
// NOTE(review): the class header (listing line 70) is absent from this view;
// presumably this is the body of class ROCmBinary, whose constructor is declared
// below - confirm the name and base classes against the original header.
71 {
72 public:
// NOTE(review): listing line 74 is absent from this view; presumably it declares the
// RegionMap type used by regionsMap below - confirm.
75 private:
76  size_t regionsNum; ///< value returned by getRegionsNum()
77  std::unique_ptr<ROCmRegion[]> regions; ///< array indexed by getRegion(size_t)
78  RegionMap regionsMap; // presumably backs getRegion(const char*) - TODO confirm
79  size_t codeSize; ///< value returned by getCodeSize()
80  cxbyte* code; ///< pointer returned by getCode()
81 public:
/// Constructor taking the binary code size, a pointer to the binary code and
/// creation flags; creationFlags defaults to ROCMBIN_CREATE_ALL.
83  ROCmBinary(size_t binaryCodeSize, cxbyte* binaryCode,
84  Flags creationFlags = ROCMBIN_CREATE_ALL);
/// Defaulted destructor.
86  ~ROCmBinary() = default;
87 
/// Returns a GPUDeviceType. archMinor and archStepping are non-const references,
/// presumably outputs filled by the call - TODO confirm against the implementation.
89  GPUDeviceType determineGPUDeviceType(uint32_t& archMinor,
90  uint32_t& archStepping) const;
91 
/// get regions number
93  size_t getRegionsNum() const
94  { return regionsNum; }
95 
/// get region by index (no bounds check is performed in this inline body)
97  const ROCmRegion& getRegion(size_t index) const
98  { return regions[index]; }
99 
/// get region by name (defined out of line; lookup and failure behaviour not visible here)
101  const ROCmRegion& getRegion(const char* name) const;
102 
/// get code size
104  size_t getCodeSize() const
105  { return codeSize; }
/// get code
107  const cxbyte* getCode() const
108  { return code; }
109 
/// Returns true if the ROCMBIN_CREATE_REGIONMAP bit is set in creationFlags.
// NOTE(review): creationFlags is not declared in this view (presumably inherited from
// a base class - confirm). The ';' after the function body below is redundant.
111  bool hasRegionMap() const
112  { return (creationFlags & ROCMBIN_CREATE_REGIONMAP) != 0; };
113 };
114 
// ROCMFLAG_* constants: each enumerator is defined as the AMDHSAFLAG_* constant with
// the same suffix, so the two families share identical values.
115 enum {
116  ROCMFLAG_USE_PRIVATE_SEGMENT_BUFFER = AMDHSAFLAG_USE_PRIVATE_SEGMENT_BUFFER,
117  ROCMFLAG_USE_DISPATCH_PTR = AMDHSAFLAG_USE_DISPATCH_PTR,
118  ROCMFLAG_USE_QUEUE_PTR = AMDHSAFLAG_USE_QUEUE_PTR,
119  ROCMFLAG_USE_KERNARG_SEGMENT_PTR = AMDHSAFLAG_USE_KERNARG_SEGMENT_PTR,
120  ROCMFLAG_USE_DISPATCH_ID = AMDHSAFLAG_USE_DISPATCH_ID,
121  ROCMFLAG_USE_FLAT_SCRATCH_INIT = AMDHSAFLAG_USE_FLAT_SCRATCH_INIT,
122  ROCMFLAG_USE_PRIVATE_SEGMENT_SIZE = AMDHSAFLAG_USE_PRIVATE_SEGMENT_SIZE,
123  ROCMFLAG_USE_GRID_WORKGROUP_COUNT_BIT = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_BIT,
124  ROCMFLAG_USE_GRID_WORKGROUP_COUNT_X = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_X,
125  ROCMFLAG_USE_GRID_WORKGROUP_COUNT_Y = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_Y,
126  ROCMFLAG_USE_GRID_WORKGROUP_COUNT_Z = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_Z,
127 
128  ROCMFLAG_USE_ORDERED_APPEND_GDS = AMDHSAFLAG_USE_ORDERED_APPEND_GDS,
129  ROCMFLAG_PRIVATE_ELEM_SIZE_BIT = AMDHSAFLAG_PRIVATE_ELEM_SIZE_BIT,
130  ROCMFLAG_USE_PTR64 = AMDHSAFLAG_USE_PTR64,
131  ROCMFLAG_USE_DYNAMIC_CALL_STACK = AMDHSAFLAG_USE_DYNAMIC_CALL_STACK,
132  ROCMFLAG_USE_DEBUG_ENABLED = AMDHSAFLAG_USE_DEBUG_ENABLED,
133  ROCMFLAG_USE_XNACK_ENABLED = AMDHSAFLAG_USE_XNACK_ENABLED
134 };
135 
138 
/// Reports whether the buffer of binarySize bytes at binary is a ROCm binary
/// (per the function name; the detection criteria are defined elsewhere and are
/// not visible here).
140 extern bool isROCmBinary(size_t binarySize, const cxbyte* binary);
141 
142 /*
143  * ROCm Binary Generator
144  */
145 
// ROCMSECTID_* section identifiers (underlying type cxuint). Numbering starts at
// ELFSECTID_OTHER_BUILTIN and increases by one per enumerator; ROCMSECTID_MAX is an
// alias of the last identifier, ROCMSECTID_GPUCONFIG.
146 enum: cxuint {
147  ROCMSECTID_HASH = ELFSECTID_OTHER_BUILTIN,
148  ROCMSECTID_DYNAMIC,
149  ROCMSECTID_NOTE,
150  ROCMSECTID_GPUCONFIG,
151  ROCMSECTID_MAX = ROCMSECTID_GPUCONFIG
152 };
153 
// Body of the ROCm binary symbol input structure.
// NOTE(review): the struct header (listing line 155) is absent from this view;
// presumably this is struct ROCmSymbolInput, the element type of ROCmInput::symbols -
// confirm against the original header.
156 {
// NOTE(review): listing line 157 is absent from this view; presumably it declares
// the symbol name (CString symbolName) - confirm.
158  size_t offset; ///< offset in code
159  size_t size; ///< size of symbol
// NOTE(review): listing line 160 is absent from this view; presumably it declares
// the symbol type (ROCmRegionType type) - confirm.
161 };
162 
/// ROCm binary input structure.
164 struct ROCmInput
165 {
// NOTE(review): listing line 166 is absent from this view; presumably it declares
// the GPU device type (GPUDeviceType deviceType) - confirm.
167  uint32_t archMinor; ///< GPU arch minor
168  uint32_t archStepping; ///< GPU arch stepping
169  std::vector<ROCmSymbolInput> symbols; ///< symbols
170  size_t codeSize; ///< code size
171  const cxbyte* code; ///< code
172  size_t commentSize; ///< comment size
173  const char* comment; ///< comment
174  std::vector<BinSection> extraSections; ///< extra sections
175  std::vector<BinSymbol> extraSymbols; ///< extra symbols
176 
/// Presumably adds an empty kernel named kernelName (per the method name; the
/// definition is not visible here) - TODO confirm what is appended and where.
178  void addEmptyKernel(const char* kernelName);
179 };
180 
// Class body for the ROCm binary generator.
// NOTE(review): the class header (listing line 182) is absent from this view;
// presumably this is the body of class ROCmBinGenerator, whose constructors are
// declared below - confirm the name and base classes against the original header.
183 {
// NOTE(review): the access specifier below is written twice; the second is redundant.
184 private:
185  private:
186  bool manageable; // presumably true when this object owns `input` - TODO confirm
187  const ROCmInput* input; ///< pointer returned by getInput()
188 
// Takes one pointer for each output kind offered by the public generate() overloads
// (std::ostream, std::vector<char>, Array<cxbyte>); presumably their shared
// implementation - TODO confirm against the definition.
189  void generateInternal(std::ostream* osPtr, std::vector<char>* vPtr,
190  Array<cxbyte>* aPtr) const;
191 public:
/// Constructor taking a pointer to a ROCm input (ownership not visible here).
195  ROCmBinGenerator(const ROCmInput* rocmInput);
196 
198 
/// Constructor taking the device type, arch minor/stepping, code size, code pointer
/// and a symbol list passed by const reference.
206  ROCmBinGenerator(GPUDeviceType deviceType, uint32_t archMinor, uint32_t archStepping,
207  size_t codeSize, const cxbyte* code,
208  const std::vector<ROCmSymbolInput>& symbols);
/// Same parameters as the previous constructor, but the symbol list is taken by
/// rvalue reference.
210  ROCmBinGenerator(GPUDeviceType deviceType, uint32_t archMinor, uint32_t archStepping,
211  size_t codeSize, const cxbyte* code,
212  std::vector<ROCmSymbolInput>&& symbols);
/// Destructor (defined out of line).
214  ~ROCmBinGenerator();
215 
/// get input
217  const ROCmInput* getInput() const
218  { return input; }
219 
/// set input (defined out of line; handling of any previous input is not visible here)
221  void setInput(const ROCmInput* input);
222 
/// generate overload taking an Array<cxbyte> by non-const reference
224  void generate(Array<cxbyte>& array) const;
225 
/// generate overload taking a std::ostream by non-const reference
227  void generate(std::ostream& os) const;
228 
/// generate overload taking a std::vector<char> by non-const reference
230  void generate(std::vector<char>& vector) const;
231 };
232 
233 };
234 
235 #endif
use private segment size
Definition: Commons.h:49
use workgroup count for Y dim
Definition: Commons.h:52
ROCm data region.
Definition: ROCmBinaries.h:58
non copyable and non movable base structure (class)
Definition: Utilities.h:43
uint32_t Flags
type for declaring various flags
Definition: Utilities.h:97
const ROCmInput * getInput() const
get input
Definition: ROCmBinaries.h:217
ROCmRegionType type
type
Definition: ROCmBinaries.h:63
size_t size
data size
Definition: ROCmBinaries.h:61
ROCm binary input structure.
Definition: ROCmBinaries.h:164
uint32_t archStepping
GPU arch stepping.
Definition: ROCmBinaries.h:168
use 64-bit pointers
Definition: Commons.h:57
class ElfBinaryTemplate< Elf64Types > ElfBinary64
type for 64-bit ELF binary
Definition: ElfBinaries.h:437
CString regionName
region name
Definition: ROCmBinaries.h:60
AMD HSA kernel configuration structure.
Definition: Commons.h:64
common definitions for binaries
size_t offset
data offset
Definition: ROCmBinaries.h:62
all ROCm binaries flags
Definition: ROCmBinaries.h:46
an array class
Definition: Containers.h:38
const ROCmRegion & getRegion(size_t index) const
get region by index
Definition: ROCmBinaries.h:97
use kernel argument segment pointer
Definition: Commons.h:46
Configuration header.
use private segment buffer
Definition: Commons.h:43
size_t getCodeSize() const
get code size
Definition: ROCmBinaries.h:104
std::vector< BinSymbol > extraSymbols
extra symbols
Definition: ROCmBinaries.h:175
ROCm main binary for GPU for 64-bit mode.
Definition: ROCmBinaries.h:70
data object
Definition: ROCmBinaries.h:52
create region map
Definition: ROCmBinaries.h:45
bool isROCmBinary(size_t binarySize, const cxbyte *binary)
check whether the binary is a ROCm binary
CString symbolName
symbol name
Definition: ROCmBinaries.h:157
input output utilities
xnack enabled
Definition: Commons.h:60
size_t codeSize
code size
Definition: ROCmBinaries.h:170
unsigned char cxbyte
unsigned byte
Definition: Config.h:213
const cxbyte * code
code
Definition: ROCmBinaries.h:171
main namespace
Definition: AsmDefs.h:38
ROCmRegionType
ROCm region/symbol type.
Definition: ROCmBinaries.h:50
Array< std::pair< CString, size_t > > RegionMap
region map type
Definition: ROCmBinaries.h:74
debug enabled
Definition: Commons.h:59
unsigned int cxuint
unsigned int
Definition: Config.h:221
const char * comment
comment
Definition: ROCmBinaries.h:173
use ordered append gds
Definition: Commons.h:56
inlines for accessing memory words in LittleEndian and unaligned
ROCm binary symbol input.
Definition: ROCmBinaries.h:155
GPUDeviceType
type of GPU device
Definition: GPUId.h:38
const cxbyte * getCode() const
get code
Definition: ROCmBinaries.h:107
ROCm binary generator.
Definition: ROCmBinaries.h:182
std::vector< ROCmSymbolInput > symbols
symbols
Definition: ROCmBinaries.h:169
utilities for other libraries and programs
uint32_t archMinor
GPU arch minor.
Definition: ROCmBinaries.h:167
ROCmRegionType type
type
Definition: ROCmBinaries.h:160
bool hasRegionMap() const
returns true if region map exists
Definition: ROCmBinaries.h:111
GPU identification utilities.
creation flags for ELF binaries
Definition: ElfBinaries.h:73
size_t size
size of symbol
Definition: ROCmBinaries.h:159
GPUDeviceType deviceType
GPU device type.
Definition: ROCmBinaries.h:166
size_t getRegionsNum() const
get regions number
Definition: ROCmBinaries.h:93
use queue pointer
Definition: Commons.h:45
use workgroup count for X dim
Definition: Commons.h:51
std::vector< BinSection > extraSections
extra sections
Definition: ROCmBinaries.h:174
OpenCL kernel to call ??
Definition: ROCmBinaries.h:54
size_t offset
offset in code
Definition: ROCmBinaries.h:158
use workgroup count for Z dim
Definition: Commons.h:53
Elf binaries handling.
simple C-string container
Definition: CString.h:38
Elf.h definitions.
containers and other utils for other libraries and programs
function kernel (code)
Definition: ROCmBinaries.h:53
size_t commentSize
comment size (can be null)
Definition: ROCmBinaries.h:172
AmdHsaKernelConfig ROCmKernelConfig
ROCm kernel configuration structure.
Definition: ROCmBinaries.h:137