CLRX  1
Unofficial OpenCL extensions designed for Radeon GPUs
AmdCL2BinGen.h
Go to the documentation of this file.
1 /*
2  * CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3  * Copyright (C) 2014-2018 Mateusz Szpakowski
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18  */
23 #ifndef __CLRX_AMDCL2BINGEN_H__
24 #define __CLRX_AMDCL2BINGEN_H__
25 
26 #include <CLRX/Config.h>
27 #include <cstddef>
28 #include <cstdint>
29 #include <string>
30 #include <ostream>
31 #include <vector>
32 #include <CLRX/amdbin/Commons.h>
33 #include <CLRX/amdbin/AmdBinGen.h>
34 #include <CLRX/utils/Containers.h>
35 #include <CLRX/utils/GPUId.h>
36 #include <CLRX/utils/InputOutput.h>
37 
38 namespace CLRX
39 {
40 
// Anonymous enumeration with underlying type cxbyte.
// NOTE(review): its enumerators (listing lines 42-45) are not visible in this listing;
// the trailing index describes them, in order, as: argument not used, argument to read,
// argument to write, argument to read and write.
41 enum: cxbyte {
46 };
47 
// Anonymous cxuint enumeration of AMDCL2SECTID_* identifiers. Numbering starts at
// ELFSECTID_OTHER_BUILTIN and each following entry is one greater than the previous one.
// NOTE(review): presumably these are builtin section ids of the AMD OpenCL 2.0 binary
// (sampler init, .text/.rodata relocations, note) - confirm against the generator source.
48 enum: cxuint {
49  AMDCL2SECTID_SAMPLERINIT = ELFSECTID_OTHER_BUILTIN, ///< first identifier, equal to ELFSECTID_OTHER_BUILTIN
50  AMDCL2SECTID_TEXTRELA,
51  AMDCL2SECTID_RODATARELA,
52  AMDCL2SECTID_NOTE,
53  AMDCL2SECTID_MAX = AMDCL2SECTID_NOTE ///< alias of the last identifier (AMDCL2SECTID_NOTE)
54 };
55 
// Kernel configuration. The trailing index places this definition at listing line 57,
// which (with the type's header) is not shown in this listing.
// NOTE(review): listing lines 73, 76 and 82 are also absent; the index names
// `cxbyte exceptions` (enabled exception handling), `bool privilegedMode` (privileged mode)
// and `CString vecTypeHint` (vectypehint) at those positions.
58 {
59  std::vector<AmdKernelArgInput> args; ///< arguments
60  std::vector<cxuint> samplers; ///< defined samplers
61  uint32_t dimMask; ///< mask of dimension (bits: 0 - X, 1 - Y, 2 - Z)
62  uint32_t reqdWorkGroupSize[3]; ///< reqd_work_group_size
63  uint32_t usedVGPRsNum; ///< number of used VGPRs
64  uint32_t usedSGPRsNum; ///< number of used SGPRs
65  uint32_t pgmRSRC1; ///< pgmRSRC1 register value
66  uint32_t pgmRSRC2; ///< pgmRSRC2 register value
67  uint32_t floatMode; ///< float mode
68  uint32_t priority; ///< priority
69  size_t localSize; ///< used local size (not local defined in kernel arguments)
70  uint32_t gdsSize; ///< GDS size
71  uint32_t scratchBufferSize; ///< size of scratch buffer
72  bool ieeeMode; ///< IEEE mode
74  bool tgSize; ///< enable tgSize
75  bool debugMode; ///< debug mode
77  bool dx10Clamp; ///< DX10 CLAMP mode
78  bool useSetup; ///< use setup buffer (local sizes, global sizes)
79  bool useArgs; ///< use argument's buffer
80  bool useEnqueue; ///< this kernel enqueues other kernel
81  bool useGeneric; ///< use generic pointer addresses (for flat instrs)
83  uint32_t workGroupSizeHint[3]; ///< workGroupSizeHint
84 
 // NOTE(review): defined elsewhere; presumably returns the size of the kernel arguments
 // for the given bitness (is64Bit) and binary format (newBinaries) - confirm.
85  size_t calculateKernelArgSize(bool is64Bit, bool newBinaries) const;
86 };
87 
// AMD CL2 relocation entry input. The trailing index places this definition at listing
// line 89, which is not shown in this listing.
// NOTE(review): listing lines 92 and 93 are also absent; the index names
// `RelocType type` (relocation type) and `cxuint symbol`
// (symbol: 0 - globaldata, 1 - atomicdata) at those positions.
90 {
91  size_t offset; ///< offset
94  size_t addend; ///< addend
95 };
96 
// AMD kernel input. The trailing index places this definition at listing line 98,
// which is not shown in this listing.
// NOTE(review): listing lines 100, 107, 108 and 111 are also absent; the index names
// `CString kernelName` (kernel name), `size_t isaMetadataSize`,
// `const cxbyte* isaMetadata` and `AmdCL2KernelConfig config` (kernel's configuration)
// at those positions.
99 {
101  size_t stubSize; ///< kernel stub size (used if useConfig=false)
102  const cxbyte* stub; ///< kernel stub (used if useConfig=false)
103  size_t setupSize; ///< kernel setup size (used if useConfig=false)
104  const cxbyte* setup; ///< kernel setup (used if useConfig=false)
105  size_t metadataSize; ///< metadata size (used if useConfig=false)
106  const cxbyte* metadata; ///< kernel's metadata (used if useConfig=false)
109  bool useConfig; ///< true if configuration has been used to generate binary
110  bool hsaConfig; ///< true if configuration in setup as HSA config
112  std::vector<AmdCL2RelInput> relocations; ///< relocation to kernel code
113  size_t codeSize; ///< code size
114  const cxbyte* code; ///< code
115  size_t offset; ///< kernel offset in (from setup data) code
116 };
117 
// Main input for AmdCL2GPUBinGenerator. The trailing index places this definition at
// listing line 119, which is not shown in this listing.
// NOTE(review): listing lines 122, 126, 134-136, 140 and 141 are also absent; the index
// names `GPUDeviceType deviceType`, `const cxbyte* globalData` (global constant data),
// `size_t samplerInitSize`, `const cxbyte* samplerInit`, `bool samplerConfig`,
// `CString compileOptions` and `CString aclVersion` at those positions.
120 {
121  bool is64Bit; ///< if binary is 64-bit
123  uint32_t archMinor; ///< arch minor
124  uint32_t archStepping; ///< arch stepping
125  size_t globalDataSize; ///< global constant data size
127  size_t rwDataSize; ///< global rw data size
128  const cxbyte* rwData; ///< global rw data
129  size_t codeSize; ///< code size
130  const cxbyte* code; ///< HSA text code
131  std::vector<AmdCL2RelInput> relocations; ///< relocation to main code
132  size_t bssAlignment; ///< alignment of global bss
133  size_t bssSize; ///< global bss size
137  std::vector<uint32_t> samplers; ///< sampler config
138  std::vector<size_t> samplerOffsets; ///< sampler offsets
139  uint32_t driverVersion; ///< driver version (majorVersion*100 + minorVersion)
142  std::vector<AmdCL2KernelInput> kernels; ///< kernels
143  std::vector<BinSection> extraSections; ///< extra sections
144  std::vector<BinSymbol> extraSymbols; ///< extra symbols
145  std::vector<BinSection> innerExtraSections; ///< list of extra sections
146  std::vector<BinSymbol> innerExtraSymbols; ///< list of extra symbols
147 
 // NOTE(review): defined elsewhere; presumably appends an empty kernel entry named
 // kernelName to `kernels` - confirm against the implementation.
149  void addEmptyKernel(const char* kernelName);
150 };
151 
// Main AMD OpenCL 2.0 GPU binary generator. The trailing index places this definition at
// listing line 153, which (with the class header) is not shown in this listing.
154 {
155 private:
 // NOTE(review): presumably true when this object owns `input` - confirm.
156  bool manageable;
 // Input description this generator works from (returned by getInput()).
157  const AmdCL2Input* input;
158 
 // Common backend taking one pointer per supported output kind (stream, char vector,
 // byte array). NOTE(review): presumably exactly one of them is non-null per call - confirm.
159  void generateInternal(std::ostream* osPtr, std::vector<char>* vPtr,
160  Array<cxbyte>* aPtr) const;
161 public:
163 
 // Constructs the generator from an existing input description.
165  explicit AmdCL2GPUBinGenerator(const AmdCL2Input* amdInput);
167 
 // Constructs the generator from individual parameters; kernel inputs are taken by
 // const reference.
179  AmdCL2GPUBinGenerator(bool _64bitMode, GPUDeviceType deviceType,
180  uint32_t archMinor, uint32_t archStepping, uint32_t driverVersion,
181  size_t globalDataSize, const cxbyte* globalData,
182  size_t rwDataSize, const cxbyte* rwData,
183  const std::vector<AmdCL2KernelInput>& kernelInputs);
 // Same parameters as above, but kernel inputs are taken by rvalue reference.
185  AmdCL2GPUBinGenerator(bool _64bitMode, GPUDeviceType deviceType,
186  uint32_t archMinor, uint32_t archStepping, uint32_t driverVersion,
187  size_t globalDataSize, const cxbyte* globalData,
188  size_t rwDataSize, const cxbyte* rwData,
189  std::vector<AmdCL2KernelInput>&& kernelInputs);
191 
 // get input
193  const AmdCL2Input* getInput() const
194  { return input; }
195 
 // Sets the input description used by this generator.
197  void setInput(const AmdCL2Input* input);
198 
 // generate() overloads, one per output target: byte array, output stream, char vector.
200  void generate(Array<cxbyte>& array) const;
201 
203  void generate(std::ostream& os) const;
204 
206  void generate(std::vector<char>& vector) const;
207 };
208 
209 };
210 
211 #endif
main AMD OpenCL2.0 GPU Binary generator
Definition: AmdCL2BinGen.h:153
std::vector< BinSymbol > extraSymbols
extra symbols
Definition: AmdCL2BinGen.h:144
non copyable and non movable base structure (class)
Definition: Utilities.h:46
bool hsaConfig
true if configuration in setup as HSA config
Definition: AmdCL2BinGen.h:110
uint32_t pgmRSRC1
pgmRSRC1 register value
Definition: AmdCL2BinGen.h:65
bool tgSize
enable tgSize
Definition: AmdCL2BinGen.h:74
AMD CL2 Relocation entry input.
Definition: AmdCL2BinGen.h:89
AMD kernel input.
Definition: AmdCL2BinGen.h:98
std::vector< BinSection > extraSections
extra sections
Definition: AmdCL2BinGen.h:143
const cxbyte * stub
kernel stub data (used if useConfig=false)
Definition: AmdCL2BinGen.h:102
size_t samplerInitSize
sampler init size
Definition: AmdCL2BinGen.h:134
argument to write
Definition: AmdCL2BinGen.h:44
bool privilegedMode
privileged mode
Definition: AmdCL2BinGen.h:76
size_t localSize
used local size (not local defined in kernel arguments)
Definition: AmdCL2BinGen.h:69
CString compileOptions
compile options
Definition: AmdCL2BinGen.h:140
size_t addend
addend
Definition: AmdCL2BinGen.h:94
uint32_t usedVGPRsNum
number of used VGPRs
Definition: AmdCL2BinGen.h:63
size_t stubSize
kernel stub size (used if useConfig=false)
Definition: AmdCL2BinGen.h:101
uint32_t archStepping
arch stepping
Definition: AmdCL2BinGen.h:124
bool dx10Clamp
DX10 CLAMP mode.
Definition: AmdCL2BinGen.h:77
uint32_t driverVersion
driver version (majorVersion*100 + minorVersion)
Definition: AmdCL2BinGen.h:139
common definitions for binaries
CString aclVersion
acl version string
Definition: AmdCL2BinGen.h:141
const cxbyte * code
code
Definition: AmdCL2BinGen.h:114
uint32_t dimMask
mask of dimension (bits: 0 - X, 1 - Y, 2 - Z)
Definition: AmdCL2BinGen.h:61
size_t isaMetadataSize
metadata size (used if useConfig=false)
Definition: AmdCL2BinGen.h:107
an array class
Definition: Containers.h:41
Configuration header.
uint32_t priority
priority
Definition: AmdCL2BinGen.h:68
cxuint RelocType
relocation type
Definition: Commons.h:33
bool ieeeMode
IEEE mode.
Definition: AmdCL2BinGen.h:72
RelocType type
relocation type
Definition: AmdCL2BinGen.h:92
const cxbyte * samplerInit
sampler init data
Definition: AmdCL2BinGen.h:135
uint32_t scratchBufferSize
size of scratch buffer
Definition: AmdCL2BinGen.h:71
std::vector< size_t > samplerOffsets
sampler offsets
Definition: AmdCL2BinGen.h:138
bool useConfig
true if configuration has been used to generate binary
Definition: AmdCL2BinGen.h:109
cxbyte exceptions
enabled exception handling
Definition: AmdCL2BinGen.h:73
std::vector< BinSymbol > innerExtraSymbols
list of extra symbols
Definition: AmdCL2BinGen.h:146
std::vector< AmdKernelArgInput > args
arguments
Definition: AmdCL2BinGen.h:59
input output utilities
uint32_t reqdWorkGroupSize[3]
reqd_work_group_size
Definition: AmdCL2BinGen.h:62
argument to read and write
Definition: AmdCL2BinGen.h:45
const cxbyte * setup
kernel setup data (used if useConfig=false)
Definition: AmdCL2BinGen.h:104
uint32_t workGroupSizeHint[3]
workGroupSizeHint
Definition: AmdCL2BinGen.h:83
unsigned char cxbyte
unsigned byte
Definition: Config.h:229
size_t codeSize
code size
Definition: AmdCL2BinGen.h:113
main namespace
Definition: AsmDefs.h:38
size_t setupSize
kernel setup size (used if useConfig=false)
Definition: AmdCL2BinGen.h:103
bool useEnqueue
this kernel enqueues other kernel
Definition: AmdCL2BinGen.h:80
const cxbyte * isaMetadata
kernel's metadata (used if useConfig=false)
Definition: AmdCL2BinGen.h:108
const cxbyte * code
HSA text code.
Definition: AmdCL2BinGen.h:130
uint32_t floatMode
float mode
Definition: AmdCL2BinGen.h:67
unsigned int cxuint
unsigned int
Definition: Config.h:237
AmdCL2KernelConfig config
kernel's configuration
Definition: AmdCL2BinGen.h:111
size_t offset
kernel offset in (from setup data) code
Definition: AmdCL2BinGen.h:115
size_t globalDataSize
global constant data size
Definition: AmdCL2BinGen.h:125
std::vector< AmdCL2RelInput > relocations
relocation to kernel code
Definition: AmdCL2BinGen.h:112
std::vector< uint32_t > samplers
sampler config
Definition: AmdCL2BinGen.h:137
cxuint symbol
symbol (0 - globaldata, 1 - atomicdata)
Definition: AmdCL2BinGen.h:93
argument not used
Definition: AmdCL2BinGen.h:42
CString kernelName
kernel name
Definition: AmdCL2BinGen.h:100
GPUDeviceType
type of GPU device
Definition: GPUId.h:51
CString vecTypeHint
vectypehint
Definition: AmdCL2BinGen.h:82
bool debugMode
debug mode
Definition: AmdCL2BinGen.h:75
size_t bssSize
global bss size
Definition: AmdCL2BinGen.h:133
uint32_t pgmRSRC2
pgmRSRC2 register value
Definition: AmdCL2BinGen.h:66
std::vector< BinSection > innerExtraSections
list of extra sections
Definition: AmdCL2BinGen.h:145
kernel configuration
Definition: AmdCL2BinGen.h:57
bool useGeneric
use generic pointer addresses (for flat instrs)
Definition: AmdCL2BinGen.h:81
std::vector< AmdCL2KernelInput > kernels
kernels
Definition: AmdCL2BinGen.h:142
uint32_t usedSGPRsNum
number of used SGPRs
Definition: AmdCL2BinGen.h:64
size_t rwDataSize
global rw data size
Definition: AmdCL2BinGen.h:127
AMD binaries generator.
main Input for AmdCL2GPUBinGenerator
Definition: AmdCL2BinGen.h:119
bool useSetup
use setup buffer (local sizes, global sizes)
Definition: AmdCL2BinGen.h:78
const cxbyte * globalData
global constant data
Definition: AmdCL2BinGen.h:126
argument to read
Definition: AmdCL2BinGen.h:43
GPU identification utilities.
const cxbyte * rwData
global rw data
Definition: AmdCL2BinGen.h:128
size_t offset
offset
Definition: AmdCL2BinGen.h:91
bool samplerConfig
use sampler config instead of raw data from samplerinit
Definition: AmdCL2BinGen.h:136
const AmdCL2Input * getInput() const
get input
Definition: AmdCL2BinGen.h:193
std::vector< cxuint > samplers
defined samplers
Definition: AmdCL2BinGen.h:60
GPUDeviceType deviceType
GPU device type.
Definition: AmdCL2BinGen.h:122
size_t bssAlignment
alignment of global bss
Definition: AmdCL2BinGen.h:132
uint32_t archMinor
arch minor
Definition: AmdCL2BinGen.h:123
const cxbyte * metadata
kernel's metadata (used if useConfig=false)
Definition: AmdCL2BinGen.h:106
std::vector< AmdCL2RelInput > relocations
relocation to main code
Definition: AmdCL2BinGen.h:131
size_t codeSize
code size
Definition: AmdCL2BinGen.h:129
bool useArgs
use argument's buffer
Definition: AmdCL2BinGen.h:79
size_t metadataSize
metadata size (used if useConfig=false)
Definition: AmdCL2BinGen.h:105
simple C-string container
Definition: CString.h:38
uint32_t gdsSize
GDS size.
Definition: AmdCL2BinGen.h:70
containers and other utils for other libraries and programs
bool is64Bit
if binary is 64-bit
Definition: AmdCL2BinGen.h:121