CLRX  1
Unofficial OpenCL extensions designed for Radeon GPUs
AmdCL2BinGen.h
Go to the documentation of this file.
1 /*
2  * CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3  * Copyright (C) 2014-2017 Mateusz Szpakowski
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18  */
23 #ifndef __CLRX_AMDCL2BINGEN_H__
24 #define __CLRX_AMDCL2BINGEN_H__
25 
26 #include <CLRX/Config.h>
27 #include <cstddef>
28 #include <cstdint>
29 #include <string>
30 #include <ostream>
31 #include <vector>
32 #include <CLRX/amdbin/Commons.h>
33 #include <CLRX/amdbin/AmdBinGen.h>
34 #include <CLRX/utils/Containers.h>
35 #include <CLRX/utils/GPUId.h>
36 #include <CLRX/utils/InputOutput.h>
37 
38 namespace CLRX
39 {
40 
/// Kernel argument usage values.
/// NOTE(review): the enumerators (listing lines 42-45) are not shown in this rendering;
/// the member index describes them as: argument not used, argument to read,
/// argument to write, argument to read and write.
41 enum: cxbyte {
46 };
47 
/// AMDCL2SECTID_* identifiers (presumably ids of AMD OpenCL 2.0 specific ELF sections -
/// TODO confirm). Numbering starts at ELFSECTID_OTHER_BUILTIN and increases by one.
48 enum: cxuint {
49  AMDCL2SECTID_SAMPLERINIT = ELFSECTID_OTHER_BUILTIN, ///< first id, equal to ELFSECTID_OTHER_BUILTIN
50  AMDCL2SECTID_TEXTRELA,
51  AMDCL2SECTID_RODATARELA,
52  AMDCL2SECTID_NOTE,
53  AMDCL2SECTID_MAX = AMDCL2SECTID_NOTE ///< alias of the last id (AMDCL2SECTID_NOTE)
54 };
55 
/// Kernel configuration.
/// NOTE(review): the struct header (listing line 57) and the members on listing lines 73 and 76
/// (`exceptions` - enabled exception handling, `privilegedMode` - privileged mode, per the
/// member index) are not shown in this rendering.
58 {
59  std::vector<AmdKernelArgInput> args; ///< arguments
60  std::vector<cxuint> samplers; ///< defined samplers
61  uint32_t dimMask; ///< mask of dimension (bits: 0 - X, 1 - Y, 2 - Z)
62  uint32_t reqdWorkGroupSize[3]; ///< reqd_work_group_size
63  uint32_t usedVGPRsNum; ///< number of used VGPRs
64  uint32_t usedSGPRsNum; ///< number of used SGPRs
65  uint32_t pgmRSRC1; ///< pgmRSRC1 register value
66  uint32_t pgmRSRC2; ///< pgmRSRC2 register value
67  uint32_t floatMode; ///< float mode
68  uint32_t priority; ///< priority
69  size_t localSize; ///< used local size (not local defined in kernel arguments)
70  uint32_t gdsSize; ///< GDS size
71  uint32_t scratchBufferSize; ///< size of scratch buffer
72  bool ieeeMode; ///< IEEE mode
74  bool tgSize; ///< enable tgSize
75  bool debugMode; ///< debug mode
77  bool dx10Clamp; ///< DX10 CLAMP mode
78  bool useSetup; ///< use setup buffer (local sizes, global sizes)
79  bool useArgs; ///< use argument's buffer
80  bool useEnqueue; ///< this kernel enqueues other kernel
81  bool useGeneric; ///< use generic pointer addresses (for flat instrs)
82 
    // NOTE(review): presumably returns the total size of the kernel arguments for the given
    // bitness (is64Bit) and binary format (newBinaries) - TODO confirm against the implementation.
83  size_t calculateKernelArgSize(bool is64Bit, bool newBinaries) const;
84 };
85 
/// AMD CL2 relocation entry input.
/// NOTE(review): the struct header (listing line 87) and the members on listing lines 90-91
/// (`type` - relocation type, `symbol` - symbol (0 - globaldata, 1 - atomicdata), per the
/// member index) are not shown in this rendering.
88 {
89  size_t offset; ///< offset
92  size_t addend; ///< addend
93 };
94 
/// AMD kernel input.
/// NOTE(review): the struct header (listing line 96) and the members on listing lines 98,
/// 105-106 and 109 (`kernelName`, `isaMetadataSize`, `isaMetadata`, `config`, per the
/// member index) are not shown in this rendering.
97 {
99  size_t stubSize; ///< kernel stub size (used if useConfig=false)
100  const cxbyte* stub; ///< kernel stub (used if useConfig=false)
101  size_t setupSize; ///< kernel setup size (used if useConfig=false)
102  const cxbyte* setup; ///< kernel setup (used if useConfig=false)
103  size_t metadataSize; ///< metadata size (used if useConfig=false)
104  const cxbyte* metadata; ///< kernel's metadata (used if useConfig=false)
107  bool useConfig; ///< true if configuration has been used to generate binary
108  bool hsaConfig; ///< true if configuration in setup as HSA config
110  std::vector<AmdCL2RelInput> relocations; ///< relocation to kernel code
111  size_t codeSize; ///< code size
112  const cxbyte* code; ///< code
113 };
114 
/// Main input for AmdCL2GPUBinGenerator.
/// NOTE(review): the struct header (listing line 116) and the members on listing lines 119,
/// 123, 128-130 and 134-135 (`deviceType`, `globalData`, `samplerInitSize`, `samplerInit`,
/// `samplerConfig`, `compileOptions`, `aclVersion`, per the member index) are not shown
/// in this rendering.
117 {
118  bool is64Bit; ///< if binary is 64-bit
120  uint32_t archMinor; ///< arch minor
121  uint32_t archStepping; ///< arch stepping
122  size_t globalDataSize; ///< global constant data size
124  size_t rwDataSize; ///< global rw data size
125  const cxbyte* rwData; ///< global rw data
126  size_t bssAlignment; ///< alignment of global bss
127  size_t bssSize; ///< global bss size
131  std::vector<uint32_t> samplers; ///< sampler config
132  std::vector<size_t> samplerOffsets; ///< sampler offsets
133  uint32_t driverVersion; ///< driver version (majorVersion*100 + minorVersion)
136  std::vector<AmdCL2KernelInput> kernels; ///< kernels
137  std::vector<BinSection> extraSections; ///< extra sections
138  std::vector<BinSymbol> extraSymbols; ///< extra symbols
139  std::vector<BinSection> innerExtraSections; ///< list of extra sections
140  std::vector<BinSymbol> innerExtraSymbols; ///< list of extra symbols
141 
    // NOTE(review): presumably appends an empty kernel entry named kernelName to `kernels`
    // - TODO confirm against the implementation.
143  void addEmptyKernel(const char* kernelName);
144 };
145 
/// Main AMD OpenCL 2.0 GPU binary generator.
/// NOTE(review): the class header (listing line 147) and the declarations on listing lines
/// 158 and 184 are not shown in this rendering.
148 {
149 private:
    // NOTE(review): presumably tells whether this object owns `input` - TODO confirm.
150  bool manageable;
    // Input the generator works on; exposed through getInput()/setInput().
151  const AmdCL2Input* input;
152 
    // Shared back end taking one pointer per supported output kind (stream, char vector,
    // byte array). NOTE(review): presumably exactly one of them is non-null - TODO confirm.
153  void generateInternal(std::ostream* osPtr, std::vector<char>* vPtr,
154  Array<cxbyte>* aPtr) const;
155 public:
157 
    /// Constructor taking an existing input structure.
159  AmdCL2GPUBinGenerator(const AmdCL2Input* amdInput);
161 
    /// Constructor taking the individual input values; kernelInputs is passed by const reference.
173  AmdCL2GPUBinGenerator(bool _64bitMode, GPUDeviceType deviceType,
174  uint32_t archMinor, uint32_t archStepping, uint32_t driverVersion,
175  size_t globalDataSize, const cxbyte* globalData,
176  size_t rwDataSize, const cxbyte* rwData,
177  const std::vector<AmdCL2KernelInput>& kernelInputs);
    /// Same parameters as above, but kernelInputs is taken as an rvalue reference.
179  AmdCL2GPUBinGenerator(bool _64bitMode, GPUDeviceType deviceType,
180  uint32_t archMinor, uint32_t archStepping, uint32_t driverVersion,
181  size_t globalDataSize, const cxbyte* globalData,
182  size_t rwDataSize, const cxbyte* rwData,
183  std::vector<AmdCL2KernelInput>&& kernelInputs);
185 
    /// get input
187  const AmdCL2Input* getInput() const
188  { return input; }
189 
    /// set input
191  void setInput(const AmdCL2Input* input);
192 
    /// NOTE(review): presumably generates the binary into the byte array - TODO confirm.
194  void generate(Array<cxbyte>& array) const;
195 
    /// NOTE(review): presumably generates the binary into the output stream - TODO confirm.
197  void generate(std::ostream& os) const;
198 
    /// NOTE(review): presumably generates the binary into the char vector - TODO confirm.
200  void generate(std::vector<char>& vector) const;
201 };
202 
203 };
204 
205 #endif
main AMD OpenCL2.0 GPU Binary generator
Definition: AmdCL2BinGen.h:147
std::vector< BinSymbol > extraSymbols
extra symbols
Definition: AmdCL2BinGen.h:138
non copyable and non movable base structure (class)
Definition: Utilities.h:43
bool hsaConfig
true if configuration in setup as HSA config
Definition: AmdCL2BinGen.h:108
argument not used
Definition: AmdCL2BinGen.h:42
uint32_t pgmRSRC1
pgmRSRC1 register value
Definition: AmdCL2BinGen.h:65
bool tgSize
enable tgSize
Definition: AmdCL2BinGen.h:74
AMD CL2 Relocation entry input.
Definition: AmdCL2BinGen.h:87
AMD kernel input.
Definition: AmdCL2BinGen.h:96
std::vector< BinSection > extraSections
extra sections
Definition: AmdCL2BinGen.h:137
const cxbyte * stub
kernel stub (used if useConfig=false)
Definition: AmdCL2BinGen.h:100
size_t samplerInitSize
sampler init size
Definition: AmdCL2BinGen.h:128
bool privilegedMode
privileged mode
Definition: AmdCL2BinGen.h:76
size_t localSize
used local size (not local defined in kernel arguments)
Definition: AmdCL2BinGen.h:69
CString compileOptions
compile options
Definition: AmdCL2BinGen.h:134
size_t addend
addend
Definition: AmdCL2BinGen.h:92
uint32_t usedVGPRsNum
number of used VGPRs
Definition: AmdCL2BinGen.h:63
size_t stubSize
kernel stub size (used if useConfig=false)
Definition: AmdCL2BinGen.h:99
uint32_t archStepping
arch stepping
Definition: AmdCL2BinGen.h:121
bool dx10Clamp
DX10 CLAMP mode.
Definition: AmdCL2BinGen.h:77
uint32_t driverVersion
driver version (majorVersion*100 + minorVersion)
Definition: AmdCL2BinGen.h:133
common definitions for binaries
CString aclVersion
acl version string
Definition: AmdCL2BinGen.h:135
const cxbyte * code
code
Definition: AmdCL2BinGen.h:112
uint32_t dimMask
mask of dimension (bits: 0 - X, 1 - Y, 2 - Z)
Definition: AmdCL2BinGen.h:61
size_t isaMetadataSize
metadata size (used if useConfig=false)
Definition: AmdCL2BinGen.h:105
an array class
Definition: Containers.h:38
Configuration header.
uint32_t priority
priority
Definition: AmdCL2BinGen.h:68
cxuint RelocType
relocation type
Definition: Commons.h:33
bool ieeeMode
IEEE mode.
Definition: AmdCL2BinGen.h:72
RelocType type
relocation type
Definition: AmdCL2BinGen.h:90
const cxbyte * samplerInit
sampler init data
Definition: AmdCL2BinGen.h:129
uint32_t scratchBufferSize
size of scratch buffer
Definition: AmdCL2BinGen.h:71
std::vector< size_t > samplerOffsets
sampler offsets
Definition: AmdCL2BinGen.h:132
bool useConfig
true if configuration has been used to generate binary
Definition: AmdCL2BinGen.h:107
cxbyte exceptions
enabled exception handling
Definition: AmdCL2BinGen.h:73
std::vector< BinSymbol > innerExtraSymbols
list of extra symbols
Definition: AmdCL2BinGen.h:140
std::vector< AmdKernelArgInput > args
arguments
Definition: AmdCL2BinGen.h:59
input output utilities
uint32_t reqdWorkGroupSize[3]
reqd_work_group_size
Definition: AmdCL2BinGen.h:62
const cxbyte * setup
kernel setup (used if useConfig=false)
Definition: AmdCL2BinGen.h:102
unsigned char cxbyte
unsigned byte
Definition: Config.h:213
size_t codeSize
code size
Definition: AmdCL2BinGen.h:111
main namespace
Definition: AsmDefs.h:38
size_t setupSize
kernel setup size (used if useConfig=false)
Definition: AmdCL2BinGen.h:101
bool useEnqueue
this kernel enqueues other kernel
Definition: AmdCL2BinGen.h:80
const cxbyte * isaMetadata
kernel's metadata (used if useConfig=false)
Definition: AmdCL2BinGen.h:106
uint32_t floatMode
float mode
Definition: AmdCL2BinGen.h:67
unsigned int cxuint
unsigned int
Definition: Config.h:221
AmdCL2KernelConfig config
kernel's configuration
Definition: AmdCL2BinGen.h:109
size_t globalDataSize
global constant data size
Definition: AmdCL2BinGen.h:122
std::vector< AmdCL2RelInput > relocations
relocation to kernel code
Definition: AmdCL2BinGen.h:110
std::vector< uint32_t > samplers
sampler config
Definition: AmdCL2BinGen.h:131
cxuint symbol
symbol (0 - globaldata, 1 - atomicdata)
Definition: AmdCL2BinGen.h:91
CString kernelName
kernel name
Definition: AmdCL2BinGen.h:98
GPUDeviceType
type of GPU device
Definition: GPUId.h:38
bool debugMode
debug mode
Definition: AmdCL2BinGen.h:75
size_t bssSize
global bss size
Definition: AmdCL2BinGen.h:127
uint32_t pgmRSRC2
pgmRSRC2 register value
Definition: AmdCL2BinGen.h:66
std::vector< BinSection > innerExtraSections
list of extra sections
Definition: AmdCL2BinGen.h:139
kernel configuration
Definition: AmdCL2BinGen.h:57
bool useGeneric
use generic pointer addresses (for flat instrs)
Definition: AmdCL2BinGen.h:81
std::vector< AmdCL2KernelInput > kernels
kernels
Definition: AmdCL2BinGen.h:136
uint32_t usedSGPRsNum
number of used SGPRs
Definition: AmdCL2BinGen.h:64
size_t rwDataSize
global rw data size
Definition: AmdCL2BinGen.h:124
argument to read
Definition: AmdCL2BinGen.h:43
AMD binaries generator.
main Input for AmdCL2GPUBinGenerator
Definition: AmdCL2BinGen.h:116
bool useSetup
use setup buffer (local sizes, global sizes)
Definition: AmdCL2BinGen.h:78
const cxbyte * globalData
global constant data
Definition: AmdCL2BinGen.h:123
GPU identification utilities.
const cxbyte * rwData
global rw data
Definition: AmdCL2BinGen.h:125
size_t offset
offset
Definition: AmdCL2BinGen.h:89
bool samplerConfig
use sampler config instead of raw data from samplerinit
Definition: AmdCL2BinGen.h:130
const AmdCL2Input * getInput() const
get input
Definition: AmdCL2BinGen.h:187
std::vector< cxuint > samplers
defined samplers
Definition: AmdCL2BinGen.h:60
GPUDeviceType deviceType
GPU device type.
Definition: AmdCL2BinGen.h:119
size_t bssAlignment
alignment of global bss
Definition: AmdCL2BinGen.h:126
uint32_t archMinor
arch minor
Definition: AmdCL2BinGen.h:120
const cxbyte * metadata
kernel's metadata (used if useConfig=false)
Definition: AmdCL2BinGen.h:104
argument to write
Definition: AmdCL2BinGen.h:44
bool useArgs
use argument's buffer
Definition: AmdCL2BinGen.h:79
size_t metadataSize
metadata size (used if useConfig=false)
Definition: AmdCL2BinGen.h:103
simple C-string container
Definition: CString.h:38
uint32_t gdsSize
GDS size.
Definition: AmdCL2BinGen.h:70
containers and other utils for other libraries and programs
bool is64Bit
if binary is 64-bit
Definition: AmdCL2BinGen.h:118
argument to read and write
Definition: AmdCL2BinGen.h:45