CLRX  1
Unofficial OpenCL extensions designed for Radeon GPUs
AmdCL2BinGen.h
Go to the documentation of this file.
1 /*
2  * CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3  * Copyright (C) 2014-2017 Mateusz Szpakowski
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18  */
23 #ifndef __CLRX_AMDCL2BINGEN_H__
24 #define __CLRX_AMDCL2BINGEN_H__
25 
26 #include <CLRX/Config.h>
27 #include <cstddef>
28 #include <cstdint>
29 #include <string>
30 #include <ostream>
31 #include <vector>
32 #include <CLRX/amdbin/Commons.h>
33 #include <CLRX/amdbin/AmdBinGen.h>
34 #include <CLRX/utils/Containers.h>
35 #include <CLRX/utils/GPUId.h>
36 #include <CLRX/utils/InputOutput.h>
37 
38 namespace CLRX
39 {
40 
41 enum: cxbyte {
46 };
47 
// Unnamed enum of AMDCL2SECTID_* constants with cxuint as the underlying type.
// NOTE(review): the names suggest ids of builtin sections of the AMD OpenCL 2.0 binary - confirm against the generator.
48 enum: cxuint {
49  AMDCL2SECTID_SAMPLERINIT = ELFSECTID_OTHER_BUILTIN, // first id, equal to ELFSECTID_OTHER_BUILTIN
50  AMDCL2SECTID_TEXTRELA, // previous value + 1
51  AMDCL2SECTID_RODATARELA, // previous value + 1
52  AMDCL2SECTID_NOTE, // previous value + 1
53  AMDCL2SECTID_MAX = AMDCL2SECTID_NOTE // same value as AMDCL2SECTID_NOTE (the last id)
54 };
55 
58 {
59  std::vector<AmdKernelArgInput> args;
60  std::vector<cxuint> samplers;
61  uint32_t dimMask;
62  uint32_t reqdWorkGroupSize[3];
63  uint32_t usedVGPRsNum;
64  uint32_t usedSGPRsNum;
65  uint32_t pgmRSRC1;
66  uint32_t pgmRSRC2;
67  uint32_t floatMode;
68  uint32_t priority;
69  size_t localSize;
70  uint32_t gdsSize;
71  uint32_t scratchBufferSize;
72  bool ieeeMode;
73  cxbyte exceptions;
74  bool tgSize;
75  bool debugMode;
77  bool dx10Clamp;
78  bool useSetup;
79  bool useArgs;
80  bool useEnqueue;
81  bool useGeneric;
82 };
83 
86 {
87  size_t offset;
89  cxuint symbol;
90  size_t addend;
91 };
92 
95 {
97  size_t stubSize;
98  const cxbyte* stub;
99  size_t setupSize;
100  const cxbyte* setup;
101  size_t metadataSize;
102  const cxbyte* metadata;
104  const cxbyte* isaMetadata;
105  bool useConfig;
107  std::vector<AmdCL2RelInput> relocations;
108  size_t codeSize;
109  const cxbyte* code;
110 };
111 
114 {
115  bool is64Bit;
117  uint32_t archMinor;
118  uint32_t archStepping;
119  size_t globalDataSize;
120  const cxbyte* globalData;
121  size_t rwDataSize;
122  const cxbyte* rwData;
123  size_t bssAlignment;
124  size_t bssSize;
126  const cxbyte* samplerInit;
128  std::vector<uint32_t> samplers;
129  std::vector<size_t> samplerOffsets;
130  uint32_t driverVersion;
133  std::vector<AmdCL2KernelInput> kernels;
134  std::vector<BinSection> extraSections;
135  std::vector<BinSymbol> extraSymbols;
136  std::vector<BinSection> innerExtraSections;
137  std::vector<BinSymbol> innerExtraSymbols;
138 
140  void addEmptyKernel(const char* kernelName);
141 };
142 
145 {
146 private:
147  bool manageable;
148  const AmdCL2Input* input;
149 
150  void generateInternal(std::ostream* osPtr, std::vector<char>* vPtr,
151  Array<cxbyte>* aPtr) const;
152 public:
154 
156  AmdCL2GPUBinGenerator(const AmdCL2Input* amdInput);
158 
170  AmdCL2GPUBinGenerator(bool _64bitMode, GPUDeviceType deviceType,
171  uint32_t archMinor, uint32_t archStepping, uint32_t driverVersion,
172  size_t globalDataSize, const cxbyte* globalData,
173  size_t rwDataSize, const cxbyte* rwData,
174  const std::vector<AmdCL2KernelInput>& kernelInputs);
176  AmdCL2GPUBinGenerator(bool _64bitMode, GPUDeviceType deviceType,
177  uint32_t archMinor, uint32_t archStepping, uint32_t driverVersion,
178  size_t globalDataSize, const cxbyte* globalData,
179  size_t rwDataSize, const cxbyte* rwData,
180  std::vector<AmdCL2KernelInput>&& kernelInputs);
182 
184  const AmdCL2Input* getInput() const
185  { return input; }
186 
188  void setInput(const AmdCL2Input* input);
189 
191  void generate(Array<cxbyte>& array) const;
192 
194  void generate(std::ostream& os) const;
195 
197  void generate(std::vector<char>& vector) const;
198 };
199 
200 };
201 
202 #endif
main AMD OpenCL2.0 GPU Binary generator
Definition: AmdCL2BinGen.h:144
std::vector< BinSymbol > extraSymbols
extra symbols
Definition: AmdCL2BinGen.h:135
non copyable and non movable base structure (class)
Definition: Utilities.h:43
argument not used
Definition: AmdCL2BinGen.h:42
uint32_t pgmRSRC1
pgmRSRC1 register value
Definition: AmdCL2BinGen.h:65
bool tgSize
enable tgSize
Definition: AmdCL2BinGen.h:74
AMD CL2 Relocation entry input.
Definition: AmdCL2BinGen.h:85
AMD kernel input.
Definition: AmdCL2BinGen.h:94
std::vector< BinSection > extraSections
extra sections
Definition: AmdCL2BinGen.h:134
const cxbyte * stub
kernel stub (used if useConfig=false)
Definition: AmdCL2BinGen.h:98
size_t samplerInitSize
sampler init size
Definition: AmdCL2BinGen.h:125
bool privilegedMode
privileged mode
Definition: AmdCL2BinGen.h:76
size_t localSize
used local size (not local defined in kernel arguments)
Definition: AmdCL2BinGen.h:69
CString compileOptions
compile options
Definition: AmdCL2BinGen.h:131
size_t addend
addend
Definition: AmdCL2BinGen.h:90
uint32_t usedVGPRsNum
number of used VGPRs
Definition: AmdCL2BinGen.h:63
size_t stubSize
kernel stub size (used if useConfig=false)
Definition: AmdCL2BinGen.h:97
uint32_t archStepping
arch stepping
Definition: AmdCL2BinGen.h:118
bool dx10Clamp
DX10 CLAMP mode.
Definition: AmdCL2BinGen.h:77
uint32_t driverVersion
driver version (majorVersion*100 + minorVersion)
Definition: AmdCL2BinGen.h:130
common definitions for binaries
CString aclVersion
acl version string
Definition: AmdCL2BinGen.h:132
const cxbyte * code
code
Definition: AmdCL2BinGen.h:109
uint32_t dimMask
mask of dimension (bits: 0 - X, 1 - Y, 2 - Z)
Definition: AmdCL2BinGen.h:61
size_t isaMetadataSize
ISA metadata size (used if useConfig=false)
Definition: AmdCL2BinGen.h:103
an array class
Definition: Containers.h:38
uint32_t priority
priority
Definition: AmdCL2BinGen.h:68
cxuint RelocType
relocation type
Definition: Commons.h:32
bool ieeeMode
IEEE mode.
Definition: AmdCL2BinGen.h:72
RelocType type
relocation type
Definition: AmdCL2BinGen.h:88
const cxbyte * samplerInit
sampler init data
Definition: AmdCL2BinGen.h:126
uint32_t scratchBufferSize
size of scratch buffer
Definition: AmdCL2BinGen.h:71
std::vector< size_t > samplerOffsets
sampler offsets
Definition: AmdCL2BinGen.h:129
bool useConfig
true if configuration has been used to generate binary
Definition: AmdCL2BinGen.h:105
cxbyte exceptions
enabled exception handling
Definition: AmdCL2BinGen.h:73
std::vector< BinSymbol > innerExtraSymbols
list of extra symbols
Definition: AmdCL2BinGen.h:137
std::vector< AmdKernelArgInput > args
arguments
Definition: AmdCL2BinGen.h:59
input output utilities
uint32_t reqdWorkGroupSize[3]
reqd_work_group_size
Definition: AmdCL2BinGen.h:62
const cxbyte * setup
kernel setup (used if useConfig=false)
Definition: AmdCL2BinGen.h:100
size_t codeSize
code size
Definition: AmdCL2BinGen.h:108
main namespace
Definition: AsmDefs.h:38
size_t setupSize
kernel setup size (used if useConfig=false)
Definition: AmdCL2BinGen.h:99
bool useEnqueue
this kernel enqueues other kernel
Definition: AmdCL2BinGen.h:80
const cxbyte * isaMetadata
kernel's ISA metadata (used if useConfig=false)
Definition: AmdCL2BinGen.h:104
uint32_t floatMode
float mode
Definition: AmdCL2BinGen.h:67
AmdCL2KernelConfig config
kernel's configuration
Definition: AmdCL2BinGen.h:106
size_t globalDataSize
global constant data size
Definition: AmdCL2BinGen.h:119
std::vector< AmdCL2RelInput > relocations
relocation to kernel code
Definition: AmdCL2BinGen.h:107
std::vector< uint32_t > samplers
sampler config
Definition: AmdCL2BinGen.h:128
cxuint symbol
symbol (0 - globaldata, 1 - atomicdata)
Definition: AmdCL2BinGen.h:89
CString kernelName
kernel name
Definition: AmdCL2BinGen.h:96
GPUDeviceType
type of GPU device
Definition: GPUId.h:38
bool debugMode
debug mode
Definition: AmdCL2BinGen.h:75
size_t bssSize
global bss size
Definition: AmdCL2BinGen.h:124
uint32_t pgmRSRC2
pgmRSRC2 register value
Definition: AmdCL2BinGen.h:66
std::vector< BinSection > innerExtraSections
list of extra sections
Definition: AmdCL2BinGen.h:136
kernel configuration
Definition: AmdCL2BinGen.h:57
bool useGeneric
use generic pointer addresses (for flat instrs)
Definition: AmdCL2BinGen.h:81
std::vector< AmdCL2KernelInput > kernels
kernels
Definition: AmdCL2BinGen.h:133
uint32_t usedSGPRsNum
number of used SGPRs
Definition: AmdCL2BinGen.h:64
size_t rwDataSize
global rw data size
Definition: AmdCL2BinGen.h:121
argument to read
Definition: AmdCL2BinGen.h:43
AMD binaries generator.
main Input for AmdCL2GPUBinGenerator
Definition: AmdCL2BinGen.h:113
bool useSetup
use setup buffer (local sizes, global sizes)
Definition: AmdCL2BinGen.h:78
const cxbyte * globalData
global constant data
Definition: AmdCL2BinGen.h:120
GPU identification utilities.
const cxbyte * rwData
global rw data
Definition: AmdCL2BinGen.h:122
size_t offset
offset
Definition: AmdCL2BinGen.h:87
bool samplerConfig
use sampler config instead of raw data from samplerinit
Definition: AmdCL2BinGen.h:127
const AmdCL2Input * getInput() const
get input
Definition: AmdCL2BinGen.h:184
std::vector< cxuint > samplers
defined samplers
Definition: AmdCL2BinGen.h:60
GPUDeviceType deviceType
GPU device type.
Definition: AmdCL2BinGen.h:116
size_t bssAlignment
alignment of global bss
Definition: AmdCL2BinGen.h:123
uint32_t archMinor
arch minor
Definition: AmdCL2BinGen.h:117
const cxbyte * metadata
kernel's metadata (used if useConfig=false)
Definition: AmdCL2BinGen.h:102
argument to write
Definition: AmdCL2BinGen.h:44
bool useArgs
use argument's buffer
Definition: AmdCL2BinGen.h:79
size_t metadataSize
metadata size (used if useConfig=false)
Definition: AmdCL2BinGen.h:101
simple C-string container
Definition: CString.h:38
uint32_t gdsSize
GDS size.
Definition: AmdCL2BinGen.h:70
containers and other utils for other libraries and programs
bool is64Bit
if binary is 64-bit
Definition: AmdCL2BinGen.h:115
argument to read and write
Definition: AmdCL2BinGen.h:45