CLRX  1
Unofficial OpenCL extensions designed for Radeon GPUs
AmdCL2BinGen.h
Go to the documentation of this file.
1 /*
2  * CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3  * Copyright (C) 2014-2016 Mateusz Szpakowski
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18  */
23 #ifndef __CLRX_AMDCL2BINGEN_H__
24 #define __CLRX_AMDCL2BINGEN_H__
25 
26 #include <CLRX/Config.h>
27 #include <cstddef>
28 #include <cstdint>
29 #include <string>
30 #include <ostream>
31 #include <vector>
32 #include <CLRX/amdbin/Commons.h>
33 #include <CLRX/amdbin/AmdBinGen.h>
34 #include <CLRX/utils/Containers.h>
35 #include <CLRX/utils/GPUId.h>
36 #include <CLRX/utils/InputOutput.h>
37 
38 namespace CLRX
39 {
40 
41 enum: cxbyte {
46 };
47 
48 enum: cxuint {  // unnamed enum of AMDCL2SECTID_* constants, underlying type cxuint
49  AMDCL2SECTID_SAMPLERINIT = ELFSECTID_OTHER_BUILTIN,  // first value starts at ELFSECTID_OTHER_BUILTIN (declared outside this listing)
50  AMDCL2SECTID_TEXTRELA,  // ELFSECTID_OTHER_BUILTIN + 1
51  AMDCL2SECTID_RODATARELA,  // ELFSECTID_OTHER_BUILTIN + 2
52  AMDCL2SECTID_NOTE,  // ELFSECTID_OTHER_BUILTIN + 3
53  AMDCL2SECTID_MAX = AMDCL2SECTID_NOTE  // alias of the last enumerator; update when a new id is appended
54 };
55 
58 {
59  std::vector<AmdKernelArgInput> args;
60  std::vector<cxuint> samplers;
61  uint32_t dimMask;
62  uint32_t reqdWorkGroupSize[3];
63  uint32_t usedVGPRsNum;
64  uint32_t usedSGPRsNum;
65  uint32_t pgmRSRC1;
66  uint32_t pgmRSRC2;
67  uint32_t floatMode;
68  uint32_t priority;
69  size_t localSize;
70  uint32_t scratchBufferSize;
71  bool ieeeMode;
72  cxbyte exceptions;
73  bool tgSize;
74  bool debugMode;
76  bool dx10Clamp;
77  bool useSetup;
78  bool useArgs;
79  bool useEnqueue;
80  bool useGeneric;
81 };
82 
85 {
86  size_t offset;
88  cxuint symbol;
89  size_t addend;
90 };
91 
94 {
96  size_t stubSize;
97  const cxbyte* stub;
98  size_t setupSize;
99  const cxbyte* setup;
100  size_t metadataSize;
101  const cxbyte* metadata;
103  const cxbyte* isaMetadata;
104  bool useConfig;
106  std::vector<AmdCL2RelInput> relocations;
107  size_t codeSize;
108  const cxbyte* code;
109 };
110 
113 {
115  uint32_t archMinor;
116  uint32_t archStepping;
117  size_t globalDataSize;
118  const cxbyte* globalData;
119  size_t rwDataSize;
120  const cxbyte* rwData;
121  size_t bssAlignment;
122  size_t bssSize;
124  const cxbyte* samplerInit;
126  std::vector<uint32_t> samplers;
127  std::vector<size_t> samplerOffsets;
128  uint32_t driverVersion;
131  std::vector<AmdCL2KernelInput> kernels;
132  std::vector<BinSection> extraSections;
133  std::vector<BinSymbol> extraSymbols;
134  std::vector<BinSection> innerExtraSections;
135  std::vector<BinSymbol> innerExtraSymbols;
136 
138  void addEmptyKernel(const char* kernelName);
139 };
140 
143 {
144 private:
145  bool manageable;
146  const AmdCL2Input* input;
147 
148  void generateInternal(std::ostream* osPtr, std::vector<char>* vPtr,
149  Array<cxbyte>* aPtr) const;
150 public:
152 
154  AmdCL2GPUBinGenerator(const AmdCL2Input* amdInput);
156 
168  uint32_t archMinor, uint32_t archStepping, uint32_t driverVersion,
169  size_t globalDataSize, const cxbyte* globalData,
170  size_t rwDataSize, const cxbyte* rwData,
171  const std::vector<AmdCL2KernelInput>& kernelInputs);
174  uint32_t archMinor, uint32_t archStepping, uint32_t driverVersion,
175  size_t globalDataSize, const cxbyte* globalData,
176  size_t rwDataSize, const cxbyte* rwData,
177  std::vector<AmdCL2KernelInput>&& kernelInputs);
179 
181  const AmdCL2Input* getInput() const
182  { return input; }
183 
185  void setInput(const AmdCL2Input* input);
186 
188  void generate(Array<cxbyte>& array) const;
189 
191  void generate(std::ostream& os) const;
192 
194  void generate(std::vector<char>& vector) const;
195 };
196 
197 };
198 
199 #endif
main AMD OpenCL2.0 GPU Binary generator
Definition: AmdCL2BinGen.h:142
std::vector< BinSymbol > extraSymbols
extra symbols
Definition: AmdCL2BinGen.h:133
non copyable and non movable base structure (class)
Definition: Utilities.h:43
uint32_t pgmRSRC1
pgmRSRC1 register value
Definition: AmdCL2BinGen.h:65
bool tgSize
enable tgSize
Definition: AmdCL2BinGen.h:73
AMD CL2 Relocation entry input.
Definition: AmdCL2BinGen.h:84
AMD kernel input.
Definition: AmdCL2BinGen.h:93
std::vector< BinSection > extraSections
extra sections
Definition: AmdCL2BinGen.h:132
const cxbyte * stub
kernel stub (used if useConfig=false)
Definition: AmdCL2BinGen.h:97
size_t samplerInitSize
sampler init size
Definition: AmdCL2BinGen.h:123
bool privilegedMode
privileged mode
Definition: AmdCL2BinGen.h:75
size_t localSize
used local size (not local defined in kernel arguments)
Definition: AmdCL2BinGen.h:69
CString compileOptions
compile options
Definition: AmdCL2BinGen.h:129
size_t addend
addend
Definition: AmdCL2BinGen.h:89
uint32_t usedVGPRsNum
number of used VGPRs
Definition: AmdCL2BinGen.h:63
size_t stubSize
kernel stub size (used if useConfig=false)
Definition: AmdCL2BinGen.h:96
uint32_t archStepping
arch stepping
Definition: AmdCL2BinGen.h:116
bool dx10Clamp
DX10 CLAMP mode.
Definition: AmdCL2BinGen.h:76
uint32_t driverVersion
driver version (majorVersion*100 + minorVersion)
Definition: AmdCL2BinGen.h:128
common definitions for binaries
CString aclVersion
acl version string
Definition: AmdCL2BinGen.h:130
const cxbyte * code
code
Definition: AmdCL2BinGen.h:108
uint32_t dimMask
mask of dimension (bits: 0 - X, 1 - Y, 2 - Z)
Definition: AmdCL2BinGen.h:61
size_t isaMetadataSize
ISA metadata size (used if useConfig=false)
Definition: AmdCL2BinGen.h:102
an array class
Definition: Containers.h:38
uint32_t priority
priority
Definition: AmdCL2BinGen.h:68
cxuint RelocType
relocation type
Definition: Commons.h:32
bool ieeeMode
IEEE mode.
Definition: AmdCL2BinGen.h:71
RelocType type
relocation type
Definition: AmdCL2BinGen.h:87
const cxbyte * samplerInit
sampler init data
Definition: AmdCL2BinGen.h:124
uint32_t scratchBufferSize
size of scratch buffer
Definition: AmdCL2BinGen.h:70
std::vector< size_t > samplerOffsets
sampler offsets
Definition: AmdCL2BinGen.h:127
bool useConfig
true if configuration has been used to generate binary
Definition: AmdCL2BinGen.h:104
cxbyte exceptions
enabled exception handling
Definition: AmdCL2BinGen.h:72
std::vector< BinSymbol > innerExtraSymbols
list of extra symbols
Definition: AmdCL2BinGen.h:135
std::vector< AmdKernelArgInput > args
arguments
Definition: AmdCL2BinGen.h:59
input output utilities
uint32_t reqdWorkGroupSize[3]
reqd_work_group_size
Definition: AmdCL2BinGen.h:62
const cxbyte * setup
kernel setup (used if useConfig=false)
Definition: AmdCL2BinGen.h:99
size_t codeSize
code size
Definition: AmdCL2BinGen.h:107
argument not used
Definition: AmdCL2BinGen.h:42
main namespace
Definition: AsmFormats.h:41
size_t setupSize
kernel setup size (used if useConfig=false)
Definition: AmdCL2BinGen.h:98
bool useEnqueue
this kernel enqueues other kernel
Definition: AmdCL2BinGen.h:79
const cxbyte * isaMetadata
kernel's ISA metadata (used if useConfig=false)
Definition: AmdCL2BinGen.h:103
uint32_t floatMode
float mode
Definition: AmdCL2BinGen.h:67
AmdCL2KernelConfig config
kernel's configuration
Definition: AmdCL2BinGen.h:105
argument to read and write
Definition: AmdCL2BinGen.h:45
size_t globalDataSize
global constant data size
Definition: AmdCL2BinGen.h:117
std::vector< AmdCL2RelInput > relocations
relocation to kernel code
Definition: AmdCL2BinGen.h:106
std::vector< uint32_t > samplers
sampler config
Definition: AmdCL2BinGen.h:126
cxuint symbol
symbol (0 - globaldata, 1 - atomicdata)
Definition: AmdCL2BinGen.h:88
CString kernelName
kernel name
Definition: AmdCL2BinGen.h:95
GPUDeviceType
type of GPU device
Definition: GPUId.h:38
bool debugMode
debug mode
Definition: AmdCL2BinGen.h:74
size_t bssSize
global bss size
Definition: AmdCL2BinGen.h:122
uint32_t pgmRSRC2
pgmRSRC2 register value
Definition: AmdCL2BinGen.h:66
std::vector< BinSection > innerExtraSections
list of extra sections
Definition: AmdCL2BinGen.h:134
kernel configuration
Definition: AmdCL2BinGen.h:57
bool useGeneric
use generic pointer addresses (for flat instrs)
Definition: AmdCL2BinGen.h:80
argument to write
Definition: AmdCL2BinGen.h:44
std::vector< AmdCL2KernelInput > kernels
kernels
Definition: AmdCL2BinGen.h:131
uint32_t usedSGPRsNum
number of used SGPRs
Definition: AmdCL2BinGen.h:64
size_t rwDataSize
global rw data size
Definition: AmdCL2BinGen.h:119
AMD binaries generator.
main Input for AmdCL2GPUBinGenerator
Definition: AmdCL2BinGen.h:112
bool useSetup
use setup buffer (local sizes, global sizes)
Definition: AmdCL2BinGen.h:77
const cxbyte * globalData
global constant data
Definition: AmdCL2BinGen.h:118
GPU identification utilities.
const cxbyte * rwData
global rw data
Definition: AmdCL2BinGen.h:120
size_t offset
offset
Definition: AmdCL2BinGen.h:86
bool samplerConfig
use sampler config instead of raw data from samplerinit
Definition: AmdCL2BinGen.h:125
const AmdCL2Input * getInput() const
get input
Definition: AmdCL2BinGen.h:181
std::vector< cxuint > samplers
defined samplers
Definition: AmdCL2BinGen.h:60
GPUDeviceType deviceType
GPU device type.
Definition: AmdCL2BinGen.h:114
size_t bssAlignment
alignment of global bss
Definition: AmdCL2BinGen.h:121
const cxbyte * metadata
kernel's metadata (used if useConfig=false)
Definition: AmdCL2BinGen.h:101
bool useArgs
use argument's buffer
Definition: AmdCL2BinGen.h:78
size_t metadataSize
metadata size (used if useConfig=false)
Definition: AmdCL2BinGen.h:100
simple C-string container
Definition: CString.h:38
containers and other utils for other libraries and programs
argument to read
Definition: AmdCL2BinGen.h:43