23 #ifndef __CLRX_ROCMBINARIES_H__ 24 #define __CLRX_ROCMBINARIES_H__ 26 #include <CLRX/Config.h> 31 #include <CLRX/amdbin/Elf.h> 76 std::unique_ptr<ROCmRegion[]> regions;
81 ROCmBinary(
size_t binaryCodeSize, cxbyte* binaryCode,
87 {
return regionsNum; }
91 {
return regions[index]; }
94 const ROCmRegion& getRegion(
const char* name)
const;
109 ROCMFLAG_USE_PRIVATE_SEGMENT_BUFFER = 1,
110 ROCMFLAG_USE_DISPATCH_PTR = 2,
111 ROCMFLAG_USE_QUEUE_PTR = 4,
112 ROCMFLAG_USE_KERNARG_SEGMENT_PTR = 8,
113 ROCMFLAG_USE_DISPATCH_ID = 16,
114 ROCMFLAG_USE_FLAT_SCRATCH_INIT = 32,
115 ROCMFLAG_USE_PRIVATE_SEGMENT_SIZE = 64,
116 ROCMFLAG_USE_GRID_WORKGROUP_COUNT_BIT = 7,
117 ROCMFLAG_USE_GRID_WORKGROUP_COUNT_X = 128,
118 ROCMFLAG_USE_GRID_WORKGROUP_COUNT_Y = 256,
119 ROCMFLAG_USE_GRID_WORKGROUP_COUNT_Z = 512,
121 ROCMFLAG_USE_ORDERED_APPEND_GDS = 1,
122 ROCMFLAG_PRIVATE_ELEM_SIZE_BIT = 1,
123 ROCMFLAG_USE_PTR64 = 8,
124 ROCMFLAG_USE_DYNAMIC_CALL_STACK = 16,
125 ROCMFLAG_USE_DEBUG_ENABLED = 32,
126 ROCMFLAG_USE_XNACK_ENABLED = 64
132 uint32_t amdCodeVersionMajor;
133 uint32_t amdCodeVersionMinor;
134 uint16_t amdMachineKind;
135 uint16_t amdMachineMajor;
136 uint16_t amdMachineMinor;
137 uint16_t amdMachineStepping;
138 uint64_t kernelCodeEntryOffset;
139 uint64_t kernelCodePrefetchOffset;
140 uint64_t kernelCodePrefetchSize;
141 uint64_t maxScrachBackingMemorySize;
142 uint32_t computePgmRsrc1;
143 uint32_t computePgmRsrc2;
144 uint16_t enableSpgrRegisterFlags;
145 uint16_t enableFeatureFlags;
146 uint32_t workitemPrivateSegmentSize;
147 uint32_t workgroupGroupSegmentSize;
148 uint32_t gdsSegmentSize;
149 uint64_t kernargSegmentSize;
150 uint32_t workgroupFbarrierCount;
151 uint16_t wavefrontSgprCount;
152 uint16_t workitemVgprCount;
153 uint16_t reservedVgprFirst;
154 uint16_t reservedVgprCount;
155 uint16_t reservedSgprFirst;
156 uint16_t reservedSgprCount;
157 uint16_t debugWavefrontPrivateSegmentOffsetSgpr;
158 uint16_t debugPrivateSegmentBufferSgpr;
159 cxbyte kernargSegmentAlignment;
160 cxbyte groupSegmentAlignment;
161 cxbyte privateSegmentAlignment;
162 cxbyte wavefrontSize;
163 uint32_t callConvention;
164 uint32_t reserved1[3];
165 uint64_t runtimeLoaderKernelSymbol;
166 cxbyte controlDirective[128];
170 extern bool isROCmBinary(
size_t binarySize,
const cxbyte* binary);
177 ROCMSECTID_HASH = ELFSECTID_OTHER_BUILTIN,
180 ROCMSECTID_GPUCONFIG,
181 ROCMSECTID_MAX = ROCMSECTID_GPUCONFIG
206 void addEmptyKernel(
const char* kernelName);
216 void generateInternal(std::ostream* osPtr, std::vector<char>* vPtr,
224 size_t codeSize,
const cxbyte* code,
225 const std::vector<ROCmSymbolInput>& symbols);
227 size_t codeSize,
const cxbyte* code,
228 std::vector<ROCmSymbolInput>&& symbols);
242 void generate(std::ostream& os)
const;
245 void generate(std::vector<char>& vector)
const;
ROCm data region.
Definition: ROCmBinaries.h:58
non copyable and non movable base structure (class)
Definition: Utilities.h:43
uint32_t Flags
type for declaring various flags
Definition: Utilities.h:97
const ROCmInput * getInput() const
get input
Definition: ROCmBinaries.h:232
size_t size
data size
Definition: ROCmBinaries.h:61
class ElfBinaryTemplate< Elf64Types > ElfBinary64
type for 64-bit ELF binary
Definition: ElfBinaries.h:432
CString regionName
region name
Definition: ROCmBinaries.h:60
size_t offset
data offset
Definition: ROCmBinaries.h:62
an array class
Definition: Containers.h:38
const ROCmRegion & getRegion(size_t index) const
get region by index
Definition: ROCmBinaries.h:90
size_t getCodeSize() const
get code size
Definition: ROCmBinaries.h:97
ROCm main binary for GPU for 64-bit mode.
Definition: ROCmBinaries.h:70
data object
Definition: ROCmBinaries.h:52
bool isROCmBinary(size_t binarySize, const cxbyte *binary)
check whether the binary is a ROCm binary
main namespace
Definition: AsmFormats.h:41
ROCmRegionType
ROCm region/symbol type.
Definition: ROCmBinaries.h:50
ROCm kernel configuration structure.
Definition: ROCmBinaries.h:130
creation flags for ELF binaries
Definition: ElfBinaries.h:73
inlines for accessing little-endian and unaligned memory words
GPUDeviceType
type of GPU device
Definition: GPUId.h:38
const cxbyte * getCode() const
get code
Definition: ROCmBinaries.h:100
create region map
Definition: ROCmBinaries.h:44
Definition: ROCmBinaries.h:209
utilities for other libraries and programs
all ROCm binaries flags
Definition: ROCmBinaries.h:46
bool hasRegionMap() const
returns true if region map exists
Definition: ROCmBinaries.h:104
GPU identification utilities.
size_t getRegionsNum() const
get regions number
Definition: ROCmBinaries.h:86
OpenCL kernel to call ??
Definition: ROCmBinaries.h:54
simple C-string container
Definition: CString.h:38
containers and other utils for other libraries and programs
function kernel (code)
Definition: ROCmBinaries.h:53