CLRX  1
Unofficial OpenCL extensions designed for Radeon GPUs
Commons.h
Go to the documentation of this file.
1 /*
2  * CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3  * Copyright (C) 2014-2018 Mateusz Szpakowski
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18  */
23 #ifndef __CLRX_COMMONS_H__
24 #define __CLRX_COMMONS_H__
25 
26 #include <CLRX/Config.h>
27 #include <CLRX/utils/MemAccess.h>
28 
30 namespace CLRX
31 {
33 typedef cxuint RelocType;
34 
35 enum
36 {
40 };
41 
42 enum {
44  AMDHSAFLAG_USE_DISPATCH_PTR = 2,
47  AMDHSAFLAG_USE_DISPATCH_ID = 16,
48  AMDHSAFLAG_USE_FLAT_SCRATCH_INIT = 32,
50  AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_BIT = 7,
54 
55  AMDHSAFLAG_USE_ORDERED_APPEND_GDS = 1,
58  AMDHSAFLAG_USE_DYNAMIC_CALL_STACK = 16,
61 };
62 
65 {
68  uint16_t amdMachineKind;
69  uint16_t amdMachineMajor;
70  uint16_t amdMachineMinor;
71  uint16_t amdMachineStepping;
74  uint64_t kernelCodePrefetchSize;
75  uint64_t maxScrachBackingMemorySize;
76  uint32_t computePgmRsrc1;
77  uint32_t computePgmRsrc2;
79  uint16_t enableFeatureFlags;
82  uint32_t gdsSegmentSize;
83  uint64_t kernargSegmentSize;
84  uint32_t workgroupFbarrierCount;
85  uint16_t wavefrontSgprCount;
86  uint16_t workitemVgprCount;
87  uint16_t reservedVgprFirst;
88  uint16_t reservedVgprCount;
89  uint16_t reservedSgprFirst;
90  uint16_t reservedSgprCount;
91  uint16_t debugWavefrontPrivateSegmentOffsetSgpr;
92  uint16_t debugPrivateSegmentBufferSgpr;
97  uint32_t callConvention;
98  uint32_t reserved1[3];
99  uint64_t runtimeLoaderKernelSymbol;
101 
102  void toLE() // converts the multi-byte members listed below to little-endian form, in place
103  {
104  SLEV(amdCodeVersionMajor, amdCodeVersionMajor); // every call passes the same member as both destination and value
105  SLEV(amdCodeVersionMinor, amdCodeVersionMinor);
106  SLEV(amdMachineKind, amdMachineKind);
107  SLEV(amdMachineMajor, amdMachineMajor);
108  SLEV(amdMachineMinor, amdMachineMinor);
109  SLEV(amdMachineStepping, amdMachineStepping);
110  SLEV(kernelCodeEntryOffset, kernelCodeEntryOffset);
111  SLEV(kernelCodePrefetchOffset, kernelCodePrefetchOffset);
112  SLEV(kernelCodePrefetchSize, kernelCodePrefetchSize);
113  SLEV(maxScrachBackingMemorySize, maxScrachBackingMemorySize);
114  SLEV(computePgmRsrc1, computePgmRsrc1);
115  SLEV(computePgmRsrc2, computePgmRsrc2);
116  SLEV(enableSgprRegisterFlags, enableSgprRegisterFlags);
117  SLEV(enableFeatureFlags, enableFeatureFlags);
118  SLEV(workitemPrivateSegmentSize, workitemPrivateSegmentSize);
119  SLEV(workgroupGroupSegmentSize, workgroupGroupSegmentSize);
120  SLEV(gdsSegmentSize, gdsSegmentSize);
121  SLEV(kernargSegmentSize, kernargSegmentSize);
122  SLEV(workgroupFbarrierCount, workgroupFbarrierCount);
123  SLEV(wavefrontSgprCount, wavefrontSgprCount);
124  SLEV(workitemVgprCount, workitemVgprCount);
125  SLEV(reservedVgprFirst, reservedVgprFirst);
126  SLEV(reservedVgprCount, reservedVgprCount);
127  SLEV(reservedSgprFirst, reservedSgprFirst);
128  SLEV(reservedSgprCount, reservedSgprCount);
129  SLEV(debugWavefrontPrivateSegmentOffsetSgpr,
130  debugWavefrontPrivateSegmentOffsetSgpr);
131  SLEV(debugPrivateSegmentBufferSgpr, debugPrivateSegmentBufferSgpr);
132  SLEV(callConvention, callConvention);
133  SLEV(runtimeLoaderKernelSymbol, runtimeLoaderKernelSymbol); // NOTE(review): reserved1[3] is never passed to SLEV in this method - confirm that is intentional
134  }
135 };
136 
137 };
138 
139 #endif
use private segment size
Definition: Commons.h:49
uint16_t amdMachineStepping
arch stepping number
Definition: Commons.h:71
uint32_t workitemPrivateSegmentSize
workitem private (scratchbuffer) segment size
Definition: Commons.h:80
use workgroup count for Y dim
Definition: Commons.h:52
uint16_t amdMachineMinor
arch minor number
Definition: Commons.h:70
use 64-bit pointers
Definition: Commons.h:57
uint64_t kernelCodePrefetchOffset
kernel code prefetch offset
Definition: Commons.h:73
uint32_t workgroupGroupSegmentSize
workgroup group segment (local memory) size
Definition: Commons.h:81
cxbyte privateSegmentAlignment
private segment alignment
Definition: Commons.h:95
AMD HSA kernel configuration structure.
Definition: Commons.h:64
cxbyte kernargSegmentAlignment
kernel argument segment alignment
Definition: Commons.h:93
uint32_t gdsSegmentSize
GDS segment size.
Definition: Commons.h:82
uint16_t reservedSgprCount
reserved scalar register count
Definition: Commons.h:90
use kernel argument segment pointer
Definition: Commons.h:46
Configuration header.
relocation that gets the low 32 bits of the value
Definition: Commons.h:38
cxuint RelocType
relocation type
Definition: Commons.h:33
use private segment buffer
Definition: Commons.h:43
cxbyte groupSegmentAlignment
group segment alignment
Definition: Commons.h:94
uint16_t enableFeatureFlags
bitfield of feature flags
Definition: Commons.h:79
relocation that gets the high 32 bits of the value
Definition: Commons.h:39
xnack enabled
Definition: Commons.h:60
uint32_t computePgmRsrc1
PGMRSRC1 register value.
Definition: Commons.h:76
uint32_t computePgmRsrc2
PGMRSRC2 register value.
Definition: Commons.h:77
unsigned char cxbyte
unsigned byte
Definition: Config.h:215
main namespace
Definition: AsmDefs.h:38
debug enabled
Definition: Commons.h:59
unsigned int cxuint
unsigned int
Definition: Config.h:223
use ordered append gds
Definition: Commons.h:56
uint32_t callConvention
call convention
Definition: Commons.h:97
uint16_t reservedSgprFirst
reserved first scalar register
Definition: Commons.h:89
inline functions for accessing memory words in little-endian byte order and at unaligned addresses
uint16_t wavefrontSgprCount
scalar register count per wavefront
Definition: Commons.h:85
cxbyte controlDirective[128]
control directives area
Definition: Commons.h:100
uint16_t enableSgprRegisterFlags
bitfield of SGPR register flags
Definition: Commons.h:78
uint16_t workitemVgprCount
vector register count per workitem
Definition: Commons.h:86
uint16_t amdMachineKind
architecture kind
Definition: Commons.h:68
use queue pointer
Definition: Commons.h:45
uint32_t amdCodeVersionMinor
AMD code version minor number.
Definition: Commons.h:67
use workgroup count for X dim
Definition: Commons.h:51
void SLEV(uint8_t &r, uint8_t v)
save from/to little endian value
Definition: MemAccess.h:246
uint32_t reserved1[3]
reserved
Definition: Commons.h:98
relocation that gets the value
Definition: Commons.h:37
uint16_t amdMachineMajor
arch major number
Definition: Commons.h:69
use workgroup count for Z dim
Definition: Commons.h:53
uint16_t reservedVgprFirst
reserved first vector register
Definition: Commons.h:87
cxbyte wavefrontSize
wavefront size
Definition: Commons.h:96
uint64_t kernargSegmentSize
kernel argument segment size
Definition: Commons.h:83
uint64_t kernelCodeEntryOffset
offset of the kernel code entry, relative to this config
Definition: Commons.h:72
uint16_t reservedVgprCount
reserved vector register count
Definition: Commons.h:88
uint32_t amdCodeVersionMajor
AMD code version major number.
Definition: Commons.h:66