source: CLRX/CLRadeonExtender/trunk/CLRX/amdbin/Commons.h @ 3721

Last change on this file since 3721 was 3721, checked in by matszpk, 3 years ago

CLRadeonExtender: Fixed typo in Commons.h. Added comments to DisasmROCm. Tentative version of generateROCmMetadata.

File size: 6.3 KB
Line 
1/*
2 *  CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3 *  Copyright (C) 2014-2018 Mateusz Szpakowski
4 *
5 *  This library is free software; you can redistribute it and/or
6 *  modify it under the terms of the GNU Lesser General Public
7 *  License as published by the Free Software Foundation; either
8 *  version 2.1 of the License, or (at your option) any later version.
9 *
10 *  This library is distributed in the hope that it will be useful,
11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 *  Lesser General Public License for more details.
14 *
15 *  You should have received a copy of the GNU Lesser General Public
16 *  License along with this library; if not, write to the Free Software
17 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18 */
19/*! \file amdbin/Commons.h
20 * \brief common definitions for binaries
21 */
22
23#ifndef __CLRX_COMMONS_H__
24#define __CLRX_COMMONS_H__
25
26#include <CLRX/Config.h>
27#include <CLRX/utils/MemAccess.h>
28
29/// main namespace
30namespace CLRX
31{
32/// relocation type
33typedef cxuint RelocType;
34/// relocation type values (unnamed enum; presumably the values held in a RelocType - confirm against users)
35enum
36{
37    RELTYPE_VALUE = 0,  ///< relocation that gets the value
38    RELTYPE_LOW_32BIT,    ///< relocation that gets the low 32 bits of the value (implicit value 1)
39    RELTYPE_HIGH_32BIT    ///< relocation that gets the high 32 bits of the value (implicit value 2)
40};
41
42enum {    // AMDHSAFLAG_* constants: two groups of values, each group starting again from 1
43    AMDHSAFLAG_USE_PRIVATE_SEGMENT_BUFFER = 1,  ///< use private segment buffer
44    AMDHSAFLAG_USE_DISPATCH_PTR = 2,    ///< use dispatch pointer
45    AMDHSAFLAG_USE_QUEUE_PTR = 4,       ///< use queue pointer
46    AMDHSAFLAG_USE_KERNARG_SEGMENT_PTR = 8, ///< use kernel argument segment pointer
47    AMDHSAFLAG_USE_DISPATCH_ID = 16,    ///< use dispatch id
48    AMDHSAFLAG_USE_FLAT_SCRATCH_INIT = 32,  ///< use flat scratch init
49    AMDHSAFLAG_USE_PRIVATE_SEGMENT_SIZE = 64,       ///< use private segment size
50    AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_BIT = 7,    ///< bit index, not a mask: 1<<7 == 128 == AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_X
51    AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_X = 128,    ///< use workgroup count for X dim
52    AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_Y = 256,    ///< use workgroup count for Y dim
53    AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_Z = 512,    ///< use workgroup count for Z dim
54    // second group: values restart at 1 and overlap numerically with the group above; presumably meant for a different bitfield (enableFeatureFlags?) - confirm
55    AMDHSAFLAG_USE_ORDERED_APPEND_GDS = 1,  ///< use ordered append gds
56    AMDHSAFLAG_PRIVATE_ELEM_SIZE_BIT = 1,   ///< NOTE(review): named _BIT, so presumably a bit index rather than a mask - confirm
57    AMDHSAFLAG_USE_PTR64 = 8,       ///< use 64-bit pointers
58    AMDHSAFLAG_USE_DYNAMIC_CALL_STACK = 16, ///< use dynamic call stack
59    AMDHSAFLAG_USE_DEBUG_ENABLED = 32,  ///< debug enabled
60    AMDHSAFLAG_USE_XNACK_ENABLED = 64   ///< xnack enabled
61};
62
63/// AMD HSA kernel configuration structure
64struct AmdHsaKernelConfig
65{
66    uint32_t amdCodeVersionMajor;   ///< AMD code version major number
67    uint32_t amdCodeVersionMinor;   ///< AMD code version minor number
68    uint16_t amdMachineKind;    ///< architecture kind
69    uint16_t amdMachineMajor;   ///< arch major number
70    uint16_t amdMachineMinor;   ///< arch minor number
71    uint16_t amdMachineStepping;    ///< arch stepping number
72    uint64_t kernelCodeEntryOffset;     ///< offset, relative to this config, to the kernel code
73    uint64_t kernelCodePrefetchOffset;  ///< kernel code prefetch offset
74    uint64_t kernelCodePrefetchSize;    ///< kernel code prefetch size
75    uint64_t maxScrachBackingMemorySize;    ///< max scratch backing memory size ("Scrach" is misspelled, but the name is public and kept)
76    uint32_t computePgmRsrc1;   ///< PGMRSRC1 register value
77    uint32_t computePgmRsrc2;   ///< PGMRSRC2 register value
78    uint16_t enableSgprRegisterFlags;   ///< bitfield of SGPR register enable flags (presumably AMDHSAFLAG_USE_* values 1..512 - confirm)
79    uint16_t enableFeatureFlags;    ///< bitfield of feature flags
80    uint32_t workitemPrivateSegmentSize; ///< workitem private (scratchbuffer) segment size
81    uint32_t workgroupGroupSegmentSize; ///< workgroup group segment (local memory) size
82    uint32_t gdsSegmentSize;   ///< GDS segment size
83    uint64_t kernargSegmentSize;    ///< kernel argument segment size
84    uint32_t workgroupFbarrierCount;    ///< workgroup fbarrier count
85    uint16_t wavefrontSgprCount;    ///< scalar register count per wavefront
86    uint16_t workitemVgprCount;     ///< vector register count per workitem
87    uint16_t reservedVgprFirst;     ///< reserved first vector register
88    uint16_t reservedVgprCount;     ///< reserved vector register count
89    uint16_t reservedSgprFirst;     ///< reserved first scalar register
90    uint16_t reservedSgprCount;     ///< reserved scalar register count
91    uint16_t debugWavefrontPrivateSegmentOffsetSgpr;    ///< debug: SGPR for wavefront private segment offset
92    uint16_t debugPrivateSegmentBufferSgpr; ///< debug: SGPR for private segment buffer
93    cxbyte kernargSegmentAlignment;     ///< kernel segment alignment
94    cxbyte groupSegmentAlignment;       ///< group segment alignment
95    cxbyte privateSegmentAlignment;     ///< private segment alignment
96    cxbyte wavefrontSize;           ///< wavefront size
97    uint32_t callConvention;        ///< call convention
98    uint32_t reserved1[3];          ///< reserved
99    uint64_t runtimeLoaderKernelSymbol; ///< runtime loader kernel symbol
100    cxbyte controlDirective[128];       ///< control directives area
101    /// applies SLEV in place to every 16/32/64-bit scalar field (presumably a little-endian conversion, per the method name; SLEV is not defined in this file - confirm)
102    void toLE()
103    {   // note: the cxbyte fields, reserved1[] and controlDirective[] are not passed to SLEV
104        SLEV(amdCodeVersionMajor, amdCodeVersionMajor);
105        SLEV(amdCodeVersionMinor, amdCodeVersionMinor);
106        SLEV(amdMachineKind, amdMachineKind);
107        SLEV(amdMachineMajor, amdMachineMajor);
108        SLEV(amdMachineMinor, amdMachineMinor);
109        SLEV(amdMachineStepping, amdMachineStepping);
110        SLEV(kernelCodeEntryOffset, kernelCodeEntryOffset);
111        SLEV(kernelCodePrefetchOffset, kernelCodePrefetchOffset);
112        SLEV(kernelCodePrefetchSize, kernelCodePrefetchSize);
113        SLEV(maxScrachBackingMemorySize, maxScrachBackingMemorySize);
114        SLEV(computePgmRsrc1, computePgmRsrc1);
115        SLEV(computePgmRsrc2, computePgmRsrc2);
116        SLEV(enableSgprRegisterFlags, enableSgprRegisterFlags);
117        SLEV(enableFeatureFlags, enableFeatureFlags);
118        SLEV(workitemPrivateSegmentSize, workitemPrivateSegmentSize);
119        SLEV(workgroupGroupSegmentSize, workgroupGroupSegmentSize);
120        SLEV(gdsSegmentSize, gdsSegmentSize);
121        SLEV(kernargSegmentSize, kernargSegmentSize);
122        SLEV(workgroupFbarrierCount, workgroupFbarrierCount);
123        SLEV(wavefrontSgprCount, wavefrontSgprCount);
124        SLEV(workitemVgprCount, workitemVgprCount);
125        SLEV(reservedVgprFirst, reservedVgprFirst);
126        SLEV(reservedVgprCount, reservedVgprCount);
127        SLEV(reservedSgprFirst, reservedSgprFirst);
128        SLEV(reservedSgprCount, reservedSgprCount);
129        SLEV(debugWavefrontPrivateSegmentOffsetSgpr,
130             debugWavefrontPrivateSegmentOffsetSgpr);
131        SLEV(debugPrivateSegmentBufferSgpr, debugPrivateSegmentBufferSgpr);
132        SLEV(callConvention, callConvention);
133        SLEV(runtimeLoaderKernelSymbol, runtimeLoaderKernelSymbol);
134    }
135};
136
137};
138
139#endif
Note: See TracBrowser for help on using the repository browser.