source: CLRX/CLRadeonExtender/trunk/CLRX/amdbin/ROCmBinaries.h @ 2576

Last change on this file since 2576 was 2576, checked in by matszpk, 4 years ago

CLRadeonExtender: ROCmBinGen: Adding extra sections and symbols to binary.

File size: 6.3 KB
Line 
1/*
2 *  CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3 *  Copyright (C) 2014-2016 Mateusz Szpakowski
4 *
5 *  This library is free software; you can redistribute it and/or
6 *  modify it under the terms of the GNU Lesser General Public
7 *  License as published by the Free Software Foundation; either
8 *  version 2.1 of the License, or (at your option) any later version.
9 *
10 *  This library is distributed in the hope that it will be useful,
11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 *  Lesser General Public License for more details.
14 *
15 *  You should have received a copy of the GNU Lesser General Public
16 *  License along with this library; if not, write to the Free Software
17 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18 */
19/*! \file ROCmBinaries.h
20 * \brief ROCm binaries handling
21 */
22
23#ifndef __CLRX_ROCMBINARIES_H__
24#define __CLRX_ROCMBINARIES_H__
25
26#include <CLRX/Config.h>
27#include <cstddef>
28#include <cstdint>
29#include <memory>
30#include <string>
31#include <CLRX/amdbin/Elf.h>
32#include <CLRX/amdbin/ElfBinaries.h>
33#include <CLRX/utils/MemAccess.h>
34#include <CLRX/utils/Containers.h>
35#include <CLRX/utils/Utilities.h>
36#include <CLRX/utils/GPUId.h>
37#include <CLRX/utils/InputOutput.h>
38
39/// main namespace
40namespace CLRX
41{
42
43enum : Flags {
44    ROCMBIN_CREATE_REGIONMAP = 0x10,    ///< create region map
45   
46    ROCMBIN_CREATE_ALL = ELF_CREATE_ALL | 0xfff0 ///< all ROCm binaries flags
47};
48
49/// ROCm data region
50struct ROCmRegion
51{
52    CString regionName; ///< region name
53    size_t size;    ///< data size
54    size_t offset;     ///< data
55    bool isKernel;
56};
57
58/// ROCm main binary for GPU for 64-bit mode
59/** This object doesn't copy binary code content.
60 * Only it takes and uses a binary code.
61 */
62class ROCmBinary : public ElfBinary64, public NonCopyableAndNonMovable
63{
64public:
65    typedef Array<std::pair<CString, size_t> > RegionMap;
66private:
67    size_t regionsNum;
68    std::unique_ptr<ROCmRegion[]> regions;  ///< AMD metadatas
69    RegionMap regionsMap;
70    size_t codeSize;
71    cxbyte* code;
72public:
73    ROCmBinary(size_t binaryCodeSize, cxbyte* binaryCode,
74            Flags creationFlags = ROCMBIN_CREATE_ALL);
75    ~ROCmBinary() = default;
76   
77    /// get regions number
78    size_t getRegionsNum() const
79    { return regionsNum; }
80   
81    /// get region by index
82    const ROCmRegion& getRegion(size_t index) const
83    { return regions[index]; }
84   
85    /// get region by name
86    const ROCmRegion& getRegion(const char* name) const;
87   
88    /// get code size
89    size_t getCodeSize() const
90    { return codeSize; }
91    /// get code
92    const cxbyte* getCode() const
93    { return code; }
94   
95    /// returns true if kernel map exists
96    bool hasRegionMap() const
97    { return (creationFlags & ROCMBIN_CREATE_REGIONMAP) != 0; };
98};
99
100/// ROCm kernel configuration structure
101struct ROCmKernelConfig
102{
103    uint32_t amdCodeVersionMajor;
104    uint32_t amdCodeVersionMinor;
105    uint16_t amdMachineKind;
106    uint16_t amdMachineMajor;
107    uint16_t amdMachineMinor;
108    uint16_t amdMachineStepping;
109    uint64_t kernelCodeEntryOffset;
110    uint64_t kernelCodePrefetchOffset;
111    uint64_t kernelCodePrefetchSize;
112    uint64_t maxScrachBackingMemorySize;
113    uint32_t computePgmRsrc1;
114    uint32_t computePgmRsrc2;
115    uint16_t enableSpgrRegisterFlags;
116    uint16_t enableFeatureFlags;
117    uint32_t workitemPrivateSegmentSize;
118    uint32_t workgroupGroupSegmentSize;
119    uint32_t gdsSegmentSize;
120    uint64_t kernargSegmentSize;
121    uint32_t workgroupFbarrierCount;
122    uint16_t wavefrontSgprCount;
123    uint16_t workitemVgprCount;
124    uint16_t reservedVgprFirst;
125    uint16_t reservedVgprCount;
126    uint16_t reservedSgprFirst;
127    uint16_t reservedSgprCount;
128    uint16_t debugWavefrontPrivateSegmentOffsetSgpr;
129    uint16_t debugPrivateSegmentBufferSgpr;
130    cxbyte kernargSegmentAlignment;
131    cxbyte groupSegmentAlignment;
132    cxbyte privateSegmentAlignment;
133    cxbyte wavefrontSize;
134    uint32_t callConvention;
135    uint32_t reserved1[3];
136    uint64_t runtimeLoaderKernelSymbol;
137    cxbyte controlDirective[128];
138};
139
140/// check whether is Amd OpenCL 2.0 binary
141extern bool isROCmBinary(size_t binarySize, const cxbyte* binary);
142
143/*
144 * ROCm Binary Generator
145 */
146
147enum: cxuint {
148    ROCMSECTID_HASH = ELFSECTID_OTHER_BUILTIN,
149    ROCMSECTID_DYNAMIC,
150    ROCMSECTID_NOTE,
151    ROCMSECTID_GPUCONFIG,
152    ROCMSECTID_MAX = ROCMSECTID_GPUCONFIG
153};
154
155/// ROCm binary symbol input
156struct ROCmSymbolInput
157{
158    CString symbolName; ///< symbol name
159    size_t offset;  ///< offset in code
160    size_t size;    ///< size of symbol
161    bool isKernel;  ///< true if kernel
162};
163
164struct ROCmInput
165{
166    GPUDeviceType deviceType;   ///< GPU device type
167    uint32_t archMinor;         ///< GPU arch minor
168    uint32_t archStepping;      ///< GPU arch stepping
169    std::vector<ROCmSymbolInput> symbols;   ///< symbols
170    size_t codeSize;        ///< code size
171    const cxbyte* code;     ///< code
172    size_t commentSize; ///< comment size (can be null)
173    const char* comment; ///< comment
174    std::vector<BinSection> extraSections;  ///< extra sections
175    std::vector<BinSymbol> extraSymbols;    ///< extra symbols
176};
177
178class ROCmBinGenerator: public NonCopyableAndNonMovable
179{
180private:
181    private:
182    bool manageable;
183    const ROCmInput* input;
184   
185    void generateInternal(std::ostream* osPtr, std::vector<char>* vPtr,
186             Array<cxbyte>* aPtr) const;
187public:
188    ROCmBinGenerator();
189    /// constructor with ROCm input
190    ROCmBinGenerator(const ROCmInput* rocmInput);
191   
192    ROCmBinGenerator(GPUDeviceType deviceType, uint32_t archMinor, uint32_t archStepping,
193            size_t codeSize, const cxbyte* code,
194            const std::vector<ROCmSymbolInput>& symbols);
195    ROCmBinGenerator(GPUDeviceType deviceType, uint32_t archMinor, uint32_t archStepping,
196            size_t codeSize, const cxbyte* code,
197            std::vector<ROCmSymbolInput>&& symbols);
198    ~ROCmBinGenerator();
199   
200    /// get input
201    const ROCmInput* getInput() const
202    { return input; }
203   
204    /// set input
205    void setInput(const ROCmInput* input);
206   
207    /// generates binary to array of bytes
208    void generate(Array<cxbyte>& array) const;
209   
210    /// generates binary to output stream
211    void generate(std::ostream& os) const;
212   
213    /// generates binary to vector of char
214    void generate(std::vector<char>& vector) const;
215};
216
217};
218
219#endif
Note: See TracBrowser for help on using the repository browser.