source: CLRX/CLRadeonExtender/trunk/CLRX/amdbin/ROCmBinaries.h @ 2599

Last change on this file since 2599 was 2599, checked in by matszpk, 4 years ago

CLRadeonExtender: ROCm: Recognize other kernel object (function) in binaries and disassemble them.

File size: 6.5 KB
Line 
1/*
2 *  CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3 *  Copyright (C) 2014-2016 Mateusz Szpakowski
4 *
5 *  This library is free software; you can redistribute it and/or
6 *  modify it under the terms of the GNU Lesser General Public
7 *  License as published by the Free Software Foundation; either
8 *  version 2.1 of the License, or (at your option) any later version.
9 *
10 *  This library is distributed in the hope that it will be useful,
11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 *  Lesser General Public License for more details.
14 *
15 *  You should have received a copy of the GNU Lesser General Public
16 *  License along with this library; if not, write to the Free Software
17 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18 */
19/*! \file ROCmBinaries.h
20 * \brief ROCm binaries handling
21 */
22
23#ifndef __CLRX_ROCMBINARIES_H__
24#define __CLRX_ROCMBINARIES_H__
25
26#include <CLRX/Config.h>
27#include <cstddef>
28#include <cstdint>
29#include <memory>
30#include <string>
31#include <CLRX/amdbin/Elf.h>
32#include <CLRX/amdbin/ElfBinaries.h>
33#include <CLRX/utils/MemAccess.h>
34#include <CLRX/utils/Containers.h>
35#include <CLRX/utils/Utilities.h>
36#include <CLRX/utils/GPUId.h>
37#include <CLRX/utils/InputOutput.h>
38
39/// main namespace
40namespace CLRX
41{
42
43enum : Flags {
44    ROCMBIN_CREATE_REGIONMAP = 0x10,    ///< create region map
45   
46    ROCMBIN_CREATE_ALL = ELF_CREATE_ALL | 0xfff0 ///< all ROCm binaries flags
47};
48
/// ROCm region type (stored in a single byte)
enum ROCmRegionType: uint8_t
{
    DATA = 0,   ///< data region
    CODE = 1,   ///< code region
    KERNEL = 2  ///< kernel region
};
55
56/// ROCm data region
57struct ROCmRegion
58{
59    CString regionName; ///< region name
60    size_t size;    ///< data size
61    size_t offset;     ///< data
62    ROCmRegionType type;
63};
64
65/// ROCm main binary for GPU for 64-bit mode
66/** This object doesn't copy binary code content.
67 * Only it takes and uses a binary code.
68 */
69class ROCmBinary : public ElfBinary64, public NonCopyableAndNonMovable
70{
71public:
72    typedef Array<std::pair<CString, size_t> > RegionMap;
73private:
74    size_t regionsNum;
75    std::unique_ptr<ROCmRegion[]> regions;  ///< AMD metadatas
76    RegionMap regionsMap;
77    size_t codeSize;
78    cxbyte* code;
79public:
80    ROCmBinary(size_t binaryCodeSize, cxbyte* binaryCode,
81            Flags creationFlags = ROCMBIN_CREATE_ALL);
82    ~ROCmBinary() = default;
83   
84    /// get regions number
85    size_t getRegionsNum() const
86    { return regionsNum; }
87   
88    /// get region by index
89    const ROCmRegion& getRegion(size_t index) const
90    { return regions[index]; }
91   
92    /// get region by name
93    const ROCmRegion& getRegion(const char* name) const;
94   
95    /// get code size
96    size_t getCodeSize() const
97    { return codeSize; }
98    /// get code
99    const cxbyte* getCode() const
100    { return code; }
101   
102    /// returns true if kernel map exists
103    bool hasRegionMap() const
104    { return (creationFlags & ROCMBIN_CREATE_REGIONMAP) != 0; };
105};
106
107/// ROCm kernel configuration structure
108struct ROCmKernelConfig
109{
110    uint32_t amdCodeVersionMajor;
111    uint32_t amdCodeVersionMinor;
112    uint16_t amdMachineKind;
113    uint16_t amdMachineMajor;
114    uint16_t amdMachineMinor;
115    uint16_t amdMachineStepping;
116    uint64_t kernelCodeEntryOffset;
117    uint64_t kernelCodePrefetchOffset;
118    uint64_t kernelCodePrefetchSize;
119    uint64_t maxScrachBackingMemorySize;
120    uint32_t computePgmRsrc1;
121    uint32_t computePgmRsrc2;
122    uint16_t enableSpgrRegisterFlags;
123    uint16_t enableFeatureFlags;
124    uint32_t workitemPrivateSegmentSize;
125    uint32_t workgroupGroupSegmentSize;
126    uint32_t gdsSegmentSize;
127    uint64_t kernargSegmentSize;
128    uint32_t workgroupFbarrierCount;
129    uint16_t wavefrontSgprCount;
130    uint16_t workitemVgprCount;
131    uint16_t reservedVgprFirst;
132    uint16_t reservedVgprCount;
133    uint16_t reservedSgprFirst;
134    uint16_t reservedSgprCount;
135    uint16_t debugWavefrontPrivateSegmentOffsetSgpr;
136    uint16_t debugPrivateSegmentBufferSgpr;
137    cxbyte kernargSegmentAlignment;
138    cxbyte groupSegmentAlignment;
139    cxbyte privateSegmentAlignment;
140    cxbyte wavefrontSize;
141    uint32_t callConvention;
142    uint32_t reserved1[3];
143    uint64_t runtimeLoaderKernelSymbol;
144    cxbyte controlDirective[128];
145};
146
147/// check whether is Amd OpenCL 2.0 binary
148extern bool isROCmBinary(size_t binarySize, const cxbyte* binary);
149
150/*
151 * ROCm Binary Generator
152 */
153
154enum: cxuint {
155    ROCMSECTID_HASH = ELFSECTID_OTHER_BUILTIN,
156    ROCMSECTID_DYNAMIC,
157    ROCMSECTID_NOTE,
158    ROCMSECTID_GPUCONFIG,
159    ROCMSECTID_MAX = ROCMSECTID_GPUCONFIG
160};
161
162/// ROCm binary symbol input
163struct ROCmSymbolInput
164{
165    CString symbolName; ///< symbol name
166    size_t offset;  ///< offset in code
167    size_t size;    ///< size of symbol
168    ROCmRegionType type;  ///< type
169};
170
171struct ROCmInput
172{
173    GPUDeviceType deviceType;   ///< GPU device type
174    uint32_t archMinor;         ///< GPU arch minor
175    uint32_t archStepping;      ///< GPU arch stepping
176    std::vector<ROCmSymbolInput> symbols;   ///< symbols
177    size_t codeSize;        ///< code size
178    const cxbyte* code;     ///< code
179    size_t commentSize; ///< comment size (can be null)
180    const char* comment; ///< comment
181    std::vector<BinSection> extraSections;  ///< extra sections
182    std::vector<BinSymbol> extraSymbols;    ///< extra symbols
183   
184    void addEmptyKernel(const char* kernelName);
185};
186
187class ROCmBinGenerator: public NonCopyableAndNonMovable
188{
189private:
190    private:
191    bool manageable;
192    const ROCmInput* input;
193   
194    void generateInternal(std::ostream* osPtr, std::vector<char>* vPtr,
195             Array<cxbyte>* aPtr) const;
196public:
197    ROCmBinGenerator();
198    /// constructor with ROCm input
199    ROCmBinGenerator(const ROCmInput* rocmInput);
200   
201    ROCmBinGenerator(GPUDeviceType deviceType, uint32_t archMinor, uint32_t archStepping,
202            size_t codeSize, const cxbyte* code,
203            const std::vector<ROCmSymbolInput>& symbols);
204    ROCmBinGenerator(GPUDeviceType deviceType, uint32_t archMinor, uint32_t archStepping,
205            size_t codeSize, const cxbyte* code,
206            std::vector<ROCmSymbolInput>&& symbols);
207    ~ROCmBinGenerator();
208   
209    /// get input
210    const ROCmInput* getInput() const
211    { return input; }
212   
213    /// set input
214    void setInput(const ROCmInput* input);
215   
216    /// generates binary to array of bytes
217    void generate(Array<cxbyte>& array) const;
218   
219    /// generates binary to output stream
220    void generate(std::ostream& os) const;
221   
222    /// generates binary to vector of char
223    void generate(std::vector<char>& vector) const;
224};
225
226};
227
228#endif
Note: See TracBrowser for help on using the repository browser.