source: CLRX/CLRadeonExtender/trunk/CLRX/amdbin/ROCmBinaries.h @ 3662

Last change on this file since 3662 was 3662, checked in by matszpk, 3 years ago

CLRadeonExtender: ROCm: Add eflags support (allow to set or get various e_flags value in ELF header).

File size: 7.4 KB
Line 
/*
 *  CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
 *  Copyright (C) 2014-2018 Mateusz Szpakowski
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */
/*! \file ROCmBinaries.h
 * \brief ROCm binaries handling
 */

#ifndef __CLRX_ROCMBINARIES_H__
#define __CLRX_ROCMBINARIES_H__

#include <CLRX/Config.h>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <ostream>
#include <string>
#include <utility>
#include <vector>
#include <CLRX/amdbin/Elf.h>
#include <CLRX/amdbin/ElfBinaries.h>
#include <CLRX/amdbin/Commons.h>
#include <CLRX/utils/MemAccess.h>
#include <CLRX/utils/Containers.h>
#include <CLRX/utils/Utilities.h>
#include <CLRX/utils/GPUId.h>
#include <CLRX/utils/InputOutput.h>

/// main namespace
namespace CLRX
{

44enum : Flags {
45    ROCMBIN_CREATE_REGIONMAP = 0x10,    ///< create region map
46    ROCMBIN_CREATE_ALL = ELF_CREATE_ALL | 0xfff0 ///< all ROCm binaries flags
47};
48
/// ROCm region/symbol type
/** Stored in a single byte (underlying type is uint8_t); enumerators are
 * numbered implicitly starting from zero.
 */
enum ROCmRegionType: uint8_t
{
    DATA,   ///< data object
    FKERNEL,   ///< function kernel (code)
    KERNEL  ///< OpenCL kernel to call ??
};

57/// ROCm data region
58struct ROCmRegion
59{
60    CString regionName; ///< region name
61    size_t size;    ///< data size
62    size_t offset;     ///< data
63    ROCmRegionType type; ///< type
64};
65
66/// ROCm main binary for GPU for 64-bit mode
67/** This object doesn't copy binary code content.
68 * Only it takes and uses a binary code.
69 */
70class ROCmBinary : public ElfBinary64, public NonCopyableAndNonMovable
71{
72public:
73    /// region map type
74    typedef Array<std::pair<CString, size_t> > RegionMap;
75private:
76    size_t regionsNum;
77    std::unique_ptr<ROCmRegion[]> regions;  ///< AMD metadatas
78    RegionMap regionsMap;
79    size_t codeSize;
80    cxbyte* code;
81public:
82    /// constructor
83    ROCmBinary(size_t binaryCodeSize, cxbyte* binaryCode,
84            Flags creationFlags = ROCMBIN_CREATE_ALL);
85    /// default destructor
86    ~ROCmBinary() = default;
87   
88    /// determine GPU device type from this binary
89    GPUDeviceType determineGPUDeviceType(uint32_t& archMinor,
90                     uint32_t& archStepping) const;
91   
92    /// get regions number
93    size_t getRegionsNum() const
94    { return regionsNum; }
95   
96    /// get region by index
97    const ROCmRegion& getRegion(size_t index) const
98    { return regions[index]; }
99   
100    /// get region by name
101    const ROCmRegion& getRegion(const char* name) const;
102   
103    /// get code size
104    size_t getCodeSize() const
105    { return codeSize; }
106    /// get code
107    const cxbyte* getCode() const
108    { return code; }
109   
110    /// returns true if kernel map exists
111    bool hasRegionMap() const
112    { return (creationFlags & ROCMBIN_CREATE_REGIONMAP) != 0; };
113};
114
115enum {
116    ROCMFLAG_USE_PRIVATE_SEGMENT_BUFFER = AMDHSAFLAG_USE_PRIVATE_SEGMENT_BUFFER,
117    ROCMFLAG_USE_DISPATCH_PTR = AMDHSAFLAG_USE_DISPATCH_PTR,
118    ROCMFLAG_USE_QUEUE_PTR = AMDHSAFLAG_USE_QUEUE_PTR,
119    ROCMFLAG_USE_KERNARG_SEGMENT_PTR = AMDHSAFLAG_USE_KERNARG_SEGMENT_PTR,
120    ROCMFLAG_USE_DISPATCH_ID = AMDHSAFLAG_USE_DISPATCH_ID,
121    ROCMFLAG_USE_FLAT_SCRATCH_INIT = AMDHSAFLAG_USE_FLAT_SCRATCH_INIT,
122    ROCMFLAG_USE_PRIVATE_SEGMENT_SIZE = AMDHSAFLAG_USE_PRIVATE_SEGMENT_SIZE,
123    ROCMFLAG_USE_GRID_WORKGROUP_COUNT_BIT = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_BIT,
124    ROCMFLAG_USE_GRID_WORKGROUP_COUNT_X = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_X,
125    ROCMFLAG_USE_GRID_WORKGROUP_COUNT_Y = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_Y,
126    ROCMFLAG_USE_GRID_WORKGROUP_COUNT_Z = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_Z,
127   
128    ROCMFLAG_USE_ORDERED_APPEND_GDS = AMDHSAFLAG_USE_ORDERED_APPEND_GDS,
129    ROCMFLAG_PRIVATE_ELEM_SIZE_BIT = AMDHSAFLAG_PRIVATE_ELEM_SIZE_BIT,
130    ROCMFLAG_USE_PTR64 = AMDHSAFLAG_USE_PTR64,
131    ROCMFLAG_USE_DYNAMIC_CALL_STACK = AMDHSAFLAG_USE_DYNAMIC_CALL_STACK,
132    ROCMFLAG_USE_DEBUG_ENABLED = AMDHSAFLAG_USE_DEBUG_ENABLED,
133    ROCMFLAG_USE_XNACK_ENABLED = AMDHSAFLAG_USE_XNACK_ENABLED
134};
135
136/// ROCm kernel configuration structure
137typedef AmdHsaKernelConfig ROCmKernelConfig;
138
139/// check whether is Amd OpenCL 2.0 binary
140extern bool isROCmBinary(size_t binarySize, const cxbyte* binary);
141
/*
 * ROCm Binary Generator
 */

146enum: cxuint {
147    ROCMSECTID_HASH = ELFSECTID_OTHER_BUILTIN,
148    ROCMSECTID_DYNAMIC,
149    ROCMSECTID_NOTE,
150    ROCMSECTID_GPUCONFIG,
151    ROCMSECTID_MAX = ROCMSECTID_GPUCONFIG
152};
153
154/// ROCm binary symbol input
155struct ROCmSymbolInput
156{
157    CString symbolName; ///< symbol name
158    size_t offset;  ///< offset in code
159    size_t size;    ///< size of symbol
160    ROCmRegionType type;  ///< type
161};
162
163/// ROCm binary input structure
164struct ROCmInput
165{
166    GPUDeviceType deviceType;   ///< GPU device type
167    uint32_t archMinor;         ///< GPU arch minor
168    uint32_t archStepping;      ///< GPU arch stepping
169    std::vector<ROCmSymbolInput> symbols;   ///< symbols
170    size_t codeSize;        ///< code size
171    const cxbyte* code;     ///< code
172    size_t commentSize; ///< comment size (can be null)
173    const char* comment; ///< comment
174    std::vector<BinSection> extraSections;  ///< extra sections
175    std::vector<BinSymbol> extraSymbols;    ///< extra symbols
176    uint32_t eflags;    ///< ELF headef e_flags field
177   
178    /// add empty kernel with default values
179    void addEmptyKernel(const char* kernelName);
180};
181
182/// ROCm binary generator
183class ROCmBinGenerator: public NonCopyableAndNonMovable
184{
185private:
186    private:
187    bool manageable;
188    const ROCmInput* input;
189   
190    void generateInternal(std::ostream* osPtr, std::vector<char>* vPtr,
191             Array<cxbyte>* aPtr) const;
192public:
193    /// constructor
194    ROCmBinGenerator();
195    /// constructor with ROCm input
196    explicit ROCmBinGenerator(const ROCmInput* rocmInput);
197   
198    /// constructor
199    /**
200     * \param deviceType device type
201     * \param archMinor architecture minor number
202     * \param archStepping architecture stepping number
203     * \param codeSize size of code
204     * \param code code pointer
205     * \param symbols symbols (kernels, datas,...)
206     */
207    ROCmBinGenerator(GPUDeviceType deviceType, uint32_t archMinor, uint32_t archStepping,
208            size_t codeSize, const cxbyte* code,
209            const std::vector<ROCmSymbolInput>& symbols);
210    /// constructor
211    ROCmBinGenerator(GPUDeviceType deviceType, uint32_t archMinor, uint32_t archStepping,
212            size_t codeSize, const cxbyte* code,
213            std::vector<ROCmSymbolInput>&& symbols);
214    /// destructor
215    ~ROCmBinGenerator();
216   
217    /// get input
218    const ROCmInput* getInput() const
219    { return input; }
220   
221    /// set input
222    void setInput(const ROCmInput* input);
223   
224    /// generates binary to array of bytes
225    void generate(Array<cxbyte>& array) const;
226   
227    /// generates binary to output stream
228    void generate(std::ostream& os) const;
229   
230    /// generates binary to vector of char
231    void generate(std::vector<char>& vector) const;
232};
233
};

#endif
Note: See TracBrowser for help on using the repository browser.