source: CLRX/CLRadeonExtender/trunk/CLRX/amdbin/ROCmBinaries.h @ 3665

Last change on this file since 3665 was 3665, checked in by matszpk, 3 years ago

CLRadeonExtender: ROCm: Add tentatively target and metadata to the ROCm format.

File size: 7.9 KB
Line 
1/*
2 *  CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3 *  Copyright (C) 2014-2018 Mateusz Szpakowski
4 *
5 *  This library is free software; you can redistribute it and/or
6 *  modify it under the terms of the GNU Lesser General Public
7 *  License as published by the Free Software Foundation; either
8 *  version 2.1 of the License, or (at your option) any later version.
9 *
10 *  This library is distributed in the hope that it will be useful,
11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 *  Lesser General Public License for more details.
14 *
15 *  You should have received a copy of the GNU Lesser General Public
16 *  License along with this library; if not, write to the Free Software
17 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18 */
19/*! \file ROCmBinaries.h
20 * \brief ROCm binaries handling
21 */
22
23#ifndef __CLRX_ROCMBINARIES_H__
24#define __CLRX_ROCMBINARIES_H__
25
26#include <CLRX/Config.h>
27#include <cstddef>
28#include <cstdint>
29#include <memory>
30#include <string>
31#include <CLRX/amdbin/Elf.h>
32#include <CLRX/amdbin/ElfBinaries.h>
33#include <CLRX/amdbin/Commons.h>
34#include <CLRX/utils/MemAccess.h>
35#include <CLRX/utils/Containers.h>
36#include <CLRX/utils/Utilities.h>
37#include <CLRX/utils/GPUId.h>
38#include <CLRX/utils/InputOutput.h>
39
40/// main namespace
41namespace CLRX
42{
43
44enum : Flags {
45    ROCMBIN_CREATE_REGIONMAP = 0x10,    ///< create region map
46    ROCMBIN_CREATE_ALL = ELF_CREATE_ALL | 0xfff0 ///< all ROCm binaries flags
47};
48
/// ROCm region/symbol type (stored in a single byte)
enum ROCmRegionType : uint8_t
{
    /// data object
    DATA = 0,
    /// function kernel (code)
    FKERNEL = 1,
    /// OpenCL kernel to call (the original author marked this meaning as uncertain)
    KERNEL = 2
};
56
57/// ROCm data region
58struct ROCmRegion
59{
60    CString regionName; ///< region name
61    size_t size;    ///< data size
62    size_t offset;     ///< data
63    ROCmRegionType type; ///< type
64};
65
66/// ROCm main binary for GPU for 64-bit mode
67/** This object doesn't copy binary code content.
68 * Only it takes and uses a binary code.
69 */
70class ROCmBinary : public ElfBinary64, public NonCopyableAndNonMovable
71{
72public:
73    /// region map type
74    typedef Array<std::pair<CString, size_t> > RegionMap;
75private:
76    size_t regionsNum;
77    std::unique_ptr<ROCmRegion[]> regions;  ///< AMD metadatas
78    RegionMap regionsMap;
79    size_t codeSize;
80    cxbyte* code;
81    CString target;
82    size_t metadataSize;
83    char* metadata;
84public:
85    /// constructor
86    ROCmBinary(size_t binaryCodeSize, cxbyte* binaryCode,
87            Flags creationFlags = ROCMBIN_CREATE_ALL);
88    /// default destructor
89    ~ROCmBinary() = default;
90   
91    /// determine GPU device type from this binary
92    GPUDeviceType determineGPUDeviceType(uint32_t& archMinor,
93                     uint32_t& archStepping) const;
94   
95    /// get regions number
96    size_t getRegionsNum() const
97    { return regionsNum; }
98   
99    /// get region by index
100    const ROCmRegion& getRegion(size_t index) const
101    { return regions[index]; }
102   
103    /// get region by name
104    const ROCmRegion& getRegion(const char* name) const;
105   
106    /// get code size
107    size_t getCodeSize() const
108    { return codeSize; }
109    /// get code
110    const cxbyte* getCode() const
111    { return code; }
112   
113    /// get metadata size
114    size_t getMetadataSize() const
115    { return metadataSize; }
116    /// get metadata
117    const char* getMetadata() const
118    { return metadata; }
119   
120    const CString& getTarget() const
121    { return target; }
122   
123    /// returns true if kernel map exists
124    bool hasRegionMap() const
125    { return (creationFlags & ROCMBIN_CREATE_REGIONMAP) != 0; };
126};
127
128enum {
129    ROCMFLAG_USE_PRIVATE_SEGMENT_BUFFER = AMDHSAFLAG_USE_PRIVATE_SEGMENT_BUFFER,
130    ROCMFLAG_USE_DISPATCH_PTR = AMDHSAFLAG_USE_DISPATCH_PTR,
131    ROCMFLAG_USE_QUEUE_PTR = AMDHSAFLAG_USE_QUEUE_PTR,
132    ROCMFLAG_USE_KERNARG_SEGMENT_PTR = AMDHSAFLAG_USE_KERNARG_SEGMENT_PTR,
133    ROCMFLAG_USE_DISPATCH_ID = AMDHSAFLAG_USE_DISPATCH_ID,
134    ROCMFLAG_USE_FLAT_SCRATCH_INIT = AMDHSAFLAG_USE_FLAT_SCRATCH_INIT,
135    ROCMFLAG_USE_PRIVATE_SEGMENT_SIZE = AMDHSAFLAG_USE_PRIVATE_SEGMENT_SIZE,
136    ROCMFLAG_USE_GRID_WORKGROUP_COUNT_BIT = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_BIT,
137    ROCMFLAG_USE_GRID_WORKGROUP_COUNT_X = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_X,
138    ROCMFLAG_USE_GRID_WORKGROUP_COUNT_Y = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_Y,
139    ROCMFLAG_USE_GRID_WORKGROUP_COUNT_Z = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_Z,
140   
141    ROCMFLAG_USE_ORDERED_APPEND_GDS = AMDHSAFLAG_USE_ORDERED_APPEND_GDS,
142    ROCMFLAG_PRIVATE_ELEM_SIZE_BIT = AMDHSAFLAG_PRIVATE_ELEM_SIZE_BIT,
143    ROCMFLAG_USE_PTR64 = AMDHSAFLAG_USE_PTR64,
144    ROCMFLAG_USE_DYNAMIC_CALL_STACK = AMDHSAFLAG_USE_DYNAMIC_CALL_STACK,
145    ROCMFLAG_USE_DEBUG_ENABLED = AMDHSAFLAG_USE_DEBUG_ENABLED,
146    ROCMFLAG_USE_XNACK_ENABLED = AMDHSAFLAG_USE_XNACK_ENABLED
147};
148
149/// ROCm kernel configuration structure
150typedef AmdHsaKernelConfig ROCmKernelConfig;
151
152/// check whether is Amd OpenCL 2.0 binary
153extern bool isROCmBinary(size_t binarySize, const cxbyte* binary);
154
155/*
156 * ROCm Binary Generator
157 */
158
159enum: cxuint {
160    ROCMSECTID_HASH = ELFSECTID_OTHER_BUILTIN,
161    ROCMSECTID_DYNAMIC,
162    ROCMSECTID_NOTE,
163    ROCMSECTID_GPUCONFIG,
164    ROCMSECTID_MAX = ROCMSECTID_GPUCONFIG
165};
166
167/// ROCm binary symbol input
168struct ROCmSymbolInput
169{
170    CString symbolName; ///< symbol name
171    size_t offset;  ///< offset in code
172    size_t size;    ///< size of symbol
173    ROCmRegionType type;  ///< type
174};
175
176/// ROCm binary input structure
177struct ROCmInput
178{
179    GPUDeviceType deviceType;   ///< GPU device type
180    uint32_t archMinor;         ///< GPU arch minor
181    uint32_t archStepping;      ///< GPU arch stepping
182    uint32_t eflags;    ///< ELF headef e_flags field
183    bool newBinFormat;       ///< use new binary format for ROCm
184    std::vector<ROCmSymbolInput> symbols;   ///< symbols
185    size_t codeSize;        ///< code size
186    const cxbyte* code;     ///< code
187    size_t commentSize; ///< comment size (can be null)
188    const char* comment; ///< comment
189    CString target;     ///< LLVM target triple
190    size_t metadataSize;    ///< metadata size
191    const char* metadata;   ///< metadata
192    std::vector<BinSection> extraSections;  ///< extra sections
193    std::vector<BinSymbol> extraSymbols;    ///< extra symbols
194   
195    /// add empty kernel with default values
196    void addEmptyKernel(const char* kernelName);
197};
198
199/// ROCm binary generator
200class ROCmBinGenerator: public NonCopyableAndNonMovable
201{
202private:
203    private:
204    bool manageable;
205    const ROCmInput* input;
206   
207    void generateInternal(std::ostream* osPtr, std::vector<char>* vPtr,
208             Array<cxbyte>* aPtr) const;
209public:
210    /// constructor
211    ROCmBinGenerator();
212    /// constructor with ROCm input
213    explicit ROCmBinGenerator(const ROCmInput* rocmInput);
214   
215    /// constructor
216    /**
217     * \param deviceType device type
218     * \param archMinor architecture minor number
219     * \param archStepping architecture stepping number
220     * \param codeSize size of code
221     * \param code code pointer
222     * \param symbols symbols (kernels, datas,...)
223     */
224    ROCmBinGenerator(GPUDeviceType deviceType, uint32_t archMinor, uint32_t archStepping,
225            size_t codeSize, const cxbyte* code,
226            const std::vector<ROCmSymbolInput>& symbols);
227    /// constructor
228    ROCmBinGenerator(GPUDeviceType deviceType, uint32_t archMinor, uint32_t archStepping,
229            size_t codeSize, const cxbyte* code,
230            std::vector<ROCmSymbolInput>&& symbols);
231    /// destructor
232    ~ROCmBinGenerator();
233   
234    /// get input
235    const ROCmInput* getInput() const
236    { return input; }
237   
238    /// set input
239    void setInput(const ROCmInput* input);
240   
241    /// generates binary to array of bytes
242    void generate(Array<cxbyte>& array) const;
243   
244    /// generates binary to output stream
245    void generate(std::ostream& os) const;
246   
247    /// generates binary to vector of char
248    void generate(std::vector<char>& vector) const;
249};
250
251};
252
253#endif
Note: See TracBrowser for help on using the repository browser.