source: CLRX/CLRadeonExtender/trunk/CLRX/amdbin/ROCmBinaries.h @ 3575

Last change on this file since 3575 was 3575, checked in by matszpk, 3 years ago

CLRadeonExtender: Change Copyright dates.

File size: 7.3 KB
Line 
1/*
2 *  CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3 *  Copyright (C) 2014-2018 Mateusz Szpakowski
4 *
5 *  This library is free software; you can redistribute it and/or
6 *  modify it under the terms of the GNU Lesser General Public
7 *  License as published by the Free Software Foundation; either
8 *  version 2.1 of the License, or (at your option) any later version.
9 *
10 *  This library is distributed in the hope that it will be useful,
11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 *  Lesser General Public License for more details.
14 *
15 *  You should have received a copy of the GNU Lesser General Public
16 *  License along with this library; if not, write to the Free Software
17 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18 */
19/*! \file ROCmBinaries.h
20 * \brief ROCm binaries handling
21 */
22
23#ifndef __CLRX_ROCMBINARIES_H__
24#define __CLRX_ROCMBINARIES_H__
25
26#include <CLRX/Config.h>
27#include <cstddef>
28#include <cstdint>
29#include <memory>
30#include <string>
31#include <CLRX/amdbin/Elf.h>
32#include <CLRX/amdbin/ElfBinaries.h>
33#include <CLRX/amdbin/Commons.h>
34#include <CLRX/utils/MemAccess.h>
35#include <CLRX/utils/Containers.h>
36#include <CLRX/utils/Utilities.h>
37#include <CLRX/utils/GPUId.h>
38#include <CLRX/utils/InputOutput.h>
39
40/// main namespace
41namespace CLRX
42{
43
44enum : Flags {
45    ROCMBIN_CREATE_REGIONMAP = 0x10,    ///< create region map
46    ROCMBIN_CREATE_ALL = ELF_CREATE_ALL | 0xfff0 ///< all ROCm binaries flags
47};
48
/// ROCm region/symbol type (stored in a single byte)
enum ROCmRegionType: uint8_t
{
    /// data object
    DATA,
    /// function kernel (code)
    FKERNEL,
    /// OpenCL kernel to call (the original author marked this meaning as uncertain)
    KERNEL
};
56
57/// ROCm data region
58struct ROCmRegion
59{
60    CString regionName; ///< region name
61    size_t size;    ///< data size
62    size_t offset;     ///< data
63    ROCmRegionType type; ///< type
64};
65
66/// ROCm main binary for GPU for 64-bit mode
67/** This object doesn't copy binary code content.
68 * Only it takes and uses a binary code.
69 */
70class ROCmBinary : public ElfBinary64, public NonCopyableAndNonMovable
71{
72public:
73    /// region map type
74    typedef Array<std::pair<CString, size_t> > RegionMap;
75private:
76    size_t regionsNum;
77    std::unique_ptr<ROCmRegion[]> regions;  ///< AMD metadatas
78    RegionMap regionsMap;
79    size_t codeSize;
80    cxbyte* code;
81public:
82    /// constructor
83    ROCmBinary(size_t binaryCodeSize, cxbyte* binaryCode,
84            Flags creationFlags = ROCMBIN_CREATE_ALL);
85    /// default destructor
86    ~ROCmBinary() = default;
87   
88    /// determine GPU device type from this binary
89    GPUDeviceType determineGPUDeviceType(uint32_t& archMinor,
90                     uint32_t& archStepping) const;
91   
92    /// get regions number
93    size_t getRegionsNum() const
94    { return regionsNum; }
95   
96    /// get region by index
97    const ROCmRegion& getRegion(size_t index) const
98    { return regions[index]; }
99   
100    /// get region by name
101    const ROCmRegion& getRegion(const char* name) const;
102   
103    /// get code size
104    size_t getCodeSize() const
105    { return codeSize; }
106    /// get code
107    const cxbyte* getCode() const
108    { return code; }
109   
110    /// returns true if kernel map exists
111    bool hasRegionMap() const
112    { return (creationFlags & ROCMBIN_CREATE_REGIONMAP) != 0; };
113};
114
115enum {
116    ROCMFLAG_USE_PRIVATE_SEGMENT_BUFFER = AMDHSAFLAG_USE_PRIVATE_SEGMENT_BUFFER,
117    ROCMFLAG_USE_DISPATCH_PTR = AMDHSAFLAG_USE_DISPATCH_PTR,
118    ROCMFLAG_USE_QUEUE_PTR = AMDHSAFLAG_USE_QUEUE_PTR,
119    ROCMFLAG_USE_KERNARG_SEGMENT_PTR = AMDHSAFLAG_USE_KERNARG_SEGMENT_PTR,
120    ROCMFLAG_USE_DISPATCH_ID = AMDHSAFLAG_USE_DISPATCH_ID,
121    ROCMFLAG_USE_FLAT_SCRATCH_INIT = AMDHSAFLAG_USE_FLAT_SCRATCH_INIT,
122    ROCMFLAG_USE_PRIVATE_SEGMENT_SIZE = AMDHSAFLAG_USE_PRIVATE_SEGMENT_SIZE,
123    ROCMFLAG_USE_GRID_WORKGROUP_COUNT_BIT = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_BIT,
124    ROCMFLAG_USE_GRID_WORKGROUP_COUNT_X = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_X,
125    ROCMFLAG_USE_GRID_WORKGROUP_COUNT_Y = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_Y,
126    ROCMFLAG_USE_GRID_WORKGROUP_COUNT_Z = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_Z,
127   
128    ROCMFLAG_USE_ORDERED_APPEND_GDS = AMDHSAFLAG_USE_ORDERED_APPEND_GDS,
129    ROCMFLAG_PRIVATE_ELEM_SIZE_BIT = AMDHSAFLAG_PRIVATE_ELEM_SIZE_BIT,
130    ROCMFLAG_USE_PTR64 = AMDHSAFLAG_USE_PTR64,
131    ROCMFLAG_USE_DYNAMIC_CALL_STACK = AMDHSAFLAG_USE_DYNAMIC_CALL_STACK,
132    ROCMFLAG_USE_DEBUG_ENABLED = AMDHSAFLAG_USE_DEBUG_ENABLED,
133    ROCMFLAG_USE_XNACK_ENABLED = AMDHSAFLAG_USE_XNACK_ENABLED
134};
135
136/// ROCm kernel configuration structure
137typedef AmdHsaKernelConfig ROCmKernelConfig;
138
139/// check whether is Amd OpenCL 2.0 binary
140extern bool isROCmBinary(size_t binarySize, const cxbyte* binary);
141
142/*
143 * ROCm Binary Generator
144 */
145
146enum: cxuint {
147    ROCMSECTID_HASH = ELFSECTID_OTHER_BUILTIN,
148    ROCMSECTID_DYNAMIC,
149    ROCMSECTID_NOTE,
150    ROCMSECTID_GPUCONFIG,
151    ROCMSECTID_MAX = ROCMSECTID_GPUCONFIG
152};
153
154/// ROCm binary symbol input
155struct ROCmSymbolInput
156{
157    CString symbolName; ///< symbol name
158    size_t offset;  ///< offset in code
159    size_t size;    ///< size of symbol
160    ROCmRegionType type;  ///< type
161};
162
163/// ROCm binary input structure
164struct ROCmInput
165{
166    GPUDeviceType deviceType;   ///< GPU device type
167    uint32_t archMinor;         ///< GPU arch minor
168    uint32_t archStepping;      ///< GPU arch stepping
169    std::vector<ROCmSymbolInput> symbols;   ///< symbols
170    size_t codeSize;        ///< code size
171    const cxbyte* code;     ///< code
172    size_t commentSize; ///< comment size (can be null)
173    const char* comment; ///< comment
174    std::vector<BinSection> extraSections;  ///< extra sections
175    std::vector<BinSymbol> extraSymbols;    ///< extra symbols
176   
177    /// add empty kernel with default values
178    void addEmptyKernel(const char* kernelName);
179};
180
181/// ROCm binary generator
182class ROCmBinGenerator: public NonCopyableAndNonMovable
183{
184private:
185    private:
186    bool manageable;
187    const ROCmInput* input;
188   
189    void generateInternal(std::ostream* osPtr, std::vector<char>* vPtr,
190             Array<cxbyte>* aPtr) const;
191public:
192    /// constructor
193    ROCmBinGenerator();
194    /// constructor with ROCm input
195    explicit ROCmBinGenerator(const ROCmInput* rocmInput);
196   
197    /// constructor
198    /**
199     * \param deviceType device type
200     * \param archMinor architecture minor number
201     * \param archStepping architecture stepping number
202     * \param codeSize size of code
203     * \param code code pointer
204     * \param symbols symbols (kernels, datas,...)
205     */
206    ROCmBinGenerator(GPUDeviceType deviceType, uint32_t archMinor, uint32_t archStepping,
207            size_t codeSize, const cxbyte* code,
208            const std::vector<ROCmSymbolInput>& symbols);
209    /// constructor
210    ROCmBinGenerator(GPUDeviceType deviceType, uint32_t archMinor, uint32_t archStepping,
211            size_t codeSize, const cxbyte* code,
212            std::vector<ROCmSymbolInput>&& symbols);
213    /// destructor
214    ~ROCmBinGenerator();
215   
216    /// get input
217    const ROCmInput* getInput() const
218    { return input; }
219   
220    /// set input
221    void setInput(const ROCmInput* input);
222   
223    /// generates binary to array of bytes
224    void generate(Array<cxbyte>& array) const;
225   
226    /// generates binary to output stream
227    void generate(std::ostream& os) const;
228   
229    /// generates binary to vector of char
230    void generate(std::vector<char>& vector) const;
231};
232
233};
234
235#endif
Note: See TracBrowser for help on using the repository browser.