source: CLRX/CLRadeonExtender/trunk/CLRX/amdbin/ROCmBinaries.h @ 3698

Last change on this file since 3698 was 3698, checked in by matszpk, 3 years ago

CLRadeonExtender: ROCmMetadata: Replace ROCm metadata object constructor by initialize method. Add ROCmMetadata testsuite.

File size: 13.8 KB
Line 
1/*
2 *  CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3 *  Copyright (C) 2014-2018 Mateusz Szpakowski
4 *
5 *  This library is free software; you can redistribute it and/or
6 *  modify it under the terms of the GNU Lesser General Public
7 *  License as published by the Free Software Foundation; either
8 *  version 2.1 of the License, or (at your option) any later version.
9 *
10 *  This library is distributed in the hope that it will be useful,
11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 *  Lesser General Public License for more details.
14 *
15 *  You should have received a copy of the GNU Lesser General Public
16 *  License along with this library; if not, write to the Free Software
17 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18 */
19/*! \file ROCmBinaries.h
20 * \brief ROCm binaries handling
21 */
22
23#ifndef __CLRX_ROCMBINARIES_H__
24#define __CLRX_ROCMBINARIES_H__
25
26#include <CLRX/Config.h>
27#include <cstddef>
28#include <cstdint>
29#include <memory>
30#include <string>
31#include <vector>
32#include <CLRX/amdbin/Elf.h>
33#include <CLRX/amdbin/ElfBinaries.h>
34#include <CLRX/amdbin/Commons.h>
35#include <CLRX/utils/MemAccess.h>
36#include <CLRX/utils/Containers.h>
37#include <CLRX/utils/Utilities.h>
38#include <CLRX/utils/GPUId.h>
39#include <CLRX/utils/InputOutput.h>
40
41/// main namespace
42namespace CLRX
43{
44
45enum : Flags {
46    ROCMBIN_CREATE_REGIONMAP = 0x10,    ///< create region map
47    ROCMBIN_CREATE_METADATAINFO = 0x20,     ///< create metadata info object
48    ROCMBIN_CREATE_KERNELINFOMAP = 0x40,    ///< create kernel metadata info map
49    ROCMBIN_CREATE_ALL = ELF_CREATE_ALL | 0xfff0 ///< all ROCm binaries flags
50};
51
/// ROCm region/symbol type
/** Unscoped enumeration stored in a single byte; enumerators are numbered
 * consecutively starting from zero.
 */
enum ROCmRegionType: uint8_t
{
    DATA,   ///< data object
    FKERNEL,   ///< function kernel (code)
    KERNEL  ///< OpenCL kernel to call (NOTE(review): original comment was unsure - confirm)
};
59
60/// ROCm data region
61struct ROCmRegion
62{
63    CString regionName; ///< region name
64    size_t size;    ///< data size
65    size_t offset;     ///< data
66    ROCmRegionType type; ///< type
67};
68
69/// ROCm Value kind
70enum class ROCmValueKind : cxbyte
71{
72    BY_VALUE = 0,       ///< value is just value
73    GLOBAL_BUFFER,      ///< passed in global buffer
74    DYN_SHARED_PTR,     ///< passed as dynamic shared pointer
75    SAMPLER,            ///< sampler
76    IMAGE,              ///< image
77    PIPE,               ///< OpenCL pipe
78    QUEUE,              ///< OpenCL queue
79    HIDDEN_GLOBAL_OFFSET_X, ///< OpenCL global offset X
80    HIDDEN_GLOBAL_OFFSET_Y, ///< OpenCL global offset Y
81    HIDDEN_GLOBAL_OFFSET_Z, ///< OpenCL global offset Z
82    HIDDEN_NONE,            ///< none (not used)
83    HIDDEN_PRINTF_BUFFER,   ///< buffer for printf calls
84    HIDDEN_DEFAULT_QUEUE,   ///< OpenCL default queue
85    HIDDEN_COMPLETION_ACTION    ///< ???
86};
87
88/// ROCm argument's value type
89enum class ROCmValueType : cxbyte
90{
91    STRUCTURE = 0,  ///< structure
92    INT8,       ///< 8-bit signed integer
93    UINT8,      ///< 8-bit unsigned integer
94    INT16,      ///< 16-bit signed integer
95    UINT16,     ///< 16-bit unsigned integer
96    FLOAT16,    ///< half floating point
97    INT32,      ///< 32-bit signed integer
98    UINT32,     ///< 32-bit unsigned integer
99    FLOAT32,    ///< single floating point
100    INT64,      ///< 64-bit signed integer
101    UINT64,     ///< 64-bit unsigned integer
102    FLOAT64     ///< double floating point
103};
104
105/// ROCm argument address space
106enum class ROCmAddressSpace : cxbyte
107{
108    NONE = 0,
109    PRIVATE,
110    GLOBAL,
111    CONSTANT,
112    LOCAL,
113    GENERIC,
114    REGION
115};
116
117/// ROCm access qualifier
118enum class ROCmAccessQual: cxbyte
119{
120    DEFAULT = 0,
121    READ_ONLY,
122    WRITE_ONLY,
123    READ_WRITE,
124};
125
126/// ROCm kernel argument
127struct ROCmKernelArgInfo
128{
129    CString name;       ///< name
130    CString typeName;   ///< type name
131    uint64_t size;      ///< argument size in bytes
132    uint64_t align;     ///< argument alignment in bytes
133    uint64_t pointeeAlign;      ///< alignemnt of pointed data of pointer
134    ROCmValueKind valueKind;    ///< value kind
135    ROCmValueType valueType;    ///< value type
136    ROCmAddressSpace addressSpace;  ///< pointer address space
137    ROCmAccessQual accessQual;      ///< access qualifier (for images and values)
138    ROCmAccessQual actualAccessQual;    ///< actual access qualifier
139    bool isConst;       ///< is constant
140    bool isRestrict;    ///< is restrict
141    bool isVolatile;    ///< is volatile
142    bool isPipe;        ///< is pipe
143};
144
145/// ROCm kernel metadata
146struct ROCmKernelMetadata
147{
148    CString name;       ///< kernel name
149    CString symbolName; ///< symbol name
150    std::vector<ROCmKernelArgInfo> argInfos;  ///< kernel arguments
151    CString language;       ///< language
152    cxuint langVersion[2];  ///< language version
153    cxuint reqdWorkGroupSize[3];    ///< required work group size
154    cxuint workGroupSizeHint[3];    ///< work group size hint
155    CString vecTypeHint;    ///< vector type hint
156    CString runtimeHandle;  ///< symbol of runtime handle
157    uint64_t kernargSegmentSize;    ///< kernel argument segment size
158    uint64_t groupSegmentFixedSize; ///< group segment size (fixed)
159    uint64_t privateSegmentFixedSize;   ///< private segment size (fixed)
160    uint64_t kernargSegmentAlign;       ///< alignment of kernel argument segment
161    cxuint wavefrontSize;       ///< wavefront size
162    cxuint sgprsNum;        ///< number of SGPRs
163    cxuint vgprsNum;        ///< number of VGPRs
164    uint64_t maxFlatWorkGroupSize;
165    cxuint fixedWorkGroupSize[3];
166    cxuint spilledSgprs;    ///< number of spilled SGPRs
167    cxuint spilledVgprs;    ///< number of spilled VGPRs
168   
169    void initialize();
170};
171
172/// ROCm printf call info
173struct ROCmPrintfInfo
174{
175    uint32_t id;    /// unique id of call
176    Array<uint32_t> argSizes;   ///< argument sizes
177    CString format;     ///< printf format
178};
179
180/// ROCm binary metadata
181struct ROCmMetadata
182{
183    cxuint version[2];  ///< version
184    std::vector<ROCmPrintfInfo> printfInfos;  ///< printf calls infos
185    std::vector<ROCmKernelMetadata> kernels;  ///< kernel metadatas
186   
187    void initialize();
188};
189
190/// ROCm main binary for GPU for 64-bit mode
191/** This object doesn't copy binary code content.
192 * Only it takes and uses a binary code.
193 */
194class ROCmBinary : public ElfBinary64, public NonCopyableAndNonMovable
195{
196public:
197    /// region map type
198    typedef Array<std::pair<CString, size_t> > RegionMap;
199private:
200    size_t regionsNum;
201    std::unique_ptr<ROCmRegion[]> regions;  ///< AMD metadatas
202    RegionMap regionsMap;
203    size_t codeSize;
204    cxbyte* code;
205    size_t globalDataSize;
206    cxbyte* globalData;
207    CString target;
208    size_t metadataSize;
209    char* metadata;
210    std::unique_ptr<ROCmMetadata> metadataInfo;
211    RegionMap kernelInfosMap;
212    bool newBinFormat;
213public:
214    /// constructor
215    ROCmBinary(size_t binaryCodeSize, cxbyte* binaryCode,
216            Flags creationFlags = ROCMBIN_CREATE_ALL);
217    /// default destructor
218    ~ROCmBinary() = default;
219   
220    /// determine GPU device type from this binary
221    GPUDeviceType determineGPUDeviceType(uint32_t& archMinor,
222                     uint32_t& archStepping) const;
223   
224    /// get regions number
225    size_t getRegionsNum() const
226    { return regionsNum; }
227   
228    /// get region by index
229    const ROCmRegion& getRegion(size_t index) const
230    { return regions[index]; }
231   
232    /// get region by name
233    const ROCmRegion& getRegion(const char* name) const;
234   
235    /// get code size
236    size_t getCodeSize() const
237    { return codeSize; }
238    /// get code
239    const cxbyte* getCode() const
240    { return code; }
241    /// get code
242    cxbyte* getCode()
243    { return code; }
244   
245    /// get global data size
246    size_t getGlobalDataSize() const
247    { return globalDataSize; }
248   
249    /// get global data
250    const cxbyte* getGlobalData() const
251    { return globalData; }
252    /// get global data
253    cxbyte* getGlobalData()
254    { return globalData; }
255   
256    /// get metadata size
257    size_t getMetadataSize() const
258    { return metadataSize; }
259    /// get metadata
260    const char* getMetadata() const
261    { return metadata; }
262    /// get metadata
263    char* getMetadata()
264    { return metadata; }
265   
266    /// has metadata info
267    bool hasMetadataInfo() const
268    { return metadataInfo!=nullptr; }
269   
270    /// get metadata info
271    const ROCmMetadata& getMetadataInfo() const
272    { return *metadataInfo; }
273   
274    /// get kernel metadata infos number
275    size_t getKernelInfosNum() const
276    { return metadataInfo->kernels.size(); }
277   
278    /// get kernel metadata info
279    const ROCmKernelMetadata& getKernelInfo(size_t index) const
280    { return metadataInfo->kernels[index]; }
281   
282    /// get kernel metadata info by name
283    const ROCmKernelMetadata& getKernelInfo(const char* name) const;
284   
285    /// get target
286    const CString& getTarget() const
287    { return target; }
288   
289    /// return true is new binary format
290    bool isNewBinaryFormat() const
291    { return newBinFormat; }
292   
293    /// returns true if kernel map exists
294    bool hasRegionMap() const
295    { return (creationFlags & ROCMBIN_CREATE_REGIONMAP) != 0; }
296    /// returns true if object has kernel map
297    bool hasKernelInfoMap() const
298    { return (creationFlags & ROCMBIN_CREATE_KERNELINFOMAP) != 0; }
299};
300
301enum {
302    ROCMFLAG_USE_PRIVATE_SEGMENT_BUFFER = AMDHSAFLAG_USE_PRIVATE_SEGMENT_BUFFER,
303    ROCMFLAG_USE_DISPATCH_PTR = AMDHSAFLAG_USE_DISPATCH_PTR,
304    ROCMFLAG_USE_QUEUE_PTR = AMDHSAFLAG_USE_QUEUE_PTR,
305    ROCMFLAG_USE_KERNARG_SEGMENT_PTR = AMDHSAFLAG_USE_KERNARG_SEGMENT_PTR,
306    ROCMFLAG_USE_DISPATCH_ID = AMDHSAFLAG_USE_DISPATCH_ID,
307    ROCMFLAG_USE_FLAT_SCRATCH_INIT = AMDHSAFLAG_USE_FLAT_SCRATCH_INIT,
308    ROCMFLAG_USE_PRIVATE_SEGMENT_SIZE = AMDHSAFLAG_USE_PRIVATE_SEGMENT_SIZE,
309    ROCMFLAG_USE_GRID_WORKGROUP_COUNT_BIT = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_BIT,
310    ROCMFLAG_USE_GRID_WORKGROUP_COUNT_X = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_X,
311    ROCMFLAG_USE_GRID_WORKGROUP_COUNT_Y = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_Y,
312    ROCMFLAG_USE_GRID_WORKGROUP_COUNT_Z = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_Z,
313   
314    ROCMFLAG_USE_ORDERED_APPEND_GDS = AMDHSAFLAG_USE_ORDERED_APPEND_GDS,
315    ROCMFLAG_PRIVATE_ELEM_SIZE_BIT = AMDHSAFLAG_PRIVATE_ELEM_SIZE_BIT,
316    ROCMFLAG_USE_PTR64 = AMDHSAFLAG_USE_PTR64,
317    ROCMFLAG_USE_DYNAMIC_CALL_STACK = AMDHSAFLAG_USE_DYNAMIC_CALL_STACK,
318    ROCMFLAG_USE_DEBUG_ENABLED = AMDHSAFLAG_USE_DEBUG_ENABLED,
319    ROCMFLAG_USE_XNACK_ENABLED = AMDHSAFLAG_USE_XNACK_ENABLED
320};
321
322/// ROCm kernel configuration structure
323typedef AmdHsaKernelConfig ROCmKernelConfig;
324
325/// check whether is Amd OpenCL 2.0 binary
326extern bool isROCmBinary(size_t binarySize, const cxbyte* binary);
327
328/*
329 * ROCm Binary Generator
330 */
331
332enum: cxuint {
333    ROCMSECTID_HASH = ELFSECTID_OTHER_BUILTIN,
334    ROCMSECTID_DYNAMIC,
335    ROCMSECTID_NOTE,
336    ROCMSECTID_GPUCONFIG,
337    ROCMSECTID_MAX = ROCMSECTID_GPUCONFIG
338};
339
340/// ROCm binary symbol input
341struct ROCmSymbolInput
342{
343    CString symbolName; ///< symbol name
344    size_t offset;  ///< offset in code
345    size_t size;    ///< size of symbol
346    ROCmRegionType type;  ///< type
347};
348
349/// ROCm binary input structure
350struct ROCmInput
351{
352    GPUDeviceType deviceType;   ///< GPU device type
353    uint32_t archMinor;         ///< GPU arch minor
354    uint32_t archStepping;      ///< GPU arch stepping
355    uint32_t eflags;    ///< ELF headef e_flags field
356    bool newBinFormat;       ///< use new binary format for ROCm
357    size_t globalDataSize;  ///< global data size
358    const cxbyte* globalData;   ///< global data
359    std::vector<ROCmSymbolInput> symbols;   ///< symbols
360    size_t codeSize;        ///< code size
361    const cxbyte* code;     ///< code
362    size_t commentSize; ///< comment size (can be null)
363    const char* comment; ///< comment
364    CString target;     ///< LLVM target triple with device name
365    CString targetTripple; ///< same LLVM target tripple
366    size_t metadataSize;    ///< metadata size
367    const char* metadata;   ///< metadata
368    std::vector<BinSection> extraSections;  ///< extra sections
369    std::vector<BinSymbol> extraSymbols;    ///< extra symbols
370   
371    /// add empty kernel with default values
372    void addEmptyKernel(const char* kernelName);
373};
374
375/// ROCm binary generator
376class ROCmBinGenerator: public NonCopyableAndNonMovable
377{
378private:
379    private:
380    bool manageable;
381    const ROCmInput* input;
382   
383    void generateInternal(std::ostream* osPtr, std::vector<char>* vPtr,
384             Array<cxbyte>* aPtr) const;
385public:
386    /// constructor
387    ROCmBinGenerator();
388    /// constructor with ROCm input
389    explicit ROCmBinGenerator(const ROCmInput* rocmInput);
390   
391    /// constructor
392    /**
393     * \param deviceType device type
394     * \param archMinor architecture minor number
395     * \param archStepping architecture stepping number
396     * \param codeSize size of code
397     * \param code code pointer
398     * \param globalDataSize size of global data
399     * \param globalData global data pointer
400     * \param symbols symbols (kernels, datas,...)
401     */
402    ROCmBinGenerator(GPUDeviceType deviceType, uint32_t archMinor, uint32_t archStepping,
403            size_t codeSize, const cxbyte* code,
404            size_t globalDataSize, const cxbyte* globalData,
405            const std::vector<ROCmSymbolInput>& symbols);
406    /// constructor
407    ROCmBinGenerator(GPUDeviceType deviceType, uint32_t archMinor, uint32_t archStepping,
408            size_t codeSize, const cxbyte* code,
409            size_t globalDataSize, const cxbyte* globalData,
410            std::vector<ROCmSymbolInput>&& symbols);
411    /// destructor
412    ~ROCmBinGenerator();
413   
414    /// get input
415    const ROCmInput* getInput() const
416    { return input; }
417   
418    /// set input
419    void setInput(const ROCmInput* input);
420   
421    /// generates binary to array of bytes
422    void generate(Array<cxbyte>& array) const;
423   
424    /// generates binary to output stream
425    void generate(std::ostream& os) const;
426   
427    /// generates binary to vector of char
428    void generate(std::vector<char>& vector) const;
429};
430
431};
432
433#endif
Note: See TracBrowser for help on using the repository browser.