source: CLRX/CLRadeonExtender/trunk/CLRX/amdbin/ROCmBinaries.h @ 3726

Last change on this file since 3726 was 3726, checked in by matszpk, 2 years ago

CLRadeonExtender: DisasmAmd & DisasmROCm: Check arg types and other fields before printing.
ROCmMetadata: Check arg types and other fields before generating. Escape number in string fields.
AsmROCm: Add additional pseudo-ops (only names) to set metadata info.

File size: 14.1 KB
Line 
/*
 *  CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
 *  Copyright (C) 2014-2018 Mateusz Szpakowski
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */
/*! \file ROCmBinaries.h
 * \brief ROCm binaries handling
 */
22
23#ifndef __CLRX_ROCMBINARIES_H__
24#define __CLRX_ROCMBINARIES_H__
25
26#include <CLRX/Config.h>
27#include <cstddef>
28#include <cstdint>
29#include <memory>
30#include <string>
31#include <vector>
32#include <CLRX/amdbin/Elf.h>
33#include <CLRX/amdbin/ElfBinaries.h>
34#include <CLRX/amdbin/Commons.h>
35#include <CLRX/utils/MemAccess.h>
36#include <CLRX/utils/Containers.h>
37#include <CLRX/utils/Utilities.h>
38#include <CLRX/utils/GPUId.h>
39#include <CLRX/utils/InputOutput.h>
40
41/// main namespace
42namespace CLRX
43{
44
45enum : Flags {
46    ROCMBIN_CREATE_REGIONMAP = 0x10,    ///< create region map
47    ROCMBIN_CREATE_METADATAINFO = 0x20,     ///< create metadata info object
48    ROCMBIN_CREATE_KERNELINFOMAP = 0x40,    ///< create kernel metadata info map
49    ROCMBIN_CREATE_ALL = ELF_CREATE_ALL | 0xfff0 ///< all ROCm binaries flags
50};
51
/// ROCm region/symbol type
/** Unscoped enumeration stored in a single byte (underlying type uint8_t).
 */
enum ROCmRegionType: uint8_t
{
    DATA,       ///< data object
    FKERNEL,    ///< function kernel (code)
    KERNEL      ///< OpenCL kernel to call ??
};
59
60/// ROCm data region
61struct ROCmRegion
62{
63    CString regionName; ///< region name
64    size_t size;    ///< data size
65    size_t offset;     ///< data
66    ROCmRegionType type; ///< type
67};
68
69/// ROCm Value kind
70enum class ROCmValueKind : cxbyte
71{
72    BY_VALUE = 0,       ///< value is just value
73    GLOBAL_BUFFER,      ///< passed in global buffer
74    DYN_SHARED_PTR,     ///< passed as dynamic shared pointer
75    SAMPLER,            ///< sampler
76    IMAGE,              ///< image
77    PIPE,               ///< OpenCL pipe
78    QUEUE,              ///< OpenCL queue
79    HIDDEN_GLOBAL_OFFSET_X, ///< OpenCL global offset X
80    HIDDEN_GLOBAL_OFFSET_Y, ///< OpenCL global offset Y
81    HIDDEN_GLOBAL_OFFSET_Z, ///< OpenCL global offset Z
82    HIDDEN_NONE,            ///< none (not used)
83    HIDDEN_PRINTF_BUFFER,   ///< buffer for printf calls
84    HIDDEN_DEFAULT_QUEUE,   ///< OpenCL default queue
85    HIDDEN_COMPLETION_ACTION,    ///< ???
86    MAX_VALUE = HIDDEN_COMPLETION_ACTION
87};
88
89/// ROCm argument's value type
90enum class ROCmValueType : cxbyte
91{
92    STRUCTURE = 0,  ///< structure
93    INT8,       ///< 8-bit signed integer
94    UINT8,      ///< 8-bit unsigned integer
95    INT16,      ///< 16-bit signed integer
96    UINT16,     ///< 16-bit unsigned integer
97    FLOAT16,    ///< half floating point
98    INT32,      ///< 32-bit signed integer
99    UINT32,     ///< 32-bit unsigned integer
100    FLOAT32,    ///< single floating point
101    INT64,      ///< 64-bit signed integer
102    UINT64,     ///< 64-bit unsigned integer
103    FLOAT64,     ///< double floating point
104    MAX_VALUE = FLOAT64
105};
106
107/// ROCm argument address space
108enum class ROCmAddressSpace : cxbyte
109{
110    NONE = 0,
111    PRIVATE,
112    GLOBAL,
113    CONSTANT,
114    LOCAL,
115    GENERIC,
116    REGION,
117    MAX_VALUE = REGION
118};
119
120/// ROCm access qualifier
121enum class ROCmAccessQual: cxbyte
122{
123    DEFAULT = 0,
124    READ_ONLY,
125    WRITE_ONLY,
126    READ_WRITE,
127    MAX_VALUE = READ_WRITE
128};
129
130/// ROCm kernel argument
131struct ROCmKernelArgInfo
132{
133    CString name;       ///< name
134    CString typeName;   ///< type name
135    uint64_t size;      ///< argument size in bytes
136    uint64_t align;     ///< argument alignment in bytes
137    uint64_t pointeeAlign;      ///< alignemnt of pointed data of pointer
138    ROCmValueKind valueKind;    ///< value kind
139    ROCmValueType valueType;    ///< value type
140    ROCmAddressSpace addressSpace;  ///< pointer address space
141    ROCmAccessQual accessQual;      ///< access qualifier (for images and values)
142    ROCmAccessQual actualAccessQual;    ///< actual access qualifier
143    bool isConst;       ///< is constant
144    bool isRestrict;    ///< is restrict
145    bool isVolatile;    ///< is volatile
146    bool isPipe;        ///< is pipe
147};
148
149/// ROCm kernel metadata
150struct ROCmKernelMetadata
151{
152    CString name;       ///< kernel name
153    CString symbolName; ///< symbol name
154    std::vector<ROCmKernelArgInfo> argInfos;  ///< kernel arguments
155    CString language;       ///< language
156    cxuint langVersion[2];  ///< language version
157    cxuint reqdWorkGroupSize[3];    ///< required work group size
158    cxuint workGroupSizeHint[3];    ///< work group size hint
159    CString vecTypeHint;    ///< vector type hint
160    CString runtimeHandle;  ///< symbol of runtime handle
161    uint64_t kernargSegmentSize;    ///< kernel argument segment size
162    uint64_t groupSegmentFixedSize; ///< group segment size (fixed)
163    uint64_t privateSegmentFixedSize;   ///< private segment size (fixed)
164    uint64_t kernargSegmentAlign;       ///< alignment of kernel argument segment
165    cxuint wavefrontSize;       ///< wavefront size
166    cxuint sgprsNum;        ///< number of SGPRs
167    cxuint vgprsNum;        ///< number of VGPRs
168    uint64_t maxFlatWorkGroupSize;
169    cxuint fixedWorkGroupSize[3];
170    cxuint spilledSgprs;    ///< number of spilled SGPRs
171    cxuint spilledVgprs;    ///< number of spilled VGPRs
172   
173    void initialize();
174};
175
176/// ROCm printf call info
177struct ROCmPrintfInfo
178{
179    uint32_t id;    /// unique id of call
180    Array<uint32_t> argSizes;   ///< argument sizes
181    CString format;     ///< printf format
182};
183
184/// ROCm binary metadata
185struct ROCmMetadata
186{
187    cxuint version[2];  ///< version
188    std::vector<ROCmPrintfInfo> printfInfos;  ///< printf calls infos
189    std::vector<ROCmKernelMetadata> kernels;  ///< kernel metadatas
190   
191    /// initialize metadata info
192    void initialize();
193    /// parse metadata info from metadata string
194    void parse(size_t metadataSize, const char* metadata);
195};
196
197/// ROCm main binary for GPU for 64-bit mode
198/** This object doesn't copy binary code content.
199 * Only it takes and uses a binary code.
200 */
201class ROCmBinary : public ElfBinary64, public NonCopyableAndNonMovable
202{
203public:
204    /// region map type
205    typedef Array<std::pair<CString, size_t> > RegionMap;
206private:
207    size_t regionsNum;
208    std::unique_ptr<ROCmRegion[]> regions;  ///< AMD metadatas
209    RegionMap regionsMap;
210    size_t codeSize;
211    cxbyte* code;
212    size_t globalDataSize;
213    cxbyte* globalData;
214    CString target;
215    size_t metadataSize;
216    char* metadata;
217    std::unique_ptr<ROCmMetadata> metadataInfo;
218    RegionMap kernelInfosMap;
219    bool newBinFormat;
220public:
221    /// constructor
222    ROCmBinary(size_t binaryCodeSize, cxbyte* binaryCode,
223            Flags creationFlags = ROCMBIN_CREATE_ALL);
224    /// default destructor
225    ~ROCmBinary() = default;
226   
227    /// determine GPU device type from this binary
228    GPUDeviceType determineGPUDeviceType(uint32_t& archMinor,
229                     uint32_t& archStepping) const;
230   
231    /// get regions number
232    size_t getRegionsNum() const
233    { return regionsNum; }
234   
235    /// get region by index
236    const ROCmRegion& getRegion(size_t index) const
237    { return regions[index]; }
238   
239    /// get region by name
240    const ROCmRegion& getRegion(const char* name) const;
241   
242    /// get code size
243    size_t getCodeSize() const
244    { return codeSize; }
245    /// get code
246    const cxbyte* getCode() const
247    { return code; }
248    /// get code
249    cxbyte* getCode()
250    { return code; }
251   
252    /// get global data size
253    size_t getGlobalDataSize() const
254    { return globalDataSize; }
255   
256    /// get global data
257    const cxbyte* getGlobalData() const
258    { return globalData; }
259    /// get global data
260    cxbyte* getGlobalData()
261    { return globalData; }
262   
263    /// get metadata size
264    size_t getMetadataSize() const
265    { return metadataSize; }
266    /// get metadata
267    const char* getMetadata() const
268    { return metadata; }
269    /// get metadata
270    char* getMetadata()
271    { return metadata; }
272   
273    /// has metadata info
274    bool hasMetadataInfo() const
275    { return metadataInfo!=nullptr; }
276   
277    /// get metadata info
278    const ROCmMetadata& getMetadataInfo() const
279    { return *metadataInfo; }
280   
281    /// get kernel metadata infos number
282    size_t getKernelInfosNum() const
283    { return metadataInfo->kernels.size(); }
284   
285    /// get kernel metadata info
286    const ROCmKernelMetadata& getKernelInfo(size_t index) const
287    { return metadataInfo->kernels[index]; }
288   
289    /// get kernel metadata info by name
290    const ROCmKernelMetadata& getKernelInfo(const char* name) const;
291   
292    /// get target
293    const CString& getTarget() const
294    { return target; }
295   
296    /// return true is new binary format
297    bool isNewBinaryFormat() const
298    { return newBinFormat; }
299   
300    /// returns true if kernel map exists
301    bool hasRegionMap() const
302    { return (creationFlags & ROCMBIN_CREATE_REGIONMAP) != 0; }
303    /// returns true if object has kernel map
304    bool hasKernelInfoMap() const
305    { return (creationFlags & ROCMBIN_CREATE_KERNELINFOMAP) != 0; }
306};
307
308enum {
309    ROCMFLAG_USE_PRIVATE_SEGMENT_BUFFER = AMDHSAFLAG_USE_PRIVATE_SEGMENT_BUFFER,
310    ROCMFLAG_USE_DISPATCH_PTR = AMDHSAFLAG_USE_DISPATCH_PTR,
311    ROCMFLAG_USE_QUEUE_PTR = AMDHSAFLAG_USE_QUEUE_PTR,
312    ROCMFLAG_USE_KERNARG_SEGMENT_PTR = AMDHSAFLAG_USE_KERNARG_SEGMENT_PTR,
313    ROCMFLAG_USE_DISPATCH_ID = AMDHSAFLAG_USE_DISPATCH_ID,
314    ROCMFLAG_USE_FLAT_SCRATCH_INIT = AMDHSAFLAG_USE_FLAT_SCRATCH_INIT,
315    ROCMFLAG_USE_PRIVATE_SEGMENT_SIZE = AMDHSAFLAG_USE_PRIVATE_SEGMENT_SIZE,
316    ROCMFLAG_USE_GRID_WORKGROUP_COUNT_BIT = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_BIT,
317    ROCMFLAG_USE_GRID_WORKGROUP_COUNT_X = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_X,
318    ROCMFLAG_USE_GRID_WORKGROUP_COUNT_Y = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_Y,
319    ROCMFLAG_USE_GRID_WORKGROUP_COUNT_Z = AMDHSAFLAG_USE_GRID_WORKGROUP_COUNT_Z,
320   
321    ROCMFLAG_USE_ORDERED_APPEND_GDS = AMDHSAFLAG_USE_ORDERED_APPEND_GDS,
322    ROCMFLAG_PRIVATE_ELEM_SIZE_BIT = AMDHSAFLAG_PRIVATE_ELEM_SIZE_BIT,
323    ROCMFLAG_USE_PTR64 = AMDHSAFLAG_USE_PTR64,
324    ROCMFLAG_USE_DYNAMIC_CALL_STACK = AMDHSAFLAG_USE_DYNAMIC_CALL_STACK,
325    ROCMFLAG_USE_DEBUG_ENABLED = AMDHSAFLAG_USE_DEBUG_ENABLED,
326    ROCMFLAG_USE_XNACK_ENABLED = AMDHSAFLAG_USE_XNACK_ENABLED
327};
328
329/// ROCm kernel configuration structure
330typedef AmdHsaKernelConfig ROCmKernelConfig;
331
332/// check whether is Amd OpenCL 2.0 binary
333extern bool isROCmBinary(size_t binarySize, const cxbyte* binary);
334
/*
 * ROCm Binary Generator
 */
338
339enum: cxuint {
340    ROCMSECTID_HASH = ELFSECTID_OTHER_BUILTIN,
341    ROCMSECTID_DYNAMIC,
342    ROCMSECTID_NOTE,
343    ROCMSECTID_GPUCONFIG,
344    ROCMSECTID_MAX = ROCMSECTID_GPUCONFIG
345};
346
347/// ROCm binary symbol input
348struct ROCmSymbolInput
349{
350    CString symbolName; ///< symbol name
351    size_t offset;  ///< offset in code
352    size_t size;    ///< size of symbol
353    ROCmRegionType type;  ///< type
354};
355
356/// ROCm binary input structure
357struct ROCmInput
358{
359    GPUDeviceType deviceType;   ///< GPU device type
360    uint32_t archMinor;         ///< GPU arch minor
361    uint32_t archStepping;      ///< GPU arch stepping
362    uint32_t eflags;    ///< ELF headef e_flags field
363    bool newBinFormat;       ///< use new binary format for ROCm
364    size_t globalDataSize;  ///< global data size
365    const cxbyte* globalData;   ///< global data
366    std::vector<ROCmSymbolInput> symbols;   ///< symbols
367    size_t codeSize;        ///< code size
368    const cxbyte* code;     ///< code
369    size_t commentSize; ///< comment size (can be null)
370    const char* comment; ///< comment
371    CString target;     ///< LLVM target triple with device name
372    CString targetTripple; ///< same LLVM target tripple
373    size_t metadataSize;    ///< metadata size
374    const char* metadata;   ///< metadata
375    bool useMetadataInfo;   ///< use metadatainfo instead same metadata
376    ROCmMetadata metadataInfo; ///< metadata info
377    std::vector<BinSection> extraSections;  ///< extra sections
378    std::vector<BinSymbol> extraSymbols;    ///< extra symbols
379   
380    /// add empty kernel with default values
381    void addEmptyKernel(const char* kernelName);
382};
383
384/// ROCm binary generator
385class ROCmBinGenerator: public NonCopyableAndNonMovable
386{
387private:
388    private:
389    bool manageable;
390    const ROCmInput* input;
391   
392    void generateInternal(std::ostream* osPtr, std::vector<char>* vPtr,
393             Array<cxbyte>* aPtr) const;
394public:
395    /// constructor
396    ROCmBinGenerator();
397    /// constructor with ROCm input
398    explicit ROCmBinGenerator(const ROCmInput* rocmInput);
399   
400    /// constructor
401    /**
402     * \param deviceType device type
403     * \param archMinor architecture minor number
404     * \param archStepping architecture stepping number
405     * \param codeSize size of code
406     * \param code code pointer
407     * \param globalDataSize size of global data
408     * \param globalData global data pointer
409     * \param symbols symbols (kernels, datas,...)
410     */
411    ROCmBinGenerator(GPUDeviceType deviceType, uint32_t archMinor, uint32_t archStepping,
412            size_t codeSize, const cxbyte* code,
413            size_t globalDataSize, const cxbyte* globalData,
414            const std::vector<ROCmSymbolInput>& symbols);
415    /// constructor
416    ROCmBinGenerator(GPUDeviceType deviceType, uint32_t archMinor, uint32_t archStepping,
417            size_t codeSize, const cxbyte* code,
418            size_t globalDataSize, const cxbyte* globalData,
419            std::vector<ROCmSymbolInput>&& symbols);
420    /// destructor
421    ~ROCmBinGenerator();
422   
423    /// get input
424    const ROCmInput* getInput() const
425    { return input; }
426   
427    /// set input
428    void setInput(const ROCmInput* input);
429   
430    /// generates binary to array of bytes
431    void generate(Array<cxbyte>& array) const;
432   
433    /// generates binary to output stream
434    void generate(std::ostream& os) const;
435   
436    /// generates binary to vector of char
437    void generate(std::vector<char>& vector) const;
438};
439
440};
441
442#endif
Note: See TracBrowser for help on using the repository browser.