CLRX  1
Unofficial OpenCL extensions designed for Radeon GPUs
AsmFormats.h
Go to the documentation of this file.
1 /*
2  * CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3  * Copyright (C) 2014-2017 Mateusz Szpakowski
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18  */
23 #ifndef __CLRX_ASMFORMATS_H__
24 #define __CLRX_ASMFORMATS_H__
25 
26 #include <CLRX/Config.h>
27 #include <cstdint>
28 #include <string>
29 #include <vector>
30 #include <utility>
31 #include <unordered_set>
32 #include <unordered_map>
35 #include <CLRX/amdbin/AmdBinGen.h>
38 #include <CLRX/utils/Utilities.h>
39 #include <CLRX/utils/GPUId.h>
40 #include <CLRX/amdasm/Commons.h>
41 
42 namespace CLRX
43 {
44 
47 {
48  DATA = 0,
49  CODE,
50  CONFIG,
52 
53  AMD_HEADER = LAST_COMMON+1,
54  AMD_METADATA,
55  AMD_CALNOTE,
56 
57  AMDCL2_RWDATA = LAST_COMMON+1,
58  AMDCL2_BSS,
59  AMDCL2_SAMPLERINIT,
60  AMDCL2_SETUP,
61  AMDCL2_STUB,
62  AMDCL2_METADATA,
63  AMDCL2_ISAMETADATA,
64  AMDCL2_CONFIG_CTRL_DIRECTIVE,
65 
66  GALLIUM_COMMENT = LAST_COMMON+1,
67  GALLIUM_CONFIG_CTRL_DIRECTIVE,
68 
69  ROCM_COMMENT = LAST_COMMON+1,
70  ROCM_CONFIG_CTRL_DIRECTIVE,
71 
72  EXTRA_FIRST = 0xfc,
73  EXTRA_PROGBITS = 0xfc,
74  EXTRA_NOBITS = 0xfd,
75  EXTRA_NOTE = 0xfe,
76  EXTRA_SECTION = 0xff
77 };
78 
79 enum: cxuint  // special section and kernel identifiers (values taken from the top of the cxuint range)
80 {
81  ASMSECT_ABS = UINT_MAX,  ///< absolute section id
82  ASMSECT_NONE = UINT_MAX,  ///< none section id (same numeric value as ASMSECT_ABS)
83  ASMKERN_GLOBAL = UINT_MAX,  ///< no kernel, global space
84  ASMKERN_INNER = UINT_MAX-1  ///< no kernel, inner global space
85 };
86 
87 enum: Flags  // section flag bits; every enumerator visible here has its own distinct bit, so they can be OR-ed into one Flags value
88 {
89  ASMSECT_WRITEABLE = 1,  // presumably: section content may be written -- confirm against users of this flag
90  ASMSECT_ADDRESSABLE = 2,  // presumably: section can be addressed -- confirm
91  ASMSECT_ABS_ADDRESSABLE = 4,  // presumably: section can be addressed absolutely -- confirm
93  // NOTE(review): listing line 92 is missing from this extract; the page's cross-reference text describes it as "section is unresolvable"
94  ASMELFSECT_ALLOCATABLE = 0x10,  // ASMELFSECT_* group: presumably ELF section attributes -- confirm
95  ASMELFSECT_WRITEABLE = 0x20,
96  ASMELFSECT_EXECUTABLE = 0x40
97 };
98 
99 class Assembler;
100 class AsmExpression;
101 struct AsmRelocation;
102 struct AsmSymbol;
103 
106 {
107 public:
109  AsmFormatException() = default;
111  explicit AsmFormatException(const std::string& message);
113  virtual ~AsmFormatException() noexcept = default;
114 };
115 
118 {
119 public:
121  struct SectionInfo
122  {
123  const char* name;
126  };
127 protected:
129 
131  explicit AsmFormatHandler(Assembler& assembler);
132 public:
133  virtual ~AsmFormatHandler();
134 
136 
142  virtual cxuint addKernel(const char* kernelName) = 0;
144 
151  virtual cxuint addSection(const char* sectionName, cxuint kernelId) = 0;
152 
154  virtual cxuint getSectionId(const char* sectionName) const = 0;
155 
157  virtual void setCurrentKernel(cxuint kernel) = 0;
159  virtual void setCurrentSection(cxuint sectionId) = 0;
160 
162  virtual SectionInfo getSectionInfo(cxuint sectionId) const = 0;
164  virtual bool parsePseudoOp(const CString& firstName,
165  const char* stmtPlace, const char* linePtr) = 0;
167  virtual void handleLabel(const CString& label);
169  virtual bool resolveSymbol(const AsmSymbol& symbol,
170  uint64_t& value, cxuint& sectionId);
172  virtual bool resolveRelocation(const AsmExpression* expr,
173  uint64_t& value, cxuint& sectionId);
175  virtual bool prepareBinary() = 0;
177  virtual void writeBinary(std::ostream& os) const = 0;
179  virtual void writeBinary(Array<cxbyte>& array) const = 0;
180 };
181 
184 {
185 public:
187  explicit AsmRawCodeHandler(Assembler& assembler);
189  ~AsmRawCodeHandler() = default;
190 
191  cxuint addKernel(const char* kernelName);
192  cxuint addSection(const char* sectionName, cxuint kernelId);
193 
194  cxuint getSectionId(const char* sectionName) const;
195 
196  void setCurrentKernel(cxuint kernel);
197  void setCurrentSection(cxuint sectionId);
198 
199  SectionInfo getSectionInfo(cxuint sectionId) const;
200  bool parsePseudoOp(const CString& firstName,
201  const char* stmtPlace, const char* linePtr);
202 
203  bool prepareBinary();
204  void writeBinary(std::ostream& os) const;
205  void writeBinary(Array<cxbyte>& array) const;
206 };
207 
210 {
211 private:
212  typedef std::unordered_map<CString, cxuint> SectionMap;
213  friend struct AsmAmdPseudoOps;
214  AmdInput output;
215  struct Section
216  {
217  cxuint kernelId;
218  AsmSectionType type;
219  cxuint elfBinSectId;
220  const char* name;
221  uint32_t extraId; // for example CALNote id
222  };
223  struct Kernel
224  {
225  cxuint headerSection;
226  cxuint metadataSection;
227  cxuint configSection;
228  cxuint codeSection;
229  cxuint dataSection;
230  std::vector<cxuint> calNoteSections;
231  SectionMap extraSectionMap;
232  cxuint extraSectionCount;
233  cxuint savedSection;
234  std::unordered_set<CString> argNamesSet;
235  cxuint allocRegs[MAX_REGTYPES_NUM];
236  Flags allocRegFlags;
237  };
238  std::vector<Section> sections;
239  // use pointers to prevent copying Kernel objects
240  std::vector<Kernel*> kernelStates;
241  SectionMap extraSectionMap;
242  cxuint dataSection; // global
243  cxuint savedSection;
244  cxuint extraSectionCount;
245 
246  cxuint detectedDriverVersion;
247 
248  void saveCurrentSection();
249  void restoreCurrentAllocRegs();
250  void saveCurrentAllocRegs();
251 
252  cxuint determineDriverVersion() const;
253 public:
255  explicit AsmAmdHandler(Assembler& assembler);
257  ~AsmAmdHandler();
258 
259  cxuint addKernel(const char* kernelName);
260  cxuint addSection(const char* sectionName, cxuint kernelId);
261 
262  cxuint getSectionId(const char* sectionName) const;
263  void setCurrentKernel(cxuint kernel);
264  void setCurrentSection(cxuint sectionId);
265 
266  SectionInfo getSectionInfo(cxuint sectionId) const;
267  bool parsePseudoOp(const CString& firstName,
268  const char* stmtPlace, const char* linePtr);
269 
270  bool prepareBinary();
271  void writeBinary(std::ostream& os) const;
272  void writeBinary(Array<cxbyte>& array) const;
274  const AmdInput* getOutput() const
275  { return &output; }
276 };
277 
280 {
285  bool ieeeMode;
289  bool tgSize;
290  bool debugMode;
292  bool dx10Clamp;
293 
294  void initialize();
295 };
296 
299 {
300 private:
301  typedef std::unordered_map<CString, cxuint> SectionMap;
302  friend struct AsmAmdCL2PseudoOps;
303  AmdCL2Input output;
304  struct Section
305  {
306  cxuint kernelId;
307  AsmSectionType type;
308  cxuint elfBinSectId;
309  const char* name;
310  uint32_t extraId;
311  };
312  struct Relocation
313  {
314  RelocType type;
315  cxuint symbol; // 0,1,2
316  size_t addend;
317  };
318  /* relocmap: key - symbol, value - relocation */
319  typedef std::unordered_map<CString, Relocation> RelocMap;
320  struct Kernel
321  {
322  cxuint stubSection;
323  cxuint setupSection;
324  cxuint metadataSection;
325  cxuint isaMetadataSection;
326  cxuint configSection;
327  cxuint ctrlDirSection;
328  cxuint codeSection;
329  cxuint savedSection;
330  bool useHsaConfig; //
331  std::unique_ptr<AsmAmdHsaKernelConfig> hsaConfig; // hsaConfig
332  std::unordered_set<CString> argNamesSet;
333  cxuint allocRegs[MAX_REGTYPES_NUM];
334  Flags allocRegFlags;
335 
336  void initializeKernelConfig();
337  };
338  std::vector<Section> sections;
339  // use pointers to prevent copying Kernel objects
340  std::vector<Kernel*> kernelStates;
341  RelocMap relocsMap;
342  SectionMap extraSectionMap;
343  SectionMap innerExtraSectionMap;
344  cxuint rodataSection; // global inner
345  cxuint dataSection; // global inner
346  cxuint bssSection; // global inner
347  cxuint samplerInitSection;
348  cxuint savedSection;
349  cxuint innerSavedSection;
350  cxuint extraSectionCount;
351  cxuint innerExtraSectionCount;
352 
353  cxuint detectedDriverVersion;
354 
355  void saveCurrentSection();
356  void restoreCurrentAllocRegs();
357  void saveCurrentAllocRegs();
358  cxuint getDriverVersion() const;
359 public:
361  explicit AsmAmdCL2Handler(Assembler& assembler);
363  ~AsmAmdCL2Handler();
364 
365  cxuint addKernel(const char* kernelName);
366  cxuint addSection(const char* sectionName, cxuint kernelId);
367 
368  cxuint getSectionId(const char* sectionName) const;
369  void setCurrentKernel(cxuint kernel);
370  void setCurrentSection(cxuint sectionId);
371 
372  SectionInfo getSectionInfo(cxuint sectionId) const;
373  bool parsePseudoOp(const CString& firstName,
374  const char* stmtPlace, const char* linePtr);
375 
376  bool resolveSymbol(const AsmSymbol& symbol, uint64_t& value, cxuint& sectionId);
377  bool resolveRelocation(const AsmExpression* expr, uint64_t& value, cxuint& sectionId);
378  bool prepareBinary();
379  void writeBinary(std::ostream& os) const;
380  void writeBinary(Array<cxbyte>& array) const;
382  const AmdCL2Input* getOutput() const
383  { return &output; }
384 };
385 
388 {
389 private:
390  enum class Inside : cxbyte {
391  MAINLAYOUT, CONFIG, ARGS, PROGINFO
392  };
393 
394  typedef std::unordered_map<CString, cxuint> SectionMap;
395  friend struct AsmGalliumPseudoOps;
396  GalliumInput output;
397  struct Section
398  {
399  cxuint kernelId;
400  AsmSectionType type;
401  cxuint elfBinSectId;
402  const char* name; // must be available by whole lifecycle
403  };
404  struct Kernel
405  {
406  cxuint defaultSection;
407  std::unique_ptr<AsmAmdHsaKernelConfig> hsaConfig;
408  cxuint ctrlDirSection;
409  bool hasProgInfo;
410  cxbyte progInfoEntries;
411  cxuint allocRegs[MAX_REGTYPES_NUM];
412  Flags allocRegFlags;
413 
414  void initializeAmdHsaKernelConfig();
415  };
416  std::vector<Kernel*> kernelStates;
417  std::vector<Section> sections;
418  std::vector<cxuint> kcodeSelection; // kcode
419  std::stack<std::vector<cxuint> > kcodeSelStack;
420  cxuint currentKcodeKernel;
421  SectionMap extraSectionMap;
422  cxuint codeSection;
423  cxuint dataSection;
424  cxuint commentSection;
425  cxuint savedSection;
426  Inside inside;
427  cxuint extraSectionCount;
428 
429  cxuint detectedDriverVersion;
430  cxuint detectedLLVMVersion;
431 
432  uint32_t archMinor;
433  uint32_t archStepping;
434 
435  void restoreKcodeCurrentAllocRegs();
436  void saveKcodeCurrentAllocRegs();
437 
438  cxuint determineDriverVersion() const;
439  cxuint determineLLVMVersion() const;
440 public:
442  explicit AsmGalliumHandler(Assembler& assembler);
445 
446  cxuint addKernel(const char* kernelName);
447  cxuint addSection(const char* sectionName, cxuint kernelId);
448 
449  cxuint getSectionId(const char* sectionName) const;
450  void setCurrentKernel(cxuint kernel);
451  void setCurrentSection(cxuint sectionId);
452 
453  SectionInfo getSectionInfo(cxuint sectionId) const;
454  bool parsePseudoOp(const CString& firstName,
455  const char* stmtPlace, const char* linePtr);
456  void handleLabel(const CString& label);
457 
458  bool prepareBinary();
459  void writeBinary(std::ostream& os) const;
460  void writeBinary(Array<cxbyte>& array) const;
462  const GalliumInput* getOutput() const
463  { return &output; }
464 };
465 
467 
470 {
471 private:
472  typedef std::unordered_map<CString, cxuint> SectionMap;
473  friend struct AsmROCmPseudoOps;
474  ROCmInput output;
475  struct Section
476  {
477  cxuint kernelId;
478  AsmSectionType type;
479  cxuint elfBinSectId;
480  const char* name; // must be available by whole lifecycle
481  };
482  struct Kernel
483  {
484  cxuint configSection;
485  std::unique_ptr<AsmROCmKernelConfig> config;
486  bool isFKernel;
487  cxuint ctrlDirSection;
488  cxuint savedSection;
489  Flags allocRegFlags;
490  cxuint allocRegs[MAX_REGTYPES_NUM];
491 
492  void initializeKernelConfig();
493  };
494  std::vector<Kernel*> kernelStates;
495  std::vector<Section> sections;
496  std::vector<cxuint> kcodeSelection; // kcode
497  std::stack<std::vector<cxuint> > kcodeSelStack;
498  cxuint currentKcodeKernel;
499  SectionMap extraSectionMap;
500  cxuint codeSection;
501  cxuint commentSection;
502  cxuint savedSection;
503  cxuint extraSectionCount;
504 
505  void restoreKcodeCurrentAllocRegs();
506  void saveKcodeCurrentAllocRegs();
507 
508 public:
510  explicit AsmROCmHandler(Assembler& assembler);
512  ~AsmROCmHandler();
513 
514  cxuint addKernel(const char* kernelName);
515  cxuint addSection(const char* sectionName, cxuint kernelId);
516 
517  cxuint getSectionId(const char* sectionName) const;
518  void setCurrentKernel(cxuint kernel);
519  void setCurrentSection(cxuint sectionId);
520 
521  SectionInfo getSectionInfo(cxuint sectionId) const;
522  bool parsePseudoOp(const CString& firstName,
523  const char* stmtPlace, const char* linePtr);
524  void handleLabel(const CString& label);
525 
526  bool prepareBinary();
527  void writeBinary(std::ostream& os) const;
528  void writeBinary(Array<cxbyte>& array) const;
530  const ROCmInput* getOutput() const
531  { return &output; }
532 };
533 
534 };
535 
536 #endif
code of program or kernel
common definitions for assembler and disassembler
main class of assembler
Definition: Assembler.h:403
Assembler & assembler
assembler reference
Definition: AsmFormats.h:128
non copyable and non movable base structure (class)
Definition: Utilities.h:43
assembler expression class
Definition: AsmDefs.h:289
uint32_t Flags
type for declaring various flags
Definition: Utilities.h:97
ROCm binary input structure.
Definition: ROCmBinaries.h:164
cxbyte userDataNum
number of user data
Definition: AsmFormats.h:284
Flags flags
section flags
Definition: AsmFormats.h:125
configuration (global or for kernel)
bool privilegedMode
prvileged mode
Definition: AsmFormats.h:291
AMD Catalyst kernel's metadata.
handles raw code format
Definition: AsmFormats.h:183
Gallium input.
Definition: GalliumBinaries.h:394
AsmSectionType type
section type
Definition: AsmFormats.h:124
bool debugMode
debug mode
Definition: AsmFormats.h:290
AMD HSA kernel configuration structure.
Definition: Commons.h:64
ROCm binaries handling.
const char * name
section name
Definition: AsmFormats.h:123
an array class
Definition: Containers.h:38
bool ieeeMode
IEEE mode.
Definition: AsmFormats.h:285
cxbyte floatMode
float mode
Definition: AsmFormats.h:286
Configuration header.
handles ROCM binary format
Definition: AsmFormats.h:469
cxuint RelocType
relocation type
Definition: Commons.h:33
bool tgSize
enable TG_SIZE_EN bit
Definition: AsmFormats.h:289
data object
Definition: ROCmBinaries.h:52
absolute section id
Definition: AsmFormats.h:81
GalliumCompute binaries handling (only with LLVM 3.6)
assembler relocation
Definition: AsmDefs.h:276
const AmdCL2Input * getOutput() const
get output structure pointer
Definition: AsmFormats.h:382
AsmSectionType
assembler section type
Definition: AsmFormats.h:46
none section id
Definition: AsmFormats.h:82
AMD binaries handling.
assembler format handler
Definition: AsmFormats.h:117
unsigned char cxbyte
unsigned byte
Definition: Config.h:215
AMD OpenCL2 binaries generator.
main namespace
Definition: AsmDefs.h:38
unsigned int cxuint
unsigned int
Definition: Config.h:223
const GalliumInput * getOutput() const
get output object (input for bingenerator)
Definition: AsmFormats.h:462
section information
Definition: AsmFormats.h:121
AMD Catalyst kernel's header.
utilities for other libraries and programs
exception class
Definition: Utilities.h:58
AMD binaries generator.
main Input for AmdCL2GPUBinGenerator
Definition: AmdCL2BinGen.h:116
cxuint usedVGPRsNum
number of used VGPRs
Definition: AsmFormats.h:282
cxbyte priority
priority
Definition: AsmFormats.h:287
GPU identification utilities.
const AmdInput * getOutput() const
get output structure pointer
Definition: AsmFormats.h:274
main Input for AmdGPUBinGenerator
Definition: AmdBinGen.h:154
bool dx10Clamp
DX10 CLAMP mode.
Definition: AsmFormats.h:292
const ROCmInput * getOutput() const
get output object (input for bingenerator)
Definition: AsmFormats.h:530
cxuint dimMask
mask of dimension (bits: 0 - X, 1 - Y, 2 - Z)
Definition: AsmFormats.h:281
handles GalliumCompute format
Definition: AsmFormats.h:387
cxuint usedSGPRsNum
number of used SGPRs
Definition: AsmFormats.h:283
assembler symbol structure
Definition: AsmDefs.h:159
cxbyte exceptions
enabled exceptions
Definition: AsmFormats.h:288
handles AMD Catalyst format
Definition: AsmFormats.h:209
handles AMD OpenCL 2.0 binary format
Definition: AsmFormats.h:298
assembler format exception
Definition: AsmFormats.h:105
Asm AMD HSA kernel configuration.
Definition: AsmFormats.h:279
section is unresolvable
Definition: AsmFormats.h:92
no kernel, inner global space
Definition: AsmFormats.h:84
simple C-string container
Definition: CString.h:38
no kernel, global space
Definition: AsmFormats.h:83