CLRX  1
Unofficial OpenCL extensions designed for Radeon GPUs
AsmFormats.h
Go to the documentation of this file.
1 /*
2  * CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3  * Copyright (C) 2014-2018 Mateusz Szpakowski
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18  */
23 #ifndef __CLRX_ASMFORMATS_H__
24 #define __CLRX_ASMFORMATS_H__
25 
26 #include <CLRX/Config.h>
27 #include <cstdint>
28 #include <string>
29 #include <vector>
30 #include <utility>
31 #include <memory>
32 #include <unordered_set>
33 #include <unordered_map>
36 #include <CLRX/amdbin/AmdBinGen.h>
39 #include <CLRX/utils/Utilities.h>
40 #include <CLRX/utils/GPUId.h>
41 #include <CLRX/amdasm/Commons.h>
42 
43 namespace CLRX
44 {
45 
48 {
49  DATA = 0,
50  CODE,
51  CONFIG,
53 
54  AMD_HEADER = LAST_COMMON+1,
55  AMD_METADATA,
56  AMD_CALNOTE,
57 
58  AMDCL2_RWDATA = LAST_COMMON+1,
59  AMDCL2_BSS,
60  AMDCL2_SAMPLERINIT,
61  AMDCL2_SETUP,
62  AMDCL2_STUB,
63  AMDCL2_METADATA,
64  AMDCL2_ISAMETADATA,
65  AMDCL2_CONFIG_CTRL_DIRECTIVE,
66  AMDCL2_DUMMY,
67 
68  GALLIUM_COMMENT = LAST_COMMON+1,
69  GALLIUM_CONFIG_CTRL_DIRECTIVE,
71 
72  ROCM_COMMENT = LAST_COMMON+1,
73  ROCM_CONFIG_CTRL_DIRECTIVE,
74  ROCM_METADATA,
75  ROCM_GOT,
76 
77  EXTRA_FIRST = 0xfc,
78  EXTRA_PROGBITS = 0xfc,
79  EXTRA_NOBITS = 0xfd,
80  EXTRA_NOTE = 0xfe,
81  EXTRA_SECTION = 0xff
82 };
83 
84 enum: AsmSectionId  // reserved section id values
85 {
86  ASMSECT_ABS = UINT_MAX,  ///< absolute section id
87  ASMSECT_NONE = UINT_MAX,  ///< none section id (same numeric value as ASMSECT_ABS)
88 };
89 
90 enum: AsmKernelId  // reserved kernel id values that do not refer to a kernel
91 {
92  ASMKERN_GLOBAL = UINT_MAX,  ///< no kernel, global space
93  ASMKERN_INNER = UINT_MAX-1  ///< no kernel, inner global space
94 };
95 
96 enum: Flags  // flag constants; every value listed here is a distinct single bit
97 {
98  ASMSECT_WRITEABLE = 1,
99  ASMSECT_ADDRESSABLE = 2,
100  ASMSECT_ABS_ADDRESSABLE = 4,
102 
    // NOTE(review): listing line 101 is absent here; the index describes it as
    // "section is unresolvable" -- confirm the member name/value against the header.
103  ASMELFSECT_ALLOCATABLE = 0x10,
104  ASMELFSECT_WRITEABLE = 0x20,
105  ASMELFSECT_EXECUTABLE = 0x40
106 };
107 
108 class Assembler;  // main class of assembler (defined in Assembler.h)
109 class AsmExpression;  // assembler expression class (defined in AsmDefs.h)
110 struct AsmRelocation;  // assembler relocation (defined in AsmDefs.h)
111 struct AsmSymbol;  // assembler symbol structure (defined in AsmDefs.h)
112 
115 {
116 public:
118  AsmFormatException() = default;
120  explicit AsmFormatException(const std::string& message);
122  virtual ~AsmFormatException() noexcept = default;
123 };
124 
127 {
128 public:
130  struct SectionInfo
131  {
132  const char* name;
136 
137  SectionInfo() : name(nullptr), type(AsmSectionType::DATA), flags(0),
138  relSpace(UINT_MAX)
139  { }
140  SectionInfo(const char* _name, AsmSectionType _type, Flags _flags = 0,
141  cxuint _relSpace = UINT_MAX) : name(_name), type(_type), flags(_flags),
142  relSpace(_relSpace)
143  { }
144  };
145 
146  struct KernelBase
147  {
148  cxuint allocRegs[MAX_REGTYPES_NUM];
149  Flags allocRegFlags;
150  };
151 protected:
153  bool sectionDiffsResolvable;
154 
156  explicit AsmFormatHandler(Assembler& assembler);
157 
158  // resolve LO32BIT/HI32BIT relocations (partially, helper)
159  bool resolveLoHiRelocExpression(const AsmExpression* expr, RelocType& relType,
160  AsmSectionId& relSectionId, uint64_t& relValue);
161 public:
162  virtual ~AsmFormatHandler();
163 
166  { return sectionDiffsResolvable; }
167 
169 
175  virtual AsmKernelId addKernel(const char* kernelName) = 0;
177 
184  virtual AsmSectionId addSection(const char* sectionName, AsmKernelId kernelId) = 0;
185 
187  virtual AsmSectionId getSectionId(const char* sectionName) const = 0;
188 
190  virtual void setCurrentKernel(AsmKernelId kernel) = 0;
192  virtual void setCurrentSection(AsmSectionId sectionId) = 0;
193 
195  virtual SectionInfo getSectionInfo(AsmSectionId sectionId) const = 0;
197  virtual bool parsePseudoOp(const CString& firstName,
198  const char* stmtPlace, const char* linePtr) = 0;
200  virtual void handleLabel(const CString& label);
202  virtual bool resolveSymbol(const AsmSymbol& symbol,
203  uint64_t& value, AsmSectionId& sectionId);
205  virtual bool resolveRelocation(const AsmExpression* expr,
206  uint64_t& value, AsmSectionId& sectionId);
208  virtual bool prepareBinary() = 0;
210  virtual void writeBinary(std::ostream& os) const = 0;
212  virtual void writeBinary(Array<cxbyte>& array) const = 0;
213 
215  virtual bool prepareSectionDiffsResolving();
216 };
217 
220 {
221 protected:
222  friend struct AsmKcodePseudoOps;
223  std::vector<AsmKernelId> kcodeSelection; // kcode
224  std::stack<std::vector<AsmKernelId> > kcodeSelStack;
225  AsmKernelId currentKcodeKernel;
226  AsmSectionId codeSection;
227 
228  explicit AsmKcodeHandler(Assembler& assembler);
229  ~AsmKcodeHandler() = default;
230 
231  void restoreKcodeCurrentAllocRegs();
232  void saveKcodeCurrentAllocRegs();
233  // prepare kcode state while preparing binary
234  void prepareKcodeState();
235 public:
236  void handleLabel(const CString& label);
237 
239  virtual bool isCodeSection() const = 0;
241  virtual KernelBase& getKernelBase(AsmKernelId index) = 0;
243  virtual size_t getKernelsNum() const = 0;
244 };
245 
248 {
249 public:
251  explicit AsmRawCodeHandler(Assembler& assembler);
253  ~AsmRawCodeHandler() = default;
254 
255  AsmKernelId addKernel(const char* kernelName);
256  AsmSectionId addSection(const char* sectionName, AsmKernelId kernelId);
257 
258  AsmSectionId getSectionId(const char* sectionName) const;
259 
260  void setCurrentKernel(AsmKernelId kernel);
261  void setCurrentSection(AsmSectionId sectionId);
262 
263  SectionInfo getSectionInfo(AsmSectionId sectionId) const;
264  bool parsePseudoOp(const CString& firstName,
265  const char* stmtPlace, const char* linePtr);
266 
267  bool prepareBinary();
268  void writeBinary(std::ostream& os) const;
269  void writeBinary(Array<cxbyte>& array) const;
270 };
271 
274 {
275 private:
276  typedef std::unordered_map<CString, AsmSectionId> SectionMap;
277  friend struct AsmAmdPseudoOps;
278  AmdInput output;
279  struct Section
280  {
281  AsmKernelId kernelId;
282  AsmSectionType type;
283  AsmSectionId elfBinSectId;
284  const char* name;
285  uint32_t extraId; // for example CALNote id
286  };
287  struct Kernel : KernelBase
288  {
289  AsmSectionId headerSection;
290  AsmSectionId metadataSection;
291  AsmSectionId configSection;
292  AsmSectionId codeSection;
293  AsmSectionId dataSection;
294  std::vector<AsmSectionId> calNoteSections;
295  SectionMap extraSectionMap;
296  AsmSectionId extraSectionCount;
297  AsmSectionId savedSection;
298  std::unordered_set<CString> argNamesSet;
299 
300  explicit Kernel(AsmSectionId _codeSection = ASMSECT_NONE) : KernelBase{},
301  headerSection(ASMSECT_NONE), metadataSection(ASMSECT_NONE),
302  configSection(ASMSECT_NONE), codeSection(_codeSection),
303  dataSection(ASMSECT_NONE), extraSectionCount(0),
304  savedSection(ASMSECT_NONE)
305  { }
306  };
307  std::vector<Section> sections;
308  // use pointers to prevent copying Kernel objects
309  std::vector<Kernel*> kernelStates;
310  SectionMap extraSectionMap;
311  AsmSectionId dataSection; // global
312  AsmSectionId savedSection;
313  AsmSectionId extraSectionCount;
314 
315  cxuint detectedDriverVersion;
316 
317  void saveCurrentSection();
318  void restoreCurrentAllocRegs();
319  void saveCurrentAllocRegs();
320 
321  cxuint determineDriverVersion() const;
322 public:
324  explicit AsmAmdHandler(Assembler& assembler);
326  ~AsmAmdHandler();
327 
328  AsmKernelId addKernel(const char* kernelName);
329  AsmSectionId addSection(const char* sectionName, AsmKernelId kernelId);
330 
331  AsmSectionId getSectionId(const char* sectionName) const;
332  void setCurrentKernel(AsmKernelId kernel);
333  void setCurrentSection(AsmSectionId sectionId);
334 
335  SectionInfo getSectionInfo(AsmSectionId sectionId) const;
336  bool parsePseudoOp(const CString& firstName,
337  const char* stmtPlace, const char* linePtr);
338 
339  bool prepareBinary();
340  void writeBinary(std::ostream& os) const;
341  void writeBinary(Array<cxbyte>& array) const;
343  const AmdInput* getOutput() const
344  { return &output; }
345 };
346 
349 {
354  bool ieeeMode;
358  bool tgSize;
359  bool debugMode;
361  bool dx10Clamp;
362 
363  void initialize();
364 };
365 
368 {
369 private:
370  typedef std::unordered_map<CString, AsmSectionId> SectionMap;
371  friend struct AsmAmdCL2PseudoOps;
372  AmdCL2Input output;
373  struct Section
374  {
375  AsmKernelId kernelId;
376  AsmSectionType type;
377  AsmSectionId elfBinSectId;
378  const char* name;
379  uint32_t extraId;
380  };
381  struct Relocation
382  {
383  RelocType type;
384  cxuint symbol; // 0,1,2
385  size_t addend;
386  };
387  /* relocmap: key - symbol, value - relocation */
388  typedef std::unordered_map<CString, Relocation> RelocMap;
389  struct Kernel : KernelBase
390  {
391  AsmSectionId stubSection;
392  AsmSectionId setupSection;
393  AsmSectionId metadataSection;
394  AsmSectionId isaMetadataSection;
395  AsmSectionId configSection;
396  AsmSectionId ctrlDirSection;
397  AsmSectionId codeSection;
398  AsmSectionId savedSection;
399  bool useHsaConfig; //
400  std::unique_ptr<AsmAmdHsaKernelConfig> hsaConfig; // hsaConfig
401  std::unordered_set<CString> argNamesSet;
402 
403  explicit Kernel(AsmSectionId _codeSection = ASMSECT_NONE) : KernelBase{},
404  stubSection(ASMSECT_NONE), setupSection(ASMSECT_NONE),
405  metadataSection(ASMSECT_NONE), isaMetadataSection(ASMSECT_NONE),
406  configSection(ASMSECT_NONE), ctrlDirSection(ASMSECT_NONE),
407  codeSection(_codeSection), savedSection(ASMSECT_NONE),
408  useHsaConfig(false)
409  { }
410 
411  void initializeKernelConfig();
412  };
413  std::vector<Section> sections;
414  // use pointers to prevent copying Kernel objects
415  std::vector<Kernel*> kernelStates;
416  RelocMap relocsMap;
417  SectionMap extraSectionMap;
418  SectionMap innerExtraSectionMap;
419  AsmSectionId rodataSection; // global inner
420  AsmSectionId dataSection; // global inner
421  AsmSectionId bssSection; // global inner
422  AsmSectionId samplerInitSection;
423  AsmSectionId savedSection;
424  AsmSectionId innerSavedSection;
425  AsmSectionId extraSectionCount;
426  AsmSectionId innerExtraSectionCount;
427  bool hsaLayout;
428 
429  cxuint detectedDriverVersion;
430 
431  void saveCurrentSection();
432  void restoreCurrentAllocRegs();
433  void saveCurrentAllocRegs();
434  cxuint getDriverVersion() const;
435 public:
437  explicit AsmAmdCL2Handler(Assembler& assembler);
439  ~AsmAmdCL2Handler();
440 
441  AsmKernelId addKernel(const char* kernelName);
442  AsmSectionId addSection(const char* sectionName, AsmKernelId kernelId);
443 
444  AsmSectionId getSectionId(const char* sectionName) const;
445  void setCurrentKernel(AsmKernelId kernel);
446  void setCurrentSection(AsmSectionId sectionId);
447 
448  SectionInfo getSectionInfo(AsmSectionId sectionId) const;
449  bool parsePseudoOp(const CString& firstName,
450  const char* stmtPlace, const char* linePtr);
451 
452  bool resolveSymbol(const AsmSymbol& symbol, uint64_t& value, AsmSectionId& sectionId);
453  bool resolveRelocation(const AsmExpression* expr, uint64_t& value,
454  AsmSectionId& sectionId);
455  bool prepareBinary();
456  void writeBinary(std::ostream& os) const;
457  void writeBinary(Array<cxbyte>& array) const;
459  const AmdCL2Input* getOutput() const
460  { return &output; }
461 
462  // kcode support
463  bool isCodeSection() const;
464  KernelBase& getKernelBase(AsmKernelId index);
465  size_t getKernelsNum() const;
466  void handleLabel(const CString& label);
467 };
468 
471 {
472 private:
473  enum class Inside : cxbyte {
474  MAINLAYOUT, CONFIG, ARGS, PROGINFO
475  };
476 
477  typedef std::unordered_map<CString, AsmSectionId> SectionMap;
478  friend struct AsmGalliumPseudoOps;
479  GalliumInput output;
480  struct Section
481  {
482  AsmKernelId kernelId;
483  AsmSectionType type;
484  AsmSectionId elfBinSectId;
485  const char* name; // must be available by whole lifecycle
486  };
487  struct Kernel : KernelBase
488  {
489  AsmSectionId defaultSection;
490  std::unique_ptr<AsmAmdHsaKernelConfig> hsaConfig;
491  AsmSectionId ctrlDirSection;
492  bool hasProgInfo;
493  cxbyte progInfoEntries;
494 
495  explicit Kernel(AsmSectionId _defaultSection = ASMSECT_NONE) : KernelBase{},
496  defaultSection(_defaultSection), hsaConfig(nullptr),
497  ctrlDirSection(ASMSECT_NONE), hasProgInfo(false), progInfoEntries(0)
498  { }
499 
500  void initializeAmdHsaKernelConfig();
501  };
502  std::vector<Kernel*> kernelStates;
503  std::vector<Section> sections;
504  SectionMap extraSectionMap;
505  AsmSectionId dataSection;
506  AsmSectionId commentSection;
507  AsmSectionId scratchSection;
508  AsmSectionId savedSection;
509  Inside inside;
510  AsmSectionId extraSectionCount;
511 
512  cxuint detectedDriverVersion;
513  cxuint detectedLLVMVersion;
514 
515  uint32_t archMinor;
516  uint32_t archStepping;
517 
518  cxuint determineDriverVersion() const;
519  cxuint determineLLVMVersion() const;
520 public:
522  explicit AsmGalliumHandler(Assembler& assembler);
525 
526  AsmKernelId addKernel(const char* kernelName);
527  AsmSectionId addSection(const char* sectionName, AsmKernelId kernelId);
528 
529  AsmSectionId getSectionId(const char* sectionName) const;
530  void setCurrentKernel(AsmKernelId kernel);
531  void setCurrentSection(AsmSectionId sectionId);
532 
533  SectionInfo getSectionInfo(AsmSectionId sectionId) const;
534  bool parsePseudoOp(const CString& firstName,
535  const char* stmtPlace, const char* linePtr);
536 
537  bool resolveSymbol(const AsmSymbol& symbol, uint64_t& value, AsmSectionId& sectionId);
538  bool resolveRelocation(const AsmExpression* expr, uint64_t& value,
539  AsmSectionId& sectionId);
540  bool prepareBinary();
541  void writeBinary(std::ostream& os) const;
542  void writeBinary(Array<cxbyte>& array) const;
544  const GalliumInput* getOutput() const
545  { return &output; }
546 
547  // kcode support
548  bool isCodeSection() const;
549  KernelBase& getKernelBase(AsmKernelId index);
550  size_t getKernelsNum() const;
551 };
552 
554 
557 {
558 private:
559  typedef std::unordered_map<CString, AsmSectionId> SectionMap;
560  friend struct AsmROCmPseudoOps;
561  ROCmInput output;
562  std::unique_ptr<ROCmBinGenerator> binGen;
563  struct Section
564  {
565  AsmKernelId kernelId;
566  AsmSectionType type;
567  AsmSectionId elfBinSectId;
568  const char* name; // must be available by whole lifecycle
569  };
570  struct Kernel : KernelBase
571  {
572  AsmSectionId configSection;
573  std::unique_ptr<AsmROCmKernelConfig> config;
574  bool isFKernel;
575  AsmSectionId ctrlDirSection;
576  AsmSectionId savedSection;
577 
578  explicit Kernel(AsmSectionId _configSection = ASMSECT_NONE): KernelBase{},
579  configSection(_configSection), config(nullptr), isFKernel(false),
580  ctrlDirSection(ASMSECT_NONE), savedSection(ASMSECT_NONE)
581  { }
582 
583  void initializeKernelConfig();
584  };
585  std::vector<Kernel*> kernelStates;
586  std::vector<Section> sections;
587  std::vector<CString> gotSymbols;
588  SectionMap extraSectionMap;
589  AsmSectionId commentSection;
590  AsmSectionId metadataSection;
591  AsmSectionId dataSection;
592  AsmSectionId gotSection;
593  AsmSectionId savedSection;
594  AsmSectionId extraSectionCount;
595 
596  size_t prevSymbolsCount;
597 
598  bool unresolvedGlobals;
599  bool good;
600 
601  void addSymbols(bool sectionDiffsPrepared);
602 public:
604  explicit AsmROCmHandler(Assembler& assembler);
606  ~AsmROCmHandler();
607 
608  AsmKernelId addKernel(const char* kernelName);
609  AsmSectionId addSection(const char* sectionName, AsmKernelId kernelId);
610 
611  AsmSectionId getSectionId(const char* sectionName) const;
612  void setCurrentKernel(AsmKernelId kernel);
613  void setCurrentSection(AsmSectionId sectionId);
614 
615  SectionInfo getSectionInfo(AsmSectionId sectionId) const;
616  bool parsePseudoOp(const CString& firstName,
617  const char* stmtPlace, const char* linePtr);
618 
619  bool prepareBinary();
620  void writeBinary(std::ostream& os) const;
621  void writeBinary(Array<cxbyte>& array) const;
623  const ROCmInput* getOutput() const
624  { return &output; }
625 
626  bool prepareSectionDiffsResolving();
627 
628  // kcode support
629  bool isCodeSection() const;
630  KernelBase& getKernelBase(AsmKernelId index);
631  size_t getKernelsNum() const;
632 };
633 
634 };
635 
636 #endif
code of program or kernel
common definitions for assembler and disassembler
main class of assembler
Definition: Assembler.h:516
Assembler & assembler
assembler reference
Definition: AsmFormats.h:152
non copyable and non movable base structure (class)
Definition: Utilities.h:46
assembler expression class
Definition: AsmDefs.h:286
uint32_t Flags
type for declaring various flags
Definition: Utilities.h:100
dummy (empty) section for kernel
ROCm binary input structure.
Definition: ROCmBinaries.h:372
cxbyte userDataNum
number of user data
Definition: AsmFormats.h:353
no kernel, inner global space
Definition: AsmFormats.h:93
Flags flags
section flags
Definition: AsmFormats.h:134
configuration (global or for kernel)
bool privilegedMode
privileged mode
Definition: AsmFormats.h:360
AMD Catalyst kernel's metadata.
handles raw code format
Definition: AsmFormats.h:247
Gallium input.
Definition: GalliumBinaries.h:443
AsmSectionType type
section type
Definition: AsmFormats.h:133
bool debugMode
debug mode
Definition: AsmFormats.h:359
AMD HSA kernel configuration structure.
Definition: Commons.h:64
ROCm binaries handling.
const char * name
section name
Definition: AsmFormats.h:132
an array class
Definition: Containers.h:41
bool isSectionDiffsResolvable() const
return true if format handler can resolve differences between sections
Definition: AsmFormats.h:165
bool ieeeMode
IEEE mode.
Definition: AsmFormats.h:354
cxbyte floatMode
float mode
Definition: AsmFormats.h:355
Configuration header.
cxuint AsmSectionId
type for Asm section id (index)
Definition: Commons.h:35
Definition: AsmFormats.h:146
handles ROCM binary format
Definition: AsmFormats.h:556
cxuint RelocType
relocation type
Definition: Commons.h:33
bool tgSize
enable TG_SIZE_EN bit
Definition: AsmFormats.h:358
data object
Definition: ROCmBinaries.h:55
empty section for scratch symbol
GalliumCompute binaries handling (only with LLVM 3.6)
assembler relocation
Definition: AsmDefs.h:265
const AmdCL2Input * getOutput() const
get output structure pointer
Definition: AsmFormats.h:459
AsmSectionType
assembler section type
Definition: AsmFormats.h:47
AMD binaries handling.
assembler format handler
Definition: AsmFormats.h:126
unsigned char cxbyte
unsigned byte
Definition: Config.h:229
AMD OpenCL2 binaries generator.
main namespace
Definition: AsmDefs.h:38
format handler with Kcode (kernel-code) handling
Definition: AsmFormats.h:219
unsigned int cxuint
unsigned int
Definition: Config.h:237
const GalliumInput * getOutput() const
get output object (input for bingenerator)
Definition: AsmFormats.h:544
cxuint AsmKernelId
type for Asm kernel id (index)
Definition: Commons.h:33
section information
Definition: AsmFormats.h:130
AMD Catalyst kernel's header.
no kernel, global space
Definition: AsmFormats.h:92
utilities for other libraries and programs
exception class
Definition: Utilities.h:61
AMD binaries generator.
main Input for AmdCL2GPUBinGenerator
Definition: AmdCL2BinGen.h:119
cxuint usedVGPRsNum
number of used VGPRs
Definition: AsmFormats.h:351
cxbyte priority
priority
Definition: AsmFormats.h:356
cxuint relSpace
relative space
Definition: AsmFormats.h:135
GPU identification utilities.
const AmdInput * getOutput() const
get output structure pointer
Definition: AsmFormats.h:343
main Input for AmdGPUBinGenerator
Definition: AmdBinGen.h:154
bool dx10Clamp
DX10 CLAMP mode.
Definition: AsmFormats.h:361
section is unresolvable
Definition: AsmFormats.h:101
const ROCmInput * getOutput() const
get output object (input for bingenerator)
Definition: AsmFormats.h:623
none section id
Definition: AsmFormats.h:87
cxuint dimMask
mask of dimension (bits: 0 - X, 1 - Y, 2 - Z)
Definition: AsmFormats.h:350
handles GalliumCompute format
Definition: AsmFormats.h:470
absolute section id
Definition: AsmFormats.h:86
cxuint usedSGPRsNum
number of used SGPRs
Definition: AsmFormats.h:352
assembler symbol structure
Definition: AsmDefs.h:143
cxbyte exceptions
enabled exceptions
Definition: AsmFormats.h:357
handles AMD Catalyst format
Definition: AsmFormats.h:273
handles AMD OpenCL 2.0 binary format
Definition: AsmFormats.h:367
assembler format exception
Definition: AsmFormats.h:114
Asm AMD HSA kernel configuration.
Definition: AsmFormats.h:348
simple C-string container
Definition: CString.h:38