CLRX  1
Unofficial OpenCL extensions designed for Radeon GPUs
Assembler.h
Go to the documentation of this file.
1 /*
2  * CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3  * Copyright (C) 2014-2018 Mateusz Szpakowski
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18  */
23 #ifndef __CLRX_ASSEMBLER_H__
24 #define __CLRX_ASSEMBLER_H__
25 
26 #include <CLRX/Config.h>
27 #include <algorithm>
28 #include <cstdint>
29 #include <string>
30 #include <istream>
31 #include <ostream>
32 #include <iostream>
33 #include <vector>
34 #include <utility>
35 #include <stack>
36 #include <list>
37 #include <unordered_set>
38 #include <unordered_map>
39 #include <CLRX/utils/Utilities.h>
40 #include <CLRX/utils/Containers.h>
41 #include <CLRX/utils/DTree.h>
42 #include <CLRX/utils/GPUId.h>
43 #include <CLRX/amdasm/Commons.h>
44 #include <CLRX/amdasm/AsmSource.h>
45 #include <CLRX/amdasm/AsmFormats.h>
46 #include <CLRX/amdasm/AsmDefs.h>
47 
49 namespace CLRX
50 {
51 
52 enum: cxuint {
53  ASM_POLICY_DEFAULT = CLRX_VERSION_NUMBER, // version 107
54  ASM_POLICY_UNIFIED_SGPR_COUNT = CLRX_POLICY_UNIFIED_SGPR_COUNT
55 };
56 
57 enum: Flags
58 {
63  ASM_MACRONOCASE = 16,
65  ASM_WAVE32 = 64,
66  ASM_TESTRESOLVE = (1U<<30),
67  ASM_TESTRUN = (1U<<31),
68  ASM_ALL = FLAGS_ALL&~(ASM_TESTRUN|ASM_TESTRESOLVE|ASM_BUGGYFPLIT|ASM_MACRONOCASE|
70 };
71 
72 enum: Flags
73 {
74  ASM_CODE_WAVE32 = 1
75 };
76 
77 struct AsmRegVar;
78 
81 {
82 public:
84  struct ReadPos
85  {
86  size_t chunkPos;
87  size_t itemPos;
88  };
89 
90 protected:
92  {
93  const AsmRegVar* regVar;
94  uint16_t rstart;
95  uint16_t rend;
97  cxbyte rwFlags:2;
98  cxbyte align:5;
99  cxbyte useRegMode:1; // usereg mode
100  uint16_t offsetLo;
101  };
102 
103  struct Chunk
104  {
105  size_t offsetFirst;
106  std::vector<RegVarUsageInt> items;
107  };
108 
109  std::vector<Chunk> chunks;
110 
112  explicit ISAUsageHandler();
113 public:
115  virtual ~ISAUsageHandler();
117  virtual ISAUsageHandler* copy() const = 0;
118 
120  void pushUsage(const AsmRegVarUsage& rvu);
122  bool hasNext(const ReadPos& readPos) const
123  { return readPos.chunkPos < chunks.size() && (readPos.chunkPos+1 != chunks.size() ||
124  readPos.itemPos < chunks.back().items.size());; }
126  AsmRegVarUsage nextUsage(ReadPos& readPos);
127  // find position by offset
128  ReadPos findPositionByOffset(size_t offset) const;
129 
131  virtual void getUsageDependencies(cxuint rvusNum, const AsmRegVarUsage* rvus,
132  cxbyte* linearDeps) const = 0;
133 };
134 
137 {
138 private:
139  std::vector<AsmRegVarLinearDep> regVarLinDeps;
140 public:
143 
145  void pushLinearDep(const AsmRegVarLinearDep& linearDep)
146  { regVarLinDeps.push_back(linearDep); }
148  size_t size() const
149  { return regVarLinDeps.size(); }
152  { return regVarLinDeps[pos]; }
154  size_t findPositionByOffset(size_t offset) const;
156  ISALinearDepHandler* copy() const;
157 };
158 
161 {
162 public:
164  GCNUsageHandler();
166  ~GCNUsageHandler();
167 
169  ISAUsageHandler* copy() const;
170 
171  void getUsageDependencies(cxuint rvusNum, const AsmRegVarUsage* rvus,
172  cxbyte* linearDeps) const;
173 };
174 
176 
183 {
184 public:
185  struct ReadPos
186  {
187  size_t delOpPos;
188  size_t waitInstrPos;
189  };
190 private:
191  std::vector<AsmDelayedOp> delayedOps;
192  std::vector<AsmWaitInstr> waitInstrs;
193 public:
195  ISAWaitHandler();
196 
198  void pushDelayedOp(const AsmDelayedOp& delOp)
199  { delayedOps.push_back(delOp); }
201  void pushWaitInstr(const AsmWaitInstr& waitInstr)
202  { waitInstrs.push_back(waitInstr); }
204  bool hasNext(const ReadPos& readPos) const
205  { return readPos.delOpPos < delayedOps.size() ||
206  readPos.waitInstrPos < waitInstrs.size(); }
208  bool nextInstr(ReadPos& readPos, AsmDelayedOp& delOp, AsmWaitInstr& waitInstr);
210  ReadPos findPositionByOffset(size_t offset) const;
212  ISAWaitHandler* copy() const;
213 };
214 
217 {
218 protected:
220 
222  void printWarning(const char* linePtr, const char* message);
224  void printError(const char* linePtr, const char* message);
226  void printWarning(const AsmSourcePos& sourcePos, const char* message);
228  void printError(const AsmSourcePos& sourcePos, const char* message);
230  void printWarningForRange(cxuint bits, uint64_t value, const AsmSourcePos& pos,
231  cxbyte signess = WS_BOTH);
232  void addCodeFlowEntry(AsmSectionId sectionId, const AsmCodeFlowEntry& entry);
234  explicit ISAAssembler(Assembler& assembler);
235 public:
237  virtual ~ISAAssembler();
239  virtual ISAUsageHandler* createUsageHandler() const = 0;
240 
242  virtual void assemble(const CString& mnemonic, const char* mnemPlace,
243  const char* linePtr, const char* lineEnd, std::vector<cxbyte>& output,
244  ISAUsageHandler* usageHandler, ISAWaitHandler* waitHandler) = 0;
246  virtual bool resolveCode(const AsmSourcePos& sourcePos, AsmSectionId targetSectionId,
247  cxbyte* sectionData, size_t offset, AsmExprTargetType targetType,
248  AsmSectionId sectionId, uint64_t value) = 0;
250  virtual bool checkMnemonic(const CString& mnemonic) const = 0;
251  virtual Flags getImportantCodeFlags() const = 0;
253  virtual void setAllocatedRegisters(const cxuint* regs = nullptr,
254  Flags regFlags = 0) = 0;
255  void setCodeFlags(Flags codeFlags);
257  virtual const cxuint* getAllocatedRegisters(size_t& regTypesNum,
258  Flags& regFlags) const = 0;
259  Flags getCodeFlags() const;
261  virtual void getMaxRegistersNum(size_t& regTypesNum, cxuint* maxRegs) const = 0;
263  virtual void getRegisterRanges(size_t& regTypesNum, cxuint* regRanges) const = 0;
265  virtual void fillAlignment(size_t size, cxbyte* output) = 0;
267  virtual bool parseRegisterRange(const char*& linePtr, cxuint& regStart,
268  cxuint& regEnd, const AsmRegVar*& regVar) = 0;
270  virtual bool relocationIsFit(cxuint bits, AsmExprTargetType tgtType) = 0;
272  virtual bool parseRegisterType(const char*& linePtr,
273  const char* end, cxuint& type) = 0;
275  virtual size_t getInstructionSize(size_t codeSize, const cxbyte* code) const = 0;
276  virtual const AsmWaitConfig& getWaitConfig() const = 0;
277 };
278 
281 {
282 public:
284  struct Regs {
288  };
289 private:
290  friend struct GCNAsmUtils; // INTERNAL LOGIC
291  union {
292  Regs regs;
293  cxuint regTable[2];
294  };
295  GPUArchMask curArchMask;
296  cxbyte currentRVUIndex;
297  AsmRegVarUsage instrRVUs[6];
298  bool hasWaitInstr;
299  AsmWaitInstr waitInstr;
300  AsmDelayedOp delayedOps[6];
301 
302  void resetInstrRVUs()
303  {
304  for (AsmRegVarUsage& rvu: instrRVUs)
305  {
306  rvu.useRegMode = false;
307  rvu.regField = ASMFIELD_NONE;
308  }
309  }
310  void resetWaitInstrs()
311  {
312  hasWaitInstr = false;
313  for (AsmDelayedOp& op: delayedOps)
314  op.delayedOpType = op.delayedOpType2 = ASMDELOP_NONE;
315  }
316 
317  void setCurrentRVU(cxbyte idx)
318  { currentRVUIndex = idx; }
319 
320  void setRegVarUsage(const AsmRegVarUsage& rvu);
321 
322  void moveRVUToNext(cxbyte index);
323  void setRVUFieldAndRWFlags(cxbyte index, AsmRegField rfield, cxbyte rwFlags);
324 
325  void flushInstrRVUs(ISAUsageHandler* usageHandler)
326  {
327  for (const AsmRegVarUsage& rvu: instrRVUs)
328  if (rvu.regField != ASMFIELD_NONE)
329  usageHandler->pushUsage(rvu);
330  }
331  void flushWaitInstrs(ISAWaitHandler* waitHandler)
332  {
333  for (const AsmDelayedOp& op: delayedOps)
334  if (op.delayedOpType != ASMDELOP_NONE)
335  waitHandler->pushDelayedOp(op);
336 
337  if (hasWaitInstr)
338  waitHandler->pushWaitInstr(waitInstr);
339  }
340 
341 public:
343  explicit GCNAssembler(Assembler& assembler);
345  ~GCNAssembler();
346 
347  ISAUsageHandler* createUsageHandler() const;
348 
349  void assemble(const CString& mnemonic, const char* mnemPlace, const char* linePtr,
350  const char* lineEnd, std::vector<cxbyte>& output,
351  ISAUsageHandler* usageHandler, ISAWaitHandler* waitHandler);
352  bool resolveCode(const AsmSourcePos& sourcePos, AsmSectionId targetSectionId,
353  cxbyte* sectionData, size_t offset, AsmExprTargetType targetType,
354  AsmSectionId sectionId, uint64_t value);
355  bool checkMnemonic(const CString& mnemonic) const;
356  Flags getImportantCodeFlags() const;
357  void setAllocatedRegisters(const cxuint* regs, Flags regFlags);
358  const cxuint* getAllocatedRegisters(size_t& regTypesNum, Flags& regFlags) const;
359  void getMaxRegistersNum(size_t& regTypesNum, cxuint* maxRegs) const;
360  void getRegisterRanges(size_t& regTypesNum, cxuint* regRanges) const;
361  void fillAlignment(size_t size, cxbyte* output);
362  bool parseRegisterRange(const char*& linePtr, cxuint& regStart, cxuint& regEnd,
363  const AsmRegVar*& regVar);
364  bool relocationIsFit(cxuint bits, AsmExprTargetType tgtType);
365  bool parseRegisterType(const char*& linePtr, const char* end, cxuint& type);
366  size_t getInstructionSize(size_t codeSize, const cxbyte* code) const;
367  const AsmWaitConfig& getWaitConfig() const;
368 };
369 
371 {
372 public:
373  struct NextBlock
374  {
375  size_t block;
376  bool isCall;
377  };
378  struct SSAInfo
379  {
380  size_t ssaIdBefore;
381  size_t ssaIdFirst; // SSA id at first change
382  size_t ssaId;
383  size_t ssaIdLast;
384  size_t ssaIdChange;
385  size_t firstPos;
386  size_t lastPos;
388  SSAInfo(size_t _bssaId = SIZE_MAX, size_t _ssaIdF = SIZE_MAX,
389  size_t _ssaId = SIZE_MAX, size_t _ssaIdL = SIZE_MAX,
390  size_t _ssaIdChange = 0, bool _readBeforeWrite = false)
391  : ssaIdBefore(_bssaId), ssaIdFirst(_ssaIdF), ssaId(_ssaId),
392  ssaIdLast(_ssaIdL), ssaIdChange(_ssaIdChange),
393  readBeforeWrite(_readBeforeWrite)
394  { }
395  };
396  struct CodeBlock
397  {
398  size_t start, end; // place in code
399  // next blocks rules:
400  // if only one and next block have index: empty.
401  // if have calls, then implicitly the last next block have next block index
402  // and it is not inserted into nexts list.
403  // otherwise nexts list contains next blocks
404  std::vector<NextBlock> nexts;
405  bool haveCalls;
406  bool haveReturn;
407  bool haveEnd;
408  // key - regvar, value - SSA info for this regvar
410  ISAUsageHandler::ReadPos usagePos;
411  };
412 
414 
415  // first - orig ssaid, second - dest ssaid
416  typedef std::pair<size_t, size_t> SSAReplace;
417  typedef std::unordered_map<AsmSingleVReg, VectorSet<SSAReplace> > SSAReplacesMap;
418  // interference graph type
420  typedef std::unordered_map<AsmSingleVReg, std::vector<size_t> > VarIndexMap;
421  struct LinearDep
422  {
423  cxbyte align;
424  VectorSet<size_t> prevVidxes;
425  VectorSet<size_t> nextVidxes;
426  };
427 
429  {
430  DTree<size_t> vs[MAX_REGTYPES_NUM];
431  };
432 private:
433  Assembler& assembler;
434  std::vector<CodeBlock> codeBlocks;
435  SSAReplacesMap ssaReplacesMap;
436  size_t regTypesNum;
437 
438  Array<OutLiveness> outLivenesses[MAX_REGTYPES_NUM];
439  size_t graphVregsCounts[MAX_REGTYPES_NUM];
440  VarIndexMap vregIndexMaps[MAX_REGTYPES_NUM]; // indices to igraph for 2 reg types
441  InterGraph interGraphs[MAX_REGTYPES_NUM]; // for 2 register
442  Array<cxuint> graphColorMaps[MAX_REGTYPES_NUM];
443  std::unordered_map<size_t, LinearDep> linearDepMaps[MAX_REGTYPES_NUM];
444  // key - routine block, value - set of svvregs (lv indexes) used in routine
445  std::unordered_map<size_t, VIdxSetEntry> vidxRoutineMap;
446  // key - call block, value - set of svvregs (lv indexes) used between this call point
447  std::unordered_map<size_t, VIdxSetEntry> vidxCallMap;
448 
449 public:
450  AsmRegAllocator(Assembler& assembler);
451  // constructor for testing
452  AsmRegAllocator(Assembler& assembler, const std::vector<CodeBlock>& codeBlocks,
453  const SSAReplacesMap& ssaReplacesMap);
454 
455  void createCodeStructure(const std::vector<AsmCodeFlowEntry>& codeFlow,
456  size_t codeSize, const cxbyte* code);
457  void createSSAData(ISAUsageHandler& usageHandler,
458  ISALinearDepHandler& linDepHandler);
459  void applySSAReplaces();
460  void createLivenesses(ISAUsageHandler& usageHandler,
461  ISALinearDepHandler& linDepHandler);
462  void createInterferenceGraph();
463  void colorInterferenceGraph();
464 
465  void allocateRegisters(AsmSectionId sectionId);
466 
467  const std::vector<CodeBlock>& getCodeBlocks() const
468  { return codeBlocks; }
469  const SSAReplacesMap& getSSAReplacesMap() const
470  { return ssaReplacesMap; }
471  const Array<OutLiveness>* getOutLivenesses() const
472  { return outLivenesses; }
473 
474  const std::unordered_map<size_t, LinearDep>* getLinearDepMaps() const
475  { return linearDepMaps; }
476 
477  const VarIndexMap* getVregIndexMaps() const
478  { return vregIndexMaps; }
479 
480  const std::unordered_map<size_t, VIdxSetEntry>& getVIdxRoutineMap() const
481  { return vidxRoutineMap; }
482  const std::unordered_map<size_t, VIdxSetEntry>& getVIdxCallMap() const
483  { return vidxCallMap; }
484 };
485 
488 {
489 private:
490  const AsmWaitConfig& waitConfig;
491  Assembler& assembler;
492  const std::vector<AsmRegAllocator::CodeBlock>& codeBlocks;
493  const AsmRegAllocator::VarIndexMap* vregIndexMaps;
494  const Array<cxuint>* graphColorMaps;
495  bool onlyWarnings;
496  std::vector<AsmWaitInstr> neededWaitInstrs;
497 public:
498  AsmWaitScheduler(const AsmWaitConfig& asmWaitConfig, Assembler& assembler,
499  const std::vector<AsmRegAllocator::CodeBlock>& codeBlocks,
500  const AsmRegAllocator::VarIndexMap* vregIndexMaps,
501  const Array<cxuint>* graphColorMaps, bool onlyWarnings);
502 
503  void schedule(ISAUsageHandler& usageHandler, ISAWaitHandler& waitHandler);
504 
505  const std::vector<AsmWaitInstr>& getNeededWaitInstrs() const
506  { return neededWaitInstrs; }
507 };
508 
/// type of clause (the kind passed to Assembler::pushClause and Assembler::popClause)
enum class AsmClauseType
{
    IF = 0,
    ELSEIF = 1,
    ELSE = 2,
    REPEAT = 3,
    MACRO = 4
};
518 
520 struct AsmClause
521 {
526 };
527 
530 {
531 public:
533  typedef std::pair<CString, uint64_t> DefSym;
535  typedef std::unordered_map<CString, AsmKernelId> KernelMap;
536 private:
537  friend class AsmStreamInputFilter;
538  friend class AsmMacroInputFilter;
539  friend class AsmForInputFilter;
540  friend class AsmExpression;
541  friend class AsmFormatHandler;
542  friend class AsmKcodeHandler;
543  friend class AsmRawCodeHandler;
544  friend class AsmAmdHandler;
545  friend class AsmAmdCL2Handler;
546  friend class AsmGalliumHandler;
547  friend class AsmROCmHandler;
548  friend class ISAAssembler;
549  friend class AsmRegAllocator;
550  friend class AsmWaitScheduler;
551 
552  friend struct AsmParseUtils; // INTERNAL LOGIC
553  friend struct AsmPseudoOps; // INTERNAL LOGIC
554  friend struct AsmKcodePseudoOps; // INTERNAL LOGIC
555  friend struct AsmGalliumPseudoOps; // INTERNAL LOGIC
556  friend struct AsmAmdPseudoOps; // INTERNAL LOGIC
557  friend struct AsmAmdCL2PseudoOps; // INTERNAL LOGIC
558  friend struct AsmROCmPseudoOps; // INTERNAL LOGIC
559  friend struct GCNAsmUtils; // INTERNAL LOGIC
560 
561  Array<CString> filenames;
562  BinaryFormat format;
563  GPUDeviceType deviceType;
564  uint32_t driverVersion;
565  uint32_t llvmVersion; // GalliumCompute
566  bool _64bit;
567  bool newROCmBinFormat;
568  bool llvm10BinFormat;
569  bool rocmMetadataV3;
570  bool good;
571  bool resolvingRelocs;
572  bool doNotRemoveFromSymbolClones;
573  cxuint policyVersion;
574  ISAAssembler* isaAssembler;
575  std::vector<DefSym> defSyms;
576  std::vector<CString> includeDirs;
577  std::vector<AsmSection> sections;
578  std::vector<Array<AsmSectionId> > relSpacesSections;
579  std::unordered_set<AsmSymbolEntry*> symbolSnapshots;
580  std::unordered_set<AsmSymbolEntry*> symbolClones;
581  std::vector<AsmExpression*> unevalExpressions;
582  std::vector<AsmRelocation> relocations;
583  std::unordered_map<const AsmRegVar*, AsmRegVarLinears> regVarLinearsMap;
584  AsmScope globalScope;
585  AsmMacroMap macroMap;
586  std::stack<AsmScope*> scopeStack;
587  std::vector<AsmScope*> abandonedScopes;
588  AsmScope* currentScope;
589  KernelMap kernelMap;
590  std::vector<AsmKernel> kernels;
591  Flags flags;
592  uint64_t macroCount;
593  uint64_t localCount; // macro's local count
594  bool alternateMacro;
595  bool buggyFPLit;
596  bool macroCase;
597  bool oldModParam;
598  Flags codeFlags;
599 
600  cxuint inclusionLevel;
601  cxuint macroSubstLevel;
602  cxuint repetitionLevel;
603  bool lineAlreadyRead; // if line already read
604 
605  size_t lineSize;
606  const char* line;
607  bool endOfAssembly;
608  bool sectionDiffsPrepared;
609  bool collectSourcePoses;
610 
611  cxuint filenameIndex;
612  std::stack<AsmInputFilter*> asmInputFilters;
613  AsmInputFilter* currentInputFilter;
614 
615  std::ostream& messageStream;
616  std::ostream& printStream;
617 
618  AsmFormatHandler* formatHandler;
619 
620  std::stack<AsmClause> clauses;
621 
622  AsmKernelId currentKernel;
623  AsmSectionId& currentSection;
624  uint64_t& currentOutPos;
625 
626  bool withSectionDiffs() const
627  { return formatHandler!=nullptr && formatHandler->isSectionDiffsResolvable(); }
628 
629  AsmSourcePos getSourcePos(LineCol lineCol) const
630  {
631  return { currentInputFilter->getMacroSubst(), currentInputFilter->getSource(),
632  lineCol.lineNo, lineCol.colNo };
633  }
634 
635  AsmSourcePos getSourcePos(size_t pos) const
636  { return currentInputFilter->getSourcePos(pos); }
637  AsmSourcePos getSourcePos(const char* linePtr) const
638  { return getSourcePos(linePtr-line); }
639 
640  void printWarning(const AsmSourcePos& pos, const char* message);
641  void printError(const AsmSourcePos& pos, const char* message);
642 
643  void printWarning(const char* linePtr, const char* message)
644  { printWarning(getSourcePos(linePtr), message); }
645  void printError(const char* linePtr, const char* message)
646  { printError(getSourcePos(linePtr), message); }
647 
648  void printWarning(LineCol lineCol, const char* message)
649  { printWarning(getSourcePos(lineCol), message); }
650  void printError(LineCol lineCol, const char* message)
651  { printError(getSourcePos(lineCol), message); }
652 
653  LineCol translatePos(const char* linePtr) const
654  { return currentInputFilter->translatePos(linePtr-line); }
655  LineCol translatePos(size_t pos) const
656  { return currentInputFilter->translatePos(pos); }
657 
658  bool parseLiteral(uint64_t& value, const char*& linePtr);
659  bool parseLiteralNoError(uint64_t& value, const char*& linePtr);
660  bool parseString(std::string& outString, const char*& linePtr);
661 
// Result of a parsing step (returned by parseSymbol and makeMacroSubstitution).
enum class ParseState
{
    FAILED = 0,
    PARSED = 1,
    MISSING = 2     // missing element
};
668 
672  ParseState parseSymbol(const char*& linePtr, AsmSymbolEntry*& entry,
673  bool localLabel = true, bool dontCreateSymbol = false);
674  bool skipSymbol(const char*& linePtr);
675 
676  bool setSymbol(AsmSymbolEntry& symEntry, uint64_t value, AsmSectionId sectionId);
677 
678  bool assignSymbol(const CString& symbolName, const char* symbolPlace,
679  const char* linePtr, bool reassign = true, bool baseExpr = false);
680 
681  bool assignOutputCounter(const char* symbolPlace, uint64_t value,
682  AsmSectionId sectionId, cxbyte fillValue = 0);
683 
684  void parsePseudoOps(const CString& firstName, const char* stmtPlace,
685  const char* linePtr);
686 
688  bool skipClauses(bool exitm = false);
689  bool putMacroContent(RefPtr<AsmMacro> macro);
690  bool putRepetitionContent(AsmRepeat& repeat);
691 
692  void initializeOutputFormat();
693 
694  bool pushClause(const char* string, AsmClauseType clauseType)
695  {
696  bool included; // to ignore
697  return pushClause(string, clauseType, true, included);
698  }
699  bool pushClause(const char* string, AsmClauseType clauseType,
700  bool satisfied, bool& included);
701  // return false when failed (for example no clauses)
702  bool popClause(const char* string, AsmClauseType clauseType);
703 
704  // recursive function to find scope in scope
705  AsmScope* findScopeInScope(AsmScope* scope, const CString& scopeName,
706  std::unordered_set<AsmScope*>& scopeSet);
707  // find scope by identifier
708  AsmScope* getRecurScope(const CString& scopePlace, bool ignoreLast = false,
709  const char** lastStep = nullptr);
710  // find symbol in scopes
711  // internal recursive function to find symbol in scope
712  AsmSymbolEntry* findSymbolInScopeInt(AsmScope* scope, const CString& symName,
713  std::unordered_set<AsmScope*>& scopeSet);
714  // scope - return scope from scoped name
715  AsmSymbolEntry* findSymbolInScope(const CString& symName, AsmScope*& scope,
716  CString& sameSymName, bool insertMode = false);
717  // similar to map::insert, but returns pointer
718  std::pair<AsmSymbolEntry*, bool> insertSymbolInScope(const CString& symName,
719  const AsmSymbol& symbol);
720 
721  // internal recursive function to find symbol in scope
722  AsmRegVarEntry* findRegVarInScopeInt(AsmScope* scope, const CString& rvName,
723  std::unordered_set<AsmScope*>& scopeSet);
724  // scope - return scope from scoped name
725  AsmRegVarEntry* findRegVarInScope(const CString& rvName, AsmScope*& scope,
726  CString& sameRvName, bool insertMode = false);
727  // similar to map::insert, but returns pointer
728  std::pair<AsmRegVarEntry*, bool> insertRegVarInScope(const CString& rvName,
729  const AsmRegVar& regVar);
730 
731  // create scope
732  bool getScope(AsmScope* parent, const CString& scopeName, AsmScope*& scope);
733  // push new scope level
734  bool pushScope(const CString& scopeName);
735  bool popScope();
736 
738  bool includeFile(const char* pseudoOpPlace, const std::string& filename);
739 
740  ParseState makeMacroSubstitution(const char* string);
741 
742  bool parseMacroArgValue(const char*& linePtr, std::string& outStr);
743 
744  void putData(size_t size, const cxbyte* data)
745  {
746  AsmSection& section = sections[currentSection];
747  section.content.insert(section.content.end(), data, data+size);
748  currentOutPos += size;
749  }
750 
751  cxbyte* reserveData(size_t size, cxbyte fillValue = 0);
752 
753  void goToMain(const char* pseudoOpPlace);
754  void goToKernel(const char* pseudoOpPlace, const char* kernelName);
755  void goToSection(const char* pseudoOpPlace, const char* sectionName, uint64_t align=0);
756  void goToSection(const char* pseudoOpPlace, const char* sectionName,
757  AsmSectionType type, Flags flags, uint64_t align=0);
758  void goToSection(const char* pseudoOpPlace, AsmSectionId sectionId, uint64_t align=0);
759 
760  void printWarningForRange(cxuint bits, uint64_t value, const AsmSourcePos& pos,
761  cxbyte signess = WS_BOTH);
762 
763  bool isAddressableSection() const
764  {
765  return currentSection==ASMSECT_ABS ||
766  (sections[currentSection].flags & ASMSECT_ADDRESSABLE) != 0;
767  }
768  bool isWriteableSection() const
769  {
770  return currentSection!=ASMSECT_ABS &&
771  (sections[currentSection].flags & ASMSECT_WRITEABLE) != 0;
772  }
773  bool isResolvableSection() const
774  {
775  return currentSection==ASMSECT_ABS ||
776  (sections[currentSection].flags & ASMSECT_UNRESOLVABLE) == 0;
777  }
778  bool isResolvableSection(AsmSectionId sectionId) const
779  {
780  return sectionId==ASMSECT_ABS ||
781  (sections[sectionId].flags & ASMSECT_UNRESOLVABLE) == 0;
782  }
783 
784  // oldKernels and newKernels must be sorted
785  void handleRegionsOnKernels(const std::vector<AsmKernelId>& newKernels,
786  const std::vector<AsmKernelId>& oldKernels, AsmSectionId codeSection);
787 
788  void tryToResolveSymbol(AsmSymbolEntry& symEntry);
789  void tryToResolveSymbols(AsmScope* scope);
790  void printUnresolvedSymbols(AsmScope* scope);
791 
792  bool resolveExprTarget(const AsmExpression* expr, uint64_t value,
793  AsmSectionId sectionId);
794 
795  void cloneSymEntryIfNeeded(AsmSymbolEntry& symEntry);
796 
797  void undefineSymbol(AsmSymbolEntry& symEntry);
798 
799 protected:
801  bool readLine();
802 public:
804 
813  explicit Assembler(const CString& filename, std::istream& input, Flags flags = 0,
816  std::ostream& msgStream = std::cerr, std::ostream& printStream = std::cout);
817 
819 
827  explicit Assembler(const Array<CString>& filenames, Flags flags = 0,
830  std::ostream& msgStream = std::cerr, std::ostream& printStream = std::cout);
832  ~Assembler();
833 
835  bool assemble();
836 
838  void writeBinary(const char* filename) const;
840  void writeBinary(std::ostream& outStream) const;
842  void writeBinary(Array<cxbyte>& array) const;
843 
845  uint32_t getDriverVersion() const
846  { return driverVersion; }
848  void setDriverVersion(uint32_t driverVersion)
849  { this->driverVersion = driverVersion; }
850 
852  uint32_t getLLVMVersion() const
853  { return llvmVersion; }
855  void setLLVMVersion(uint32_t llvmVersion)
856  { this->llvmVersion = llvmVersion; }
857 
860  { return deviceType; }
862  void setDeviceType(const GPUDeviceType deviceType)
863  { this->deviceType = deviceType; }
866  { return format; }
869  { format = binFormat; }
871  bool is64Bit() const
872  { return _64bit; }
874  void set64Bit(bool this64Bit)
875  { _64bit = this64Bit; }
877  bool isNewROCmBinFormat() const
878  { return newROCmBinFormat; }
880  void setNewROCmBinFormat(bool newFmt)
881  { newROCmBinFormat = newFmt; }
883  bool isLLVM10BinFormat() const
884  { return llvm10BinFormat; }
886  void setLLVM10BinFormat(bool newFmt)
887  { llvm10BinFormat = newFmt; }
889  bool isROCmMetadataV3() const
890  { return rocmMetadataV3; }
892  void setROCmMetadataV3(bool newFmt)
893  { rocmMetadataV3 = newFmt; }
896  { return policyVersion; }
899  { policyVersion = pv; }
901  Flags getFlags() const
902  { return flags; }
904  void setFlags(Flags flags)
905  { this->flags = flags; }
908  { return codeFlags; }
910  void setCodeFlags(Flags flags)
911  { this->codeFlags = flags; }
913  bool isAltMacro() const
914  { return alternateMacro; }
916  bool isMacroCase() const
917  { return macroCase; }
919  bool isOldModParam() const
920  { return oldModParam; }
922  bool isBuggyFPLit() const
923  { return buggyFPLit; }
925  const std::vector<CString>& getIncludeDirs() const
926  { return includeDirs; }
928  void addIncludeDir(const CString& includeDir);
930  const AsmSymbolMap& getSymbolMap() const
931  { return globalScope.symbolMap; }
933  const std::vector<AsmSection>& getSections() const
934  { return sections; }
935  // get first sections for rel spaces
936  const std::vector<Array<AsmSectionId> >& getRelSpacesSections() const
937  { return relSpacesSections; }
939  const KernelMap& getKernelMap() const
940  { return kernelMap; }
942  const std::vector<AsmKernel>& getKernels() const
943  { return kernels; }
945  const AsmRegVarMap& getRegVarMap() const
946  { return globalScope.regVarMap; }
948  bool addRegVar(const CString& name, const AsmRegVar& var)
949  { return insertRegVarInScope(name, var).second; }
951  bool getRegVar(const CString& name, const AsmRegVar*& regVar);
952 
954  const AsmScope& getGlobalScope() const
955  { return globalScope; }
956 
958  bool isAbsoluteSymbol(const AsmSymbol& symbol) const;
959 
961  void addInitialDefSym(const CString& symName, uint64_t value);
962 
965  { return formatHandler; }
968  { return isaAssembler; }
969 };
970 
971 inline void ISAAssembler::printWarning(const char* linePtr, const char* message)
972 { assembler.printWarning(linePtr, message); }
973 
974 inline void ISAAssembler::printError(const char* linePtr, const char* message)
975 { assembler.printError(linePtr, message); }
976 
977 inline void ISAAssembler::printWarningForRange(cxuint bits, uint64_t value,
978  const AsmSourcePos& pos, cxbyte signess)
979 { assembler.printWarningForRange(bits, value, pos, signess); }
980 
981 inline void ISAAssembler::printWarning(const AsmSourcePos& sourcePos, const char* message)
982 { assembler.printWarning(sourcePos, message); }
983 
984 inline void ISAAssembler::printError(const AsmSourcePos& sourcePos, const char* message)
985 { assembler.printError(sourcePos, message); }
986 
987 inline void ISAAssembler::addCodeFlowEntry(AsmSectionId sectionId,
988  const AsmCodeFlowEntry& entry)
989 { assembler.sections[sectionId].addCodeFlowEntry(entry); }
990 
991 inline void ISAAssembler::setCodeFlags(Flags codeFlags)
992 { assembler.setCodeFlags(codeFlags); }
993 
994 inline Flags ISAAssembler::getCodeFlags() const
995 { return assembler.getCodeFlags(); }
996 
997 };
998 
999 #endif
AsmRepeatInputFilter or AsmIRPInputFilter.
bool is64Bit() const
get bitness (true if 64-bit)
Definition: Assembler.h:871
void set64Bit(bool this64Bit)
set bitness (true if 64-bit)
Definition: Assembler.h:874
AsmRegVarMap regVarMap
regvar map
Definition: AsmDefs.h:652
common definitions for assembler and disassembler
Definition: Assembler.h:373
main class of assembler
Definition: Assembler.h:529
bool haveReturn
code have return from routine
Definition: Assembler.h:406
AsmRegField regField
place in instruction
Definition: Assembler.h:96
bool isLLVM10BinFormat() const
is new ROCm LLVM10 binary format
Definition: Assembler.h:883
non copyable and non movable base structure (class)
Definition: Utilities.h:46
assembler expression class
Definition: AsmDefs.h:286
uint16_t rstart
register start
Definition: Assembler.h:94
uint32_t Flags
type for declaring various flags
Definition: Utilities.h:100
Flags getFlags() const
get flags
Definition: Assembler.h:901
std::unordered_map< CString, AsmRegVar > AsmRegVarMap
regvar map
Definition: AsmDefs.h:535
assembler repeat
Definition: AsmSource.h:236
virtual ~ISAUsageHandler()
destructor
AMD CATALYST format.
size_t ssaIdChange
number of SSA id changes
Definition: Assembler.h:384
GPUDeviceType getDeviceType() const
get GPU device type
Definition: Assembler.h:859
AsmRegVarMap::value_type AsmRegVarEntry
regvar entry
Definition: AsmDefs.h:537
BinaryFormat
binary format for Assembler/Disassembler
Definition: Commons.h:38
const std::vector< AsmSection > & getSections() const
get sections
Definition: Assembler.h:933
ColNo colNo
column number; for macro substitution and IRP it points to the column in the preprocessed line
Definition: AsmSource.h:50
void setDriverVersion(uint32_t driverVersion)
set AMD driver version
Definition: Assembler.h:848
reference pointer based on Glibmm refptr
Definition: Utilities.h:860
uint16_t GPUArchMask
GPU architecture mask (one bit represents single GPU architecture)
Definition: GPUId.h:110
AsmClauseType
type of clause
Definition: Assembler.h:510
Assembler & assembler
assembler
Definition: Assembler.h:219
const AsmRegVarMap & getRegVarMap() const
get regvar map
Definition: Assembler.h:945
delayed result for register for instruction with delayed results
Definition: AsmDefs.h:600
assembler section
Definition: AsmDefs.h:692
DTree container (kind of B-Tree)
assembler macro input filter (for macro filtering)
Definition: AsmSource.h:447
std::pair< CString, uint64_t > DefSym
defined symbol entry
Definition: Assembler.h:533
bool isBuggyFPLit() const
get true if buggyFPLit enabled
Definition: Assembler.h:922
AsmRegVarUsage nextUsage(ReadPos &readPos)
get next usage
handles raw code format
Definition: AsmFormats.h:251
Definition: Assembler.h:428
assembler input layout filter
Definition: AsmSource.h:413
assembler scope for symbols, macros and regvars
Definition: AsmDefs.h:648
bool isNewROCmBinFormat() const
is new ROCm binary format
Definition: Assembler.h:877
Regvar info structure.
Definition: AsmDefs.h:510
void pushLinearDep(const AsmRegVarLinearDep &linearDep)
push linear dependency
Definition: Assembler.h:145
void printError(const char *linePtr, const char *message)
print error for position pointed by line pointer
Definition: Assembler.h:974
bool hasNext(const ReadPos &readPos) const
return true if has next instruction
Definition: Assembler.h:204
void printWarning(const char *linePtr, const char *message)
print warning for position pointed by line pointer
Definition: Assembler.h:971
cxuint sgprsNum
SGPRs number.
Definition: Assembler.h:285
virtual ISAUsageHandler * copy() const =0
copy this usage handler
an array class
Definition: Containers.h:41
std::vector< NextBlock > nexts
next blocks; if empty, then the direct next block is used
Definition: Assembler.h:404
Flags regFlags
defines which extra registers must be included
Definition: Assembler.h:287
bool isSectionDiffsResolvable() const
return true if format handler can resolve differences between sections
Definition: AsmFormats.h:166
void pushUsage(const AsmRegVarUsage &rvu)
push regvar or register usage
Configuration header.
cxuint AsmSectionId
type for Asm section id (index)
Definition: Commons.h:35
handles ROCM binary format
Definition: AsmFormats.h:564
size_t firstPos
first position in code block (section offset)
Definition: Assembler.h:385
Assembler Wait scheduler.
Definition: Assembler.h:487
size_t ssaId
original SSA id
Definition: Assembler.h:382
void setROCmMetadataV3(bool newFmt)
set new ROCm metadata V3 format
Definition: Assembler.h:892
line and column
Definition: AsmSource.h:45
std::unordered_map< CString, AsmSymbol > AsmSymbolMap
assembler symbol map
Definition: AsmDefs.h:206
internal structure for regvar linear dependencies
Definition: AsmDefs.h:562
assembler 'for' pseudo-op input filter
Definition: AsmSource.h:499
assembler formats
virtual void getUsageDependencies(cxuint rvusNum, const AsmRegVarUsage *rvus, cxbyte *linearDeps) const =0
get usage dependencies around single instruction
void setFlags(Flags flags)
set flags
Definition: Assembler.h:904
void pushWaitInstr(const AsmWaitInstr &waitInstr)
push wait instruction
Definition: Assembler.h:201
all flags
Definition: Assembler.h:68
AsmSourcePos sourcePos
position in source code
Definition: Assembler.h:523
void setCodeFlags(Flags flags)
set code flags
Definition: Assembler.h:910
Definition: Assembler.h:185
cxuint getPolicyVersion() const
get policy version
Definition: Assembler.h:895
#define CLRX_VERSION_NUMBER
CLRadeonExtender version number.
Definition: Config.h:39
wait handler
Definition: Assembler.h:182
if failed now, no later trial
AsmSectionType
assembler section type
Definition: AsmFormats.h:47
assembler format handler
Definition: AsmFormats.h:126
void setPolicyVersion(cxuint pv)
set policy version
Definition: Assembler.h:898
enable altmacro mode
Definition: Assembler.h:61
RefPtr< const AsmSource > getSource() const
get current source after reading line
Definition: AsmSource.h:392
unsigned char cxbyte
unsigned byte
Definition: Config.h:229
enable resolving symbols if ASM_TESTRUN enabled
Definition: Assembler.h:66
const std::vector< CString > & getIncludeDirs() const
get include directory list
Definition: Assembler.h:925
cxuint vgprsNum
VGPRs number.
Definition: Assembler.h:286
std::unordered_map< CString, RefPtr< const AsmMacro > > AsmMacroMap
assembler macro map
Definition: AsmDefs.h:640
AsmSourcePos prevIfPos
position of previous if-clause
Definition: Assembler.h:525
size_t ssaIdLast
last SSA id in last
Definition: Assembler.h:383
main namespace
Definition: AsmDefs.h:38
const AsmRegVar * regVar
if null, then usage of called register
Definition: Assembler.h:93
LineCol translatePos(size_t position) const
translate position to line number and column number
size_t lastPos
last position in code block (section offset)
Definition: Assembler.h:386
void setNewROCmBinFormat(bool newFmt)
set new ROCm binary format
Definition: Assembler.h:880
bool isAltMacro() const
get true if altMacro enabled
Definition: Assembler.h:913
bool isMacroCase() const
get true if macroCase enabled
Definition: Assembler.h:916
AsmClauseType type
type of clause
Definition: Assembler.h:522
Definition: Assembler.h:370
format handler with Kcode (kernel-code) handling
Definition: AsmFormats.h:221
unsigned int cxuint
unsigned int
Definition: Config.h:237
const KernelMap & getKernelMap() const
get kernel map
Definition: Assembler.h:939
Definition: Assembler.h:103
BinaryFormat getBinaryFormat() const
get binary format
Definition: Assembler.h:865
buggy handling of fpliterals (including fp constants)
Definition: Assembler.h:62
const AsmSymbolMap & getSymbolMap() const
get symbols map
Definition: Assembler.h:930
std::unordered_map< CString, AsmKernelId > KernelMap
kernel map type
Definition: Assembler.h:535
cxbyte AsmExprTargetType
expression target type (one byte)
Definition: AsmDefs.h:59
macro substitution
std::vector< cxbyte > content
content of section
Definition: AsmDefs.h:702
AsmSymbolMap::value_type AsmSymbolEntry
assembler symbol entry
Definition: AsmDefs.h:208
use WAVESIZE32
Definition: Assembler.h:65
cxuint AsmKernelId
type for Asm kernel id (index)
Definition: Commons.h:33
Definition: Assembler.h:421
GCN (register and regvar) Usage handler.
Definition: Assembler.h:160
AsmRegVarLinearDep getLinearDep(size_t pos) const
get linear dependency at the given position
Definition: Assembler.h:151
GPUDeviceType
type of GPU device
Definition: GPUId.h:51
bool hasNext(const ReadPos &readPos) const
has next regvar usage
Definition: Assembler.h:122
void setLLVM10BinFormat(bool newFmt)
set new ROCm LLVM10 binary format
Definition: Assembler.h:886
size_t size() const
return number of stored linear dependencies
Definition: Assembler.h:148
only for running tests
Definition: Assembler.h:67
enable all warnings for assembler
Definition: Assembler.h:59
void setDeviceType(const GPUDeviceType deviceType)
set GPU device type
Definition: Assembler.h:862
utilities for other libraries and programs
bool haveCalls
code has calls at its end
Definition: Assembler.h:405
bool readBeforeWrite
has read before write
Definition: Assembler.h:387
disable case-insensitive naming (default)
Definition: Assembler.h:64
GCN arch assembler.
Definition: Assembler.h:280
size_t ssaIdBefore
SSA id before first SSA in block.
Definition: Assembler.h:380
AsmSymbolMap symbolMap
symbol map
Definition: AsmDefs.h:651
register pool numbers
Definition: Assembler.h:284
Definition: Assembler.h:91
uint32_t getLLVMVersion() const
get LLVM version
Definition: Assembler.h:852
assembler's clause (if,else,macro,rept)
Definition: Assembler.h:520
regvar usage in code
Definition: AsmDefs.h:549
AsmSourcePos getSourcePos(size_t position) const
get source position after reading line
Definition: AsmSource.h:399
Definition: Assembler.h:378
GPU identification utilities.
LineNo lineNo
line number
Definition: AsmSource.h:47
bool addRegVar(const CString &name, const AsmRegVar &var)
add regvar
Definition: Assembler.h:948
const ISAAssembler * getISAAssembler() const
get ISA assembler
Definition: Assembler.h:967
structure that holds a read position to be stored for later use
Definition: Assembler.h:84
section is unresolvable
Definition: AsmFormats.h:101
code flow entry
Definition: AsmDefs.h:632
uint32_t getDriverVersion() const
get AMD driver version
Definition: Assembler.h:845
const AsmFormatHandler * getFormatHandler() const
get format handler
Definition: Assembler.h:964
description of the WAIT instruction (for waiting for results)
Definition: AsmDefs.h:614
bool haveEnd
code has end
Definition: Assembler.h:407
ISAUsageHandler()
constructor
void printWarningForRange(cxuint bits, uint64_t value, const AsmSourcePos &pos, cxbyte signess=WS_BOTH)
print warning about integer out of range
Definition: Assembler.h:977
handles GalliumCompute format
Definition: AsmFormats.h:477
const AsmScope & getGlobalScope() const
get global scope
Definition: Assembler.h:954
void pushDelayedOp(const AsmDelayedOp &delOp)
push delayed result
Definition: Assembler.h:198
absolute section id
Definition: AsmFormats.h:86
assembler source handling
assembler input filter for reading lines
Definition: AsmSource.h:350
void setLLVMVersion(uint32_t llvmVersion)
set LLVM version
Definition: Assembler.h:855
assembler symbol structure
Definition: AsmDefs.h:143
RefPtr< const AsmMacroSubst > getMacroSubst() const
get current macro substitution after reading line
Definition: AsmSource.h:395
asm wait system configuration
Definition: AsmDefs.h:586
bool isOldModParam() const
get true if oldModParam enabled (old modifier parametrization)
Definition: Assembler.h:919
cxbyte AsmRegField
type of register field
Definition: AsmDefs.h:500
ISA regvar linear handler.
Definition: Assembler.h:136
force add symbols to binary
Definition: Assembler.h:60
ISA (register and regvar) Usage handler.
Definition: Assembler.h:80
handles AMD Catalyst format
Definition: AsmFormats.h:277
handles AMD OpenCL 2.0 binary format
Definition: AsmFormats.h:373
Definition: Assembler.h:396
Flags getCodeFlags() const
get code flags
Definition: Assembler.h:907
ISA assembler class.
Definition: Assembler.h:216
simple C-string container
Definition: CString.h:38
an assembler for Radeon GPUs
containers and other utils for other libraries and programs
void setBinaryFormat(BinaryFormat binFormat)
set binary format
Definition: Assembler.h:868
uint16_t rend
register end
Definition: Assembler.h:95
bool condSatisfied
if conditional clause has already been satisfied
Definition: Assembler.h:524
bool isROCmMetadataV3() const
is new ROCm metadata V3 format
Definition: Assembler.h:889
const std::vector< AsmKernel > & getKernels() const
get kernels
Definition: Assembler.h:942
assembler source position
Definition: AsmSource.h:150