CLRX  1
Unofficial OpenCL extensions designed for Radeon GPUs
Assembler.h
Go to the documentation of this file.
1 /*
2  * CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library
3  * Copyright (C) 2014-2018 Mateusz Szpakowski
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18  */
23 #ifndef __CLRX_ASSEMBLER_H__
24 #define __CLRX_ASSEMBLER_H__
25 
26 #include <CLRX/Config.h>
27 #include <algorithm>
28 #include <cstdint>
29 #include <string>
30 #include <istream>
31 #include <ostream>
32 #include <iostream>
33 #include <vector>
34 #include <utility>
35 #include <stack>
36 #include <list>
37 #include <unordered_set>
38 #include <unordered_map>
39 #include <CLRX/utils/Utilities.h>
40 #include <CLRX/utils/Containers.h>
41 #include <CLRX/utils/DTree.h>
42 #include <CLRX/utils/GPUId.h>
43 #include <CLRX/amdasm/Commons.h>
44 #include <CLRX/amdasm/AsmSource.h>
45 #include <CLRX/amdasm/AsmFormats.h>
46 #include <CLRX/amdasm/AsmDefs.h>
47 
49 namespace CLRX
50 {
51 
// Assembler policy versions (unnamed enum with cxuint as its underlying type).
52 enum: cxuint {
53  ASM_POLICY_DEFAULT = CLRX_VERSION_NUMBER, // version 107; the default policy equals the CLRadeonExtender version number
54  ASM_POLICY_UNIFIED_SGPR_COUNT = CLRX_POLICY_UNIFIED_SGPR_COUNT // same value as CLRX_POLICY_UNIFIED_SGPR_COUNT, which is defined outside this listing
55 };
56 
57 enum: Flags
58 {
63  ASM_MACRONOCASE = 16,
65  ASM_TESTRESOLVE = (1U<<30),
66  ASM_TESTRUN = (1U<<31),
67  ASM_ALL = FLAGS_ALL&~(ASM_TESTRUN|ASM_TESTRESOLVE|ASM_BUGGYFPLIT|ASM_MACRONOCASE|
69 };
70 
71 struct AsmRegVar;
72 
75 {
76 public:
// Read cursor taken by hasNext(), nextUsage() and returned by findPositionByOffset().
78  struct ReadPos
79  {
80  size_t chunkPos; // index into `chunks`; hasNext() compares it with chunks.size()
81  size_t itemPos; // index into a chunk's `items`; hasNext() compares it with items.size()
82  };
83 
84 protected:
86  {
87  const AsmRegVar* regVar;
88  uint16_t rstart;
89  uint16_t rend;
91  cxbyte rwFlags:2;
92  cxbyte align:5;
93  cxbyte useRegMode:1; // usereg mode
94  uint16_t offsetLo;
95  };
96 
// One chunk of stored usage records; the handler keeps them in the `chunks` vector.
97  struct Chunk
98  {
99  size_t offsetFirst; // NOTE(review): presumably the code offset of the first item in this chunk - confirm in the implementation
100  std::vector<RegVarUsageInt> items; // usage records (RegVarUsageInt) held by this chunk
101  };
102 
103  std::vector<Chunk> chunks;
104 
106  explicit ISAUsageHandler();
107 public:
109  virtual ~ISAUsageHandler();
111  virtual ISAUsageHandler* copy() const = 0;
112 
114  void pushUsage(const AsmRegVarUsage& rvu);
116  bool hasNext(const ReadPos& readPos) const
117  { return readPos.chunkPos < chunks.size() && (readPos.chunkPos+1 != chunks.size() ||
118  readPos.itemPos < chunks.back().items.size());; }
120  AsmRegVarUsage nextUsage(ReadPos& readPos);
121  // find position by offset
122  ReadPos findPositionByOffset(size_t offset) const;
123 
125  virtual void getUsageDependencies(cxuint rvusNum, const AsmRegVarUsage* rvus,
126  cxbyte* linearDeps) const = 0;
127 };
128 
131 {
132 private:
133  std::vector<AsmRegVarLinearDep> regVarLinDeps;
134 public:
137 
139  void pushLinearDep(const AsmRegVarLinearDep& linearDep)
140  { regVarLinDeps.push_back(linearDep); }
142  size_t size() const
143  { return regVarLinDeps.size(); }
146  { return regVarLinDeps[pos]; }
148  size_t findPositionByOffset(size_t offset) const;
150  ISALinearDepHandler* copy() const;
151 };
152 
155 {
156 public:
158  GCNUsageHandler();
160  ~GCNUsageHandler();
161 
163  ISAUsageHandler* copy() const;
164 
165  void getUsageDependencies(cxuint rvusNum, const AsmRegVarUsage* rvus,
166  cxbyte* linearDeps) const;
167 };
168 
170 
177 {
178 public:
// Read cursor over the two lists kept by the wait handler.
179  struct ReadPos
180  {
181  size_t delOpPos; // index into `delayedOps`; hasNext() compares it with delayedOps.size()
182  size_t waitInstrPos; // index into `waitInstrs`; hasNext() compares it with waitInstrs.size()
183  };
184 private:
185  std::vector<AsmDelayedOp> delayedOps;
186  std::vector<AsmWaitInstr> waitInstrs;
187 public:
189  ISAWaitHandler();
190 
192  void pushDelayedOp(const AsmDelayedOp& delOp)
193  { delayedOps.push_back(delOp); }
195  void pushWaitInstr(const AsmWaitInstr& waitInstr)
196  { waitInstrs.push_back(waitInstr); }
198  bool hasNext(const ReadPos& readPos) const
199  { return readPos.delOpPos < delayedOps.size() ||
200  readPos.waitInstrPos < waitInstrs.size(); }
202  bool nextInstr(ReadPos& readPos, AsmDelayedOp& delOp, AsmWaitInstr& waitInstr);
204  ReadPos findPositionByOffset(size_t offset) const;
206  ISAWaitHandler* copy() const;
207 };
208 
211 {
212 protected:
214 
216  void printWarning(const char* linePtr, const char* message);
218  void printError(const char* linePtr, const char* message);
220  void printWarning(const AsmSourcePos& sourcePos, const char* message);
222  void printError(const AsmSourcePos& sourcePos, const char* message);
224  void printWarningForRange(cxuint bits, uint64_t value, const AsmSourcePos& pos,
225  cxbyte signess = WS_BOTH);
226  void addCodeFlowEntry(AsmSectionId sectionId, const AsmCodeFlowEntry& entry);
228  explicit ISAAssembler(Assembler& assembler);
229 public:
231  virtual ~ISAAssembler();
233  virtual ISAUsageHandler* createUsageHandler() const = 0;
234 
236  virtual void assemble(const CString& mnemonic, const char* mnemPlace,
237  const char* linePtr, const char* lineEnd, std::vector<cxbyte>& output,
238  ISAUsageHandler* usageHandler, ISAWaitHandler* waitHandler) = 0;
240  virtual bool resolveCode(const AsmSourcePos& sourcePos, AsmSectionId targetSectionId,
241  cxbyte* sectionData, size_t offset, AsmExprTargetType targetType,
242  AsmSectionId sectionId, uint64_t value) = 0;
244  virtual bool checkMnemonic(const CString& mnemonic) const = 0;
246  virtual void setAllocatedRegisters(const cxuint* regs = nullptr,
247  Flags regFlags = 0) = 0;
249  virtual const cxuint* getAllocatedRegisters(size_t& regTypesNum,
250  Flags& regFlags) const = 0;
252  virtual void getMaxRegistersNum(size_t& regTypesNum, cxuint* maxRegs) const = 0;
254  virtual void getRegisterRanges(size_t& regTypesNum, cxuint* regRanges) const = 0;
256  virtual void fillAlignment(size_t size, cxbyte* output) = 0;
258  virtual bool parseRegisterRange(const char*& linePtr, cxuint& regStart,
259  cxuint& regEnd, const AsmRegVar*& regVar) = 0;
261  virtual bool relocationIsFit(cxuint bits, AsmExprTargetType tgtType) = 0;
263  virtual bool parseRegisterType(const char*& linePtr,
264  const char* end, cxuint& type) = 0;
266  virtual size_t getInstructionSize(size_t codeSize, const cxbyte* code) const = 0;
267  virtual const AsmWaitConfig& getWaitConfig() const = 0;
268 };
269 
272 {
273 public:
275  struct Regs {
279  };
280 private:
281  friend struct GCNAsmUtils; // INTERNAL LOGIC
282  union {
283  Regs regs;
284  cxuint regTable[2];
285  };
286  GPUArchMask curArchMask;
287  cxbyte currentRVUIndex;
288  AsmRegVarUsage instrRVUs[6];
289  bool hasWaitInstr;
290  AsmWaitInstr waitInstr;
291  AsmDelayedOp delayedOps[6];
292 
293  void resetInstrRVUs()
294  {
295  for (AsmRegVarUsage& rvu: instrRVUs)
296  {
297  rvu.useRegMode = false;
298  rvu.regField = ASMFIELD_NONE;
299  }
300  }
301  void resetWaitInstrs()
302  {
303  hasWaitInstr = false;
304  for (AsmDelayedOp& op: delayedOps)
305  op.delayedOpType = op.delayedOpType2 = ASMDELOP_NONE;
306  }
307 
308  void setCurrentRVU(cxbyte idx)
309  { currentRVUIndex = idx; }
310 
311  void setRegVarUsage(const AsmRegVarUsage& rvu);
312 
313  void flushInstrRVUs(ISAUsageHandler* usageHandler)
314  {
315  for (const AsmRegVarUsage& rvu: instrRVUs)
316  if (rvu.regField != ASMFIELD_NONE)
317  usageHandler->pushUsage(rvu);
318  }
319  void flushWaitInstrs(ISAWaitHandler* waitHandler)
320  {
321  for (const AsmDelayedOp& op: delayedOps)
322  if (op.delayedOpType != ASMDELOP_NONE)
323  waitHandler->pushDelayedOp(op);
324 
325  if (hasWaitInstr)
326  waitHandler->pushWaitInstr(waitInstr);
327  }
328 
329 public:
331  explicit GCNAssembler(Assembler& assembler);
333  ~GCNAssembler();
334 
335  ISAUsageHandler* createUsageHandler() const;
336 
337  void assemble(const CString& mnemonic, const char* mnemPlace, const char* linePtr,
338  const char* lineEnd, std::vector<cxbyte>& output,
339  ISAUsageHandler* usageHandler, ISAWaitHandler* waitHandler);
340  bool resolveCode(const AsmSourcePos& sourcePos, AsmSectionId targetSectionId,
341  cxbyte* sectionData, size_t offset, AsmExprTargetType targetType,
342  AsmSectionId sectionId, uint64_t value);
343  bool checkMnemonic(const CString& mnemonic) const;
344  void setAllocatedRegisters(const cxuint* regs, Flags regFlags);
345  const cxuint* getAllocatedRegisters(size_t& regTypesNum, Flags& regFlags) const;
346  void getMaxRegistersNum(size_t& regTypesNum, cxuint* maxRegs) const;
347  void getRegisterRanges(size_t& regTypesNum, cxuint* regRanges) const;
348  void fillAlignment(size_t size, cxbyte* output);
349  bool parseRegisterRange(const char*& linePtr, cxuint& regStart, cxuint& regEnd,
350  const AsmRegVar*& regVar);
351  bool relocationIsFit(cxuint bits, AsmExprTargetType tgtType);
352  bool parseRegisterType(const char*& linePtr, const char* end, cxuint& type);
353  size_t getInstructionSize(size_t codeSize, const cxbyte* code) const;
354  const AsmWaitConfig& getWaitConfig() const;
355 };
356 
358 {
359 public:
// Element type of CodeBlock::nexts.
360  struct NextBlock
361  {
362  size_t block; // NOTE(review): presumably the index of the successor code block - confirm
363  bool isCall; // NOTE(review): presumably true when the successor is entered by a call - confirm
364  };
365  struct SSAInfo
366  {
367  size_t ssaIdBefore;
368  size_t ssaIdFirst; // SSA id at first change
369  size_t ssaId;
370  size_t ssaIdLast;
371  size_t ssaIdChange;
372  size_t firstPos;
373  size_t lastPos;
375  SSAInfo(size_t _bssaId = SIZE_MAX, size_t _ssaIdF = SIZE_MAX,
376  size_t _ssaId = SIZE_MAX, size_t _ssaIdL = SIZE_MAX,
377  size_t _ssaIdChange = 0, bool _readBeforeWrite = false)
378  : ssaIdBefore(_bssaId), ssaIdFirst(_ssaIdF), ssaId(_ssaId),
379  ssaIdLast(_ssaIdL), ssaIdChange(_ssaIdChange),
380  readBeforeWrite(_readBeforeWrite)
381  { }
382  };
// A piece of code together with its control-flow successors.
383  struct CodeBlock
384  {
385  size_t start, end; // place in code
386  // Rules for the `nexts` list (NOTE(review): reworded from the original note - confirm):
387  // - it is empty when the only successor is the block with the next index;
388  // - when the block has calls, the block with the next index is an implicit last
389  // successor and is not inserted into `nexts`;
390  // - otherwise `nexts` contains the successor blocks.
391  std::vector<NextBlock> nexts; // next blocks; if empty, the direct next block follows
392  bool haveCalls; // block has calls (see the rules above)
393  bool haveReturn; // code has a return from routine
394  bool haveEnd; // NOTE(review): presumably marks a block that ends the code - confirm
395  // key - regvar, value - SSA info for this regvar
// NOTE(review): the member described by the comment above (listing line 396) is not visible in this listing.
397  ISAUsageHandler::ReadPos usagePos; // read position in the usage handler; NOTE(review): presumably where this block's usages begin - confirm
398  };
399 
401 
402  // first - orig ssaid, second - dest ssaid
403  typedef std::pair<size_t, size_t> SSAReplace;
404  typedef std::unordered_map<AsmSingleVReg, VectorSet<SSAReplace> > SSAReplacesMap;
405  // interference graph type
407  typedef std::unordered_map<AsmSingleVReg, std::vector<size_t> > VarIndexMap;
// Value type of the per-register-type `linearDepMaps` (keyed by size_t).
408  struct LinearDep
409  {
410  cxbyte align; // NOTE(review): presumably the required register alignment - confirm
411  VectorSet<size_t> prevVidxes; // NOTE(review): presumably indices of the preceding vregs in the linear chain - confirm
412  VectorSet<size_t> nextVidxes; // NOTE(review): presumably indices of the following vregs in the linear chain - confirm
413  };
414 
416  {
417  DTree<size_t> vs[MAX_REGTYPES_NUM];
418  };
419 private:
420  Assembler& assembler;
421  std::vector<CodeBlock> codeBlocks;
422  SSAReplacesMap ssaReplacesMap;
423  size_t regTypesNum;
424 
425  Array<OutLiveness> outLivenesses[MAX_REGTYPES_NUM];
426  size_t graphVregsCounts[MAX_REGTYPES_NUM];
427  VarIndexMap vregIndexMaps[MAX_REGTYPES_NUM]; // indices to igraph for 2 reg types
428  InterGraph interGraphs[MAX_REGTYPES_NUM]; // for 2 register
429  Array<cxuint> graphColorMaps[MAX_REGTYPES_NUM];
430  std::unordered_map<size_t, LinearDep> linearDepMaps[MAX_REGTYPES_NUM];
431  // key - routine block, value - set of svvregs (lv indexes) used in routine
432  std::unordered_map<size_t, VIdxSetEntry> vidxRoutineMap;
433  // key - call block, value - set of svvregs (lv indexes) used between this call point
434  std::unordered_map<size_t, VIdxSetEntry> vidxCallMap;
435 
436 public:
437  AsmRegAllocator(Assembler& assembler);
438  // constructor for testing
439  AsmRegAllocator(Assembler& assembler, const std::vector<CodeBlock>& codeBlocks,
440  const SSAReplacesMap& ssaReplacesMap);
441 
442  void createCodeStructure(const std::vector<AsmCodeFlowEntry>& codeFlow,
443  size_t codeSize, const cxbyte* code);
444  void createSSAData(ISAUsageHandler& usageHandler,
445  ISALinearDepHandler& linDepHandler);
446  void applySSAReplaces();
447  void createLivenesses(ISAUsageHandler& usageHandler,
448  ISALinearDepHandler& linDepHandler);
449  void createInterferenceGraph();
450  void colorInterferenceGraph();
451 
452  void allocateRegisters(AsmSectionId sectionId);
453 
454  const std::vector<CodeBlock>& getCodeBlocks() const
455  { return codeBlocks; }
456  const SSAReplacesMap& getSSAReplacesMap() const
457  { return ssaReplacesMap; }
458  const Array<OutLiveness>* getOutLivenesses() const
459  { return outLivenesses; }
460 
461  const std::unordered_map<size_t, LinearDep>* getLinearDepMaps() const
462  { return linearDepMaps; }
463 
464  const VarIndexMap* getVregIndexMaps() const
465  { return vregIndexMaps; }
466 
467  const std::unordered_map<size_t, VIdxSetEntry>& getVIdxRoutineMap() const
468  { return vidxRoutineMap; }
469  const std::unordered_map<size_t, VIdxSetEntry>& getVIdxCallMap() const
470  { return vidxCallMap; }
471 };
472 
475 {
476 private:
477  const AsmWaitConfig& waitConfig;
478  Assembler& assembler;
479  const std::vector<AsmRegAllocator::CodeBlock>& codeBlocks;
480  const AsmRegAllocator::VarIndexMap* vregIndexMaps;
481  const Array<cxuint>* graphColorMaps;
482  bool onlyWarnings;
483  std::vector<AsmWaitInstr> neededWaitInstrs;
484 public:
485  AsmWaitScheduler(const AsmWaitConfig& asmWaitConfig, Assembler& assembler,
486  const std::vector<AsmRegAllocator::CodeBlock>& codeBlocks,
487  const AsmRegAllocator::VarIndexMap* vregIndexMaps,
488  const Array<cxuint>* graphColorMaps, bool onlyWarnings);
489 
490  void schedule(ISAUsageHandler& usageHandler, ISAWaitHandler& waitHandler);
491 
492  const std::vector<AsmWaitInstr>& getNeededWaitInstrs() const
493  { return neededWaitInstrs; }
494 };
495 
// Type of clause; values of this enum are passed to Assembler::pushClause() and popClause().
497 enum class AsmClauseType
498 {
499  IF, // NOTE(review): presumably the clause opened by an 'if' pseudo-op - confirm
500  ELSEIF, // NOTE(review): presumably an 'elseif' branch - confirm
501  ELSE, // NOTE(review): presumably an 'else' branch - confirm
502  REPEAT, // NOTE(review): presumably a repetition clause - confirm
503  MACRO // NOTE(review): presumably a macro definition clause - confirm
504 };
505 
507 struct AsmClause
508 {
513 };
514 
517 {
518 public:
520  typedef std::pair<CString, uint64_t> DefSym;
522  typedef std::unordered_map<CString, AsmKernelId> KernelMap;
523 private:
524  friend class AsmStreamInputFilter;
525  friend class AsmMacroInputFilter;
526  friend class AsmForInputFilter;
527  friend class AsmExpression;
528  friend class AsmFormatHandler;
529  friend class AsmKcodeHandler;
530  friend class AsmRawCodeHandler;
531  friend class AsmAmdHandler;
532  friend class AsmAmdCL2Handler;
533  friend class AsmGalliumHandler;
534  friend class AsmROCmHandler;
535  friend class ISAAssembler;
536  friend class AsmRegAllocator;
537  friend class AsmWaitScheduler;
538 
539  friend struct AsmParseUtils; // INTERNAL LOGIC
540  friend struct AsmPseudoOps; // INTERNAL LOGIC
541  friend struct AsmKcodePseudoOps; // INTERNAL LOGIC
542  friend struct AsmGalliumPseudoOps; // INTERNAL LOGIC
543  friend struct AsmAmdPseudoOps; // INTERNAL LOGIC
544  friend struct AsmAmdCL2PseudoOps; // INTERNAL LOGIC
545  friend struct AsmROCmPseudoOps; // INTERNAL LOGIC
546  friend struct GCNAsmUtils; // INTERNAL LOGIC
547 
548  Array<CString> filenames;
549  BinaryFormat format;
550  GPUDeviceType deviceType;
551  uint32_t driverVersion;
552  uint32_t llvmVersion; // GalliumCompute
553  bool _64bit;
554  bool newROCmBinFormat;
555  bool good;
556  bool resolvingRelocs;
557  bool doNotRemoveFromSymbolClones;
558  cxuint policyVersion;
559  ISAAssembler* isaAssembler;
560  std::vector<DefSym> defSyms;
561  std::vector<CString> includeDirs;
562  std::vector<AsmSection> sections;
563  std::vector<Array<AsmSectionId> > relSpacesSections;
564  std::unordered_set<AsmSymbolEntry*> symbolSnapshots;
565  std::unordered_set<AsmSymbolEntry*> symbolClones;
566  std::vector<AsmExpression*> unevalExpressions;
567  std::vector<AsmRelocation> relocations;
568  std::unordered_map<const AsmRegVar*, AsmRegVarLinears> regVarLinearsMap;
569  AsmScope globalScope;
570  AsmMacroMap macroMap;
571  std::stack<AsmScope*> scopeStack;
572  std::vector<AsmScope*> abandonedScopes;
573  AsmScope* currentScope;
574  KernelMap kernelMap;
575  std::vector<AsmKernel> kernels;
576  Flags flags;
577  uint64_t macroCount;
578  uint64_t localCount; // macro's local count
579  bool alternateMacro;
580  bool buggyFPLit;
581  bool macroCase;
582  bool oldModParam;
583 
584  cxuint inclusionLevel;
585  cxuint macroSubstLevel;
586  cxuint repetitionLevel;
587  bool lineAlreadyRead; // if line already read
588 
589  size_t lineSize;
590  const char* line;
591  bool endOfAssembly;
592  bool sectionDiffsPrepared;
593  bool collectSourcePoses;
594 
595  cxuint filenameIndex;
596  std::stack<AsmInputFilter*> asmInputFilters;
597  AsmInputFilter* currentInputFilter;
598 
599  std::ostream& messageStream;
600  std::ostream& printStream;
601 
602  AsmFormatHandler* formatHandler;
603 
604  std::stack<AsmClause> clauses;
605 
606  AsmKernelId currentKernel;
607  AsmSectionId& currentSection;
608  uint64_t& currentOutPos;
609 
610  bool withSectionDiffs() const
611  { return formatHandler!=nullptr && formatHandler->isSectionDiffsResolvable(); }
612 
613  AsmSourcePos getSourcePos(LineCol lineCol) const
614  {
615  return { currentInputFilter->getMacroSubst(), currentInputFilter->getSource(),
616  lineCol.lineNo, lineCol.colNo };
617  }
618 
619  AsmSourcePos getSourcePos(size_t pos) const
620  { return currentInputFilter->getSourcePos(pos); }
621  AsmSourcePos getSourcePos(const char* linePtr) const
622  { return getSourcePos(linePtr-line); }
623 
624  void printWarning(const AsmSourcePos& pos, const char* message);
625  void printError(const AsmSourcePos& pos, const char* message);
626 
627  void printWarning(const char* linePtr, const char* message)
628  { printWarning(getSourcePos(linePtr), message); }
629  void printError(const char* linePtr, const char* message)
630  { printError(getSourcePos(linePtr), message); }
631 
632  void printWarning(LineCol lineCol, const char* message)
633  { printWarning(getSourcePos(lineCol), message); }
634  void printError(LineCol lineCol, const char* message)
635  { printError(getSourcePos(lineCol), message); }
636 
637  LineCol translatePos(const char* linePtr) const
638  { return currentInputFilter->translatePos(linePtr-line); }
639  LineCol translatePos(size_t pos) const
640  { return currentInputFilter->translatePos(pos); }
641 
642  bool parseLiteral(uint64_t& value, const char*& linePtr);
643  bool parseLiteralNoError(uint64_t& value, const char*& linePtr);
644  bool parseString(std::string& outString, const char*& linePtr);
645 
// Result of parse helpers that return it (parseSymbol(), makeMacroSubstitution()).
646  enum class ParseState
647  {
648  FAILED = 0, // parsing failed; explicitly the zero value
649  PARSED, // NOTE(review): presumably the element was parsed successfully - confirm
650  MISSING // missing element
651  };
652 
656  ParseState parseSymbol(const char*& linePtr, AsmSymbolEntry*& entry,
657  bool localLabel = true, bool dontCreateSymbol = false);
658  bool skipSymbol(const char*& linePtr);
659 
660  bool setSymbol(AsmSymbolEntry& symEntry, uint64_t value, AsmSectionId sectionId);
661 
662  bool assignSymbol(const CString& symbolName, const char* symbolPlace,
663  const char* linePtr, bool reassign = true, bool baseExpr = false);
664 
665  bool assignOutputCounter(const char* symbolPlace, uint64_t value,
666  AsmSectionId sectionId, cxbyte fillValue = 0);
667 
668  void parsePseudoOps(const CString& firstName, const char* stmtPlace,
669  const char* linePtr);
670 
672  bool skipClauses(bool exitm = false);
673  bool putMacroContent(RefPtr<AsmMacro> macro);
674  bool putRepetitionContent(AsmRepeat& repeat);
675 
676  void initializeOutputFormat();
677 
678  bool pushClause(const char* string, AsmClauseType clauseType)
679  {
680  bool included; // to ignore
681  return pushClause(string, clauseType, true, included);
682  }
683  bool pushClause(const char* string, AsmClauseType clauseType,
684  bool satisfied, bool& included);
685  // return false when failed (for example no clauses)
686  bool popClause(const char* string, AsmClauseType clauseType);
687 
688  // recursive function to find scope in scope
689  AsmScope* findScopeInScope(AsmScope* scope, const CString& scopeName,
690  std::unordered_set<AsmScope*>& scopeSet);
691  // find scope by identifier
692  AsmScope* getRecurScope(const CString& scopePlace, bool ignoreLast = false,
693  const char** lastStep = nullptr);
694  // find symbol in scopes
695  // internal recursive function to find symbol in scope
696  AsmSymbolEntry* findSymbolInScopeInt(AsmScope* scope, const CString& symName,
697  std::unordered_set<AsmScope*>& scopeSet);
698  // scope - return scope from scoped name
699  AsmSymbolEntry* findSymbolInScope(const CString& symName, AsmScope*& scope,
700  CString& sameSymName, bool insertMode = false);
701  // similar to map::insert, but returns pointer
702  std::pair<AsmSymbolEntry*, bool> insertSymbolInScope(const CString& symName,
703  const AsmSymbol& symbol);
704 
705  // internal recursive function to find symbol in scope
706  AsmRegVarEntry* findRegVarInScopeInt(AsmScope* scope, const CString& rvName,
707  std::unordered_set<AsmScope*>& scopeSet);
708  // scope - return scope from scoped name
709  AsmRegVarEntry* findRegVarInScope(const CString& rvName, AsmScope*& scope,
710  CString& sameRvName, bool insertMode = false);
711  // similar to map::insert, but returns pointer
712  std::pair<AsmRegVarEntry*, bool> insertRegVarInScope(const CString& rvName,
713  const AsmRegVar& regVar);
714 
715  // create scope
716  bool getScope(AsmScope* parent, const CString& scopeName, AsmScope*& scope);
717  // push new scope level
718  bool pushScope(const CString& scopeName);
719  bool popScope();
720 
722  bool includeFile(const char* pseudoOpPlace, const std::string& filename);
723 
724  ParseState makeMacroSubstitution(const char* string);
725 
726  bool parseMacroArgValue(const char*& linePtr, std::string& outStr);
727 
728  void putData(size_t size, const cxbyte* data)
729  {
730  AsmSection& section = sections[currentSection];
731  section.content.insert(section.content.end(), data, data+size);
732  currentOutPos += size;
733  }
734 
735  cxbyte* reserveData(size_t size, cxbyte fillValue = 0);
736 
737  void goToMain(const char* pseudoOpPlace);
738  void goToKernel(const char* pseudoOpPlace, const char* kernelName);
739  void goToSection(const char* pseudoOpPlace, const char* sectionName, uint64_t align=0);
740  void goToSection(const char* pseudoOpPlace, const char* sectionName,
741  AsmSectionType type, Flags flags, uint64_t align=0);
742  void goToSection(const char* pseudoOpPlace, AsmSectionId sectionId, uint64_t align=0);
743 
744  void printWarningForRange(cxuint bits, uint64_t value, const AsmSourcePos& pos,
745  cxbyte signess = WS_BOTH);
746 
747  bool isAddressableSection() const
748  {
749  return currentSection==ASMSECT_ABS ||
750  (sections[currentSection].flags & ASMSECT_ADDRESSABLE) != 0;
751  }
752  bool isWriteableSection() const
753  {
754  return currentSection!=ASMSECT_ABS &&
755  (sections[currentSection].flags & ASMSECT_WRITEABLE) != 0;
756  }
757  bool isResolvableSection() const
758  {
759  return currentSection==ASMSECT_ABS ||
760  (sections[currentSection].flags & ASMSECT_UNRESOLVABLE) == 0;
761  }
762  bool isResolvableSection(AsmSectionId sectionId) const
763  {
764  return sectionId==ASMSECT_ABS ||
765  (sections[sectionId].flags & ASMSECT_UNRESOLVABLE) == 0;
766  }
767 
768  // oldKernels and newKernels must be sorted
769  void handleRegionsOnKernels(const std::vector<AsmKernelId>& newKernels,
770  const std::vector<AsmKernelId>& oldKernels, AsmSectionId codeSection);
771 
772  void tryToResolveSymbol(AsmSymbolEntry& symEntry);
773  void tryToResolveSymbols(AsmScope* scope);
774  void printUnresolvedSymbols(AsmScope* scope);
775 
776  bool resolveExprTarget(const AsmExpression* expr, uint64_t value,
777  AsmSectionId sectionId);
778 
779  void cloneSymEntryIfNeeded(AsmSymbolEntry& symEntry);
780 
781  void undefineSymbol(AsmSymbolEntry& symEntry);
782 
783 protected:
785  bool readLine();
786 public:
788 
797  explicit Assembler(const CString& filename, std::istream& input, Flags flags = 0,
800  std::ostream& msgStream = std::cerr, std::ostream& printStream = std::cout);
801 
803 
811  explicit Assembler(const Array<CString>& filenames, Flags flags = 0,
814  std::ostream& msgStream = std::cerr, std::ostream& printStream = std::cout);
816  ~Assembler();
817 
819  bool assemble();
820 
822  void writeBinary(const char* filename) const;
824  void writeBinary(std::ostream& outStream) const;
826  void writeBinary(Array<cxbyte>& array) const;
827 
829  uint32_t getDriverVersion() const
830  { return driverVersion; }
832  void setDriverVersion(uint32_t driverVersion)
833  { this->driverVersion = driverVersion; }
834 
836  uint32_t getLLVMVersion() const
837  { return llvmVersion; }
839  void setLLVMVersion(uint32_t llvmVersion)
840  { this->llvmVersion = llvmVersion; }
841 
844  { return deviceType; }
846  void setDeviceType(const GPUDeviceType deviceType)
847  { this->deviceType = deviceType; }
850  { return format; }
853  { format = binFormat; }
855  bool is64Bit() const
856  { return _64bit; }
858  void set64Bit(bool this64Bit)
859  { _64bit = this64Bit; }
861  bool isNewROCmBinFormat() const
862  { return newROCmBinFormat; }
864  void setNewROCmBinFormat(bool newFmt)
865  { newROCmBinFormat = newFmt; }
868  { return policyVersion; }
871  { policyVersion = pv; }
873  Flags getFlags() const
874  { return flags; }
876  void setFlags(Flags flags)
877  { this->flags = flags; }
879  bool isAltMacro() const
880  { return alternateMacro; }
882  bool isMacroCase() const
883  { return macroCase; }
885  bool isOldModParam() const
886  { return oldModParam; }
888  bool isBuggyFPLit() const
889  { return buggyFPLit; }
891  const std::vector<CString>& getIncludeDirs() const
892  { return includeDirs; }
894  void addIncludeDir(const CString& includeDir);
896  const AsmSymbolMap& getSymbolMap() const
897  { return globalScope.symbolMap; }
899  const std::vector<AsmSection>& getSections() const
900  { return sections; }
901  // get first sections for rel spaces
902  const std::vector<Array<AsmSectionId> >& getRelSpacesSections() const
903  { return relSpacesSections; }
905  const KernelMap& getKernelMap() const
906  { return kernelMap; }
908  const std::vector<AsmKernel>& getKernels() const
909  { return kernels; }
911  const AsmRegVarMap& getRegVarMap() const
912  { return globalScope.regVarMap; }
914  bool addRegVar(const CString& name, const AsmRegVar& var)
915  { return insertRegVarInScope(name, var).second; }
917  bool getRegVar(const CString& name, const AsmRegVar*& regVar);
918 
920  const AsmScope& getGlobalScope() const
921  { return globalScope; }
922 
924  bool isAbsoluteSymbol(const AsmSymbol& symbol) const;
925 
927  void addInitialDefSym(const CString& symName, uint64_t value);
928 
931  { return formatHandler; }
934  { return isaAssembler; }
935 };
936 
937 inline void ISAAssembler::printWarning(const char* linePtr, const char* message)
938 { assembler.printWarning(linePtr, message); }
939 
940 inline void ISAAssembler::printError(const char* linePtr, const char* message)
941 { assembler.printError(linePtr, message); }
942 
943 inline void ISAAssembler::printWarningForRange(cxuint bits, uint64_t value,
944  const AsmSourcePos& pos, cxbyte signess)
945 { assembler.printWarningForRange(bits, value, pos, signess); }
946 
947 inline void ISAAssembler::printWarning(const AsmSourcePos& sourcePos, const char* message)
948 { assembler.printWarning(sourcePos, message); }
949 
950 inline void ISAAssembler::printError(const AsmSourcePos& sourcePos, const char* message)
951 { assembler.printError(sourcePos, message); }
952 
953 inline void ISAAssembler::addCodeFlowEntry(AsmSectionId sectionId,
954  const AsmCodeFlowEntry& entry)
955 { assembler.sections[sectionId].addCodeFlowEntry(entry); }
956 
957 };
958 
959 #endif
AsmRepeatInputFilter or AsmIRPInputFilter.
bool is64Bit() const
get bitness (true if 64-bit)
Definition: Assembler.h:855
void set64Bit(bool this64Bit)
set bitness (true if 64-bit)
Definition: Assembler.h:858
AsmRegVarMap regVarMap
regvar map
Definition: AsmDefs.h:652
common definitions for assembler and disassembler
Definition: Assembler.h:360
main class of assembler
Definition: Assembler.h:516
bool haveReturn
code has a return from routine
Definition: Assembler.h:393
AsmRegField regField
place in instruction
Definition: Assembler.h:90
non copyable and non movable base structure (class)
Definition: Utilities.h:46
assembler expression class
Definition: AsmDefs.h:286
uint16_t rstart
register start
Definition: Assembler.h:88
uint32_t Flags
type for declaring various flags
Definition: Utilities.h:100
Flags getFlags() const
get flags
Definition: Assembler.h:873
std::unordered_map< CString, AsmRegVar > AsmRegVarMap
regvar map
Definition: AsmDefs.h:535
assembler repeat
Definition: AsmSource.h:236
virtual ~ISAUsageHandler()
destructor
AMD CATALYST format.
size_t ssaIdChange
number of SSA id changes
Definition: Assembler.h:371
GPUDeviceType getDeviceType() const
get GPU device type
Definition: Assembler.h:843
AsmRegVarMap::value_type AsmRegVarEntry
regvar entry
Definition: AsmDefs.h:537
BinaryFormat
binary format for Assembler/Disassembler
Definition: Commons.h:38
const std::vector< AsmSection > & getSections() const
get sections
Definition: Assembler.h:899
ColNo colNo
column number; for macro substitution and IRP it points to the column in the preprocessed line
Definition: AsmSource.h:50
void setDriverVersion(uint32_t driverVersion)
set AMD driver version
Definition: Assembler.h:832
reference pointer based on Glibmm refptr
Definition: Utilities.h:860
uint16_t GPUArchMask
GPU architecture mask (one bit represents single GPU architecture)
Definition: GPUId.h:105
AsmClauseType
type of clause
Definition: Assembler.h:497
Assembler & assembler
assembler
Definition: Assembler.h:213
const AsmRegVarMap & getRegVarMap() const
get regvar map
Definition: Assembler.h:911
delayed result for register for instruction with delayed results
Definition: AsmDefs.h:600
assembler section
Definition: AsmDefs.h:692
DTree container (kind of B-Tree)
assembler macro input filter (for macro filtering)
Definition: AsmSource.h:447
std::pair< CString, uint64_t > DefSym
defined symbol entry
Definition: Assembler.h:520
bool isBuggyFPLit() const
get true if buggyFPLit enabled
Definition: Assembler.h:888
AsmRegVarUsage nextUsage(ReadPos &readPos)
get next usage
handles raw code format
Definition: AsmFormats.h:247
Definition: Assembler.h:415
assembler input layout filter
Definition: AsmSource.h:413
assembler scope for symbol, macros, regvars
Definition: AsmDefs.h:648
bool isNewROCmBinFormat() const
is new ROCm binary format
Definition: Assembler.h:861
Regvar info structure.
Definition: AsmDefs.h:510
void pushLinearDep(const AsmRegVarLinearDep &linearDep)
push linear dependency
Definition: Assembler.h:139
void printError(const char *linePtr, const char *message)
print error for position pointed by line pointer
Definition: Assembler.h:940
bool hasNext(const ReadPos &readPos) const
return true if has next instruction
Definition: Assembler.h:198
void printWarning(const char *linePtr, const char *message)
print warning for position pointed by line pointer
Definition: Assembler.h:937
cxuint sgprsNum
SGPRs number.
Definition: Assembler.h:276
virtual ISAUsageHandler * copy() const =0
copy this usage handler
an array class
Definition: Containers.h:41
std::vector< NextBlock > nexts
next blocks; if empty, the direct next block follows
Definition: Assembler.h:391
Flags regFlags
define what extra register must be included
Definition: Assembler.h:278
bool isSectionDiffsResolvable() const
return true if format handler can resolve differences between sections
Definition: AsmFormats.h:165
void pushUsage(const AsmRegVarUsage &rvu)
push regvar or register usage
Configuration header.
cxuint AsmSectionId
type for Asm section id (index)
Definition: Commons.h:35
handles ROCM binary format
Definition: AsmFormats.h:556
size_t firstPos
first position in code block (section offset)
Definition: Assembler.h:372
Assembler Wait scheduler.
Definition: Assembler.h:474
size_t ssaId
original SSA id
Definition: Assembler.h:369
line and column
Definition: AsmSource.h:45
std::unordered_map< CString, AsmSymbol > AsmSymbolMap
assembler symbol map
Definition: AsmDefs.h:206
internal structure for regvar linear dependencies
Definition: AsmDefs.h:562
assembler 'for' pseudo-op input filter
Definition: AsmSource.h:499
assembler formats
virtual void getUsageDependencies(cxuint rvusNum, const AsmRegVarUsage *rvus, cxbyte *linearDeps) const =0
get usage dependencies around single instruction
void setFlags(Flags flags)
set flags
Definition: Assembler.h:876
void pushWaitInstr(const AsmWaitInstr &waitInstr)
push wait instruction
Definition: Assembler.h:195
all flags
Definition: Assembler.h:67
AsmSourcePos sourcePos
position in source code
Definition: Assembler.h:510
Definition: Assembler.h:179
cxuint getPolicyVersion() const
get policy version
Definition: Assembler.h:867
#define CLRX_VERSION_NUMBER
CLRadeonExtender version number.
Definition: Config.h:39
wait handler
Definition: Assembler.h:176
if failed now, no later trial
AsmSectionType
assembler section type
Definition: AsmFormats.h:47
assembler format handler
Definition: AsmFormats.h:126
void setPolicyVersion(cxuint pv)
set policy version
Definition: Assembler.h:870
enable altmacro mode
Definition: Assembler.h:61
RefPtr< const AsmSource > getSource() const
get current source after reading line
Definition: AsmSource.h:392
unsigned char cxbyte
unsigned byte
Definition: Config.h:229
enable resolving symbols if ASM_TESTRUN enabled
Definition: Assembler.h:65
const std::vector< CString > & getIncludeDirs() const
get include directory list
Definition: Assembler.h:891
cxuint vgprsNum
VGPRs number.
Definition: Assembler.h:277
std::unordered_map< CString, RefPtr< const AsmMacro > > AsmMacroMap
assembler macro map
Definition: AsmDefs.h:640
AsmSourcePos prevIfPos
position of previous if-clause
Definition: Assembler.h:512
size_t ssaIdLast
last SSA id in last
Definition: Assembler.h:370
main namespace
Definition: AsmDefs.h:38
const AsmRegVar * regVar
if null, then usage of called register
Definition: Assembler.h:87
LineCol translatePos(size_t position) const
translate position to line number and column number
size_t lastPos
last position in code block (section offset)
Definition: Assembler.h:373
void setNewROCmBinFormat(bool newFmt)
set new ROCm binary format
Definition: Assembler.h:864
bool isAltMacro() const
get true if altMacro enabled
Definition: Assembler.h:879
bool isMacroCase() const
get true if macroCase enabled
Definition: Assembler.h:882
AsmClauseType type
type of clause
Definition: Assembler.h:509
Definition: Assembler.h:357
format handler with Kcode (kernel-code) handling
Definition: AsmFormats.h:219
unsigned int cxuint
unsigned int
Definition: Config.h:237
const KernelMap & getKernelMap() const
get kernel map
Definition: Assembler.h:905
Definition: Assembler.h:97
BinaryFormat getBinaryFormat() const
get binary format
Definition: Assembler.h:849
buggy handling of fpliterals (including fp constants)
Definition: Assembler.h:62
const AsmSymbolMap & getSymbolMap() const
get symbols map
Definition: Assembler.h:896
std::unordered_map< CString, AsmKernelId > KernelMap
kernel map type
Definition: Assembler.h:522
cxbyte AsmExprTargetType
expression target type (one byte)
Definition: AsmDefs.h:59
macro substitution
std::vector< cxbyte > content
content of section
Definition: AsmDefs.h:702
AsmSymbolMap::value_type AsmSymbolEntry
assembler symbol entry
Definition: AsmDefs.h:208
cxuint AsmKernelId
type for Asm kernel id (index)
Definition: Commons.h:33
Definition: Assembler.h:408
GCN (register and regvar) Usage handler.
Definition: Assembler.h:154
AsmRegVarLinearDep getLinearDep(size_t pos) const
get next linear dependency
Definition: Assembler.h:145
GPUDeviceType
type of GPU device
Definition: GPUId.h:51
bool hasNext(const ReadPos &readPos) const
has next regvar usage
Definition: Assembler.h:116
size_t size() const
return number of linear dependencies
Definition: Assembler.h:142
only for running tests
Definition: Assembler.h:66
enable all warnings for assembler
Definition: Assembler.h:59
void setDeviceType(const GPUDeviceType deviceType)
set GPU device type
Definition: Assembler.h:846
utilities for other libraries and programs
bool haveCalls
code has calls at its end
Definition: Assembler.h:392
bool readBeforeWrite
has read before write
Definition: Assembler.h:374
disable case-insensitive naming (default)
Definition: Assembler.h:64
GCN arch assembler.
Definition: Assembler.h:271
size_t ssaIdBefore
SSA id before first SSA in block.
Definition: Assembler.h:367
AsmSymbolMap symbolMap
symbol map
Definition: AsmDefs.h:651
register pool numbers
Definition: Assembler.h:275
Definition: Assembler.h:85
uint32_t getLLVMVersion() const
get LLVM version
Definition: Assembler.h:836
assembler's clause (if, else, macro, rept)
Definition: Assembler.h:507
regvar usage in code
Definition: AsmDefs.h:549
AsmSourcePos getSourcePos(size_t position) const
get source position after reading line
Definition: AsmSource.h:399
Definition: Assembler.h:365
GPU identification utilities.
LineNo lineNo
line number
Definition: AsmSource.h:47
bool addRegVar(const CString &name, const AsmRegVar &var)
add regvar
Definition: Assembler.h:914
const ISAAssembler * getISAAssembler() const
get ISA assembler
Definition: Assembler.h:933
structure that holds read position to store later
Definition: Assembler.h:78
section is unresolvable
Definition: AsmFormats.h:101
code flow entry
Definition: AsmDefs.h:632
uint32_t getDriverVersion() const
get AMD driver version
Definition: Assembler.h:829
const AsmFormatHandler * getFormatHandler() const
get format handler
Definition: Assembler.h:930
description of the WAIT instruction (for waiting for results)
Definition: AsmDefs.h:614
bool haveEnd
code has end
Definition: Assembler.h:394
ISAUsageHandler()
constructor
void printWarningForRange(cxuint bits, uint64_t value, const AsmSourcePos &pos, cxbyte signess=WS_BOTH)
print warning about integer out of range
Definition: Assembler.h:943
handles GalliumCompute format
Definition: AsmFormats.h:470
const AsmScope & getGlobalScope() const
get global scope
Definition: Assembler.h:920
void pushDelayedOp(const AsmDelayedOp &delOp)
push delayed result
Definition: Assembler.h:192
absolute section id
Definition: AsmFormats.h:86
assembler sources handling
assembler input filter for reading lines
Definition: AsmSource.h:350
void setLLVMVersion(uint32_t llvmVersion)
set LLVM version
Definition: Assembler.h:839
assembler symbol structure
Definition: AsmDefs.h:143
RefPtr< const AsmMacroSubst > getMacroSubst() const
get current macro substitution after reading line
Definition: AsmSource.h:395
asm wait system configuration
Definition: AsmDefs.h:586
bool isOldModParam() const
get true if oldModParam enabled (old modifier parametrization)
Definition: Assembler.h:885
cxbyte AsmRegField
type of register field
Definition: AsmDefs.h:500
ISA regvar linear handler.
Definition: Assembler.h:130
force add symbols to binary
Definition: Assembler.h:60
ISA (register and regvar) Usage handler.
Definition: Assembler.h:74
handles AMD Catalyst format
Definition: AsmFormats.h:273
handles AMD OpenCL 2.0 binary format
Definition: AsmFormats.h:367
Definition: Assembler.h:383
ISA assembler class.
Definition: Assembler.h:210
simple C-string container
Definition: CString.h:38
an assembler for Radeon GPUs
containers and other utils for other libraries and programs
void setBinaryFormat(BinaryFormat binFormat)
set binary format
Definition: Assembler.h:852
uint16_t rend
register end
Definition: Assembler.h:89
bool condSatisfied
if conditional clause has already been satisfied
Definition: Assembler.h:511
const std::vector< AsmKernel > & getKernels() const
get kernels
Definition: Assembler.h:908
assembler source position
Definition: AsmSource.h:150