1 | /* |
---|
2 | * CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library |
---|
3 | * Copyright (C) 2014-2018 Mateusz Szpakowski |
---|
4 | * |
---|
5 | * This library is free software; you can redistribute it and/or |
---|
6 | * modify it under the terms of the GNU Lesser General Public |
---|
7 | * License as published by the Free Software Foundation; either |
---|
8 | * version 2.1 of the License, or (at your option) any later version. |
---|
9 | * |
---|
10 | * This library is distributed in the hope that it will be useful, |
---|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
13 | * Lesser General Public License for more details. |
---|
14 | * |
---|
15 | * You should have received a copy of the GNU Lesser General Public |
---|
16 | * License along with this library; if not, write to the Free Software |
---|
17 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
---|
18 | */ |
---|
19 | |
---|
20 | #include <CLRX/Config.h> |
---|
21 | #include <cassert> |
---|
22 | #include <cstdio> |
---|
23 | #include <cstring> |
---|
24 | #include <cstdint> |
---|
25 | #include <string> |
---|
26 | #include <vector> |
---|
27 | #include <algorithm> |
---|
28 | #include <utility> |
---|
29 | #include <memory> |
---|
30 | #include <unordered_set> |
---|
31 | #include <CLRX/amdbin/ElfBinaries.h> |
---|
32 | #include <CLRX/utils/Utilities.h> |
---|
33 | #include <CLRX/utils/MemAccess.h> |
---|
34 | #include <CLRX/utils/InputOutput.h> |
---|
35 | #include <CLRX/utils/Containers.h> |
---|
36 | #include <CLRX/amdbin/ROCmBinaries.h> |
---|
37 | |
---|
38 | using namespace CLRX; |
---|
39 | |
---|
40 | |
---|
41 | /* |
---|
42 | * ROCm binary reader and generator |
---|
43 | */ |
---|
44 | |
---|
45 | /* TODO: add support for various kernel code offset (now only 256 is supported) */ |
---|
46 | |
---|
47 | ROCmBinary::ROCmBinary(size_t binaryCodeSize, cxbyte* binaryCode, Flags creationFlags) |
---|
48 | : ElfBinary64(binaryCodeSize, binaryCode, creationFlags), |
---|
49 | regionsNum(0), codeSize(0), code(nullptr), |
---|
50 | globalDataSize(0), globalData(nullptr), metadataSize(0), metadata(nullptr), |
---|
51 | newBinFormat(false) |
---|
52 | { |
---|
53 | cxuint textIndex = SHN_UNDEF; |
---|
54 | try |
---|
55 | { textIndex = getSectionIndex(".text"); } |
---|
56 | catch(const Exception& ex) |
---|
57 | { } // ignore failed |
---|
58 | uint64_t codeOffset = 0; |
---|
59 | // find '.text' section |
---|
60 | if (textIndex!=SHN_UNDEF) |
---|
61 | { |
---|
62 | code = getSectionContent(textIndex); |
---|
63 | const Elf64_Shdr& textShdr = getSectionHeader(textIndex); |
---|
64 | codeSize = ULEV(textShdr.sh_size); |
---|
65 | codeOffset = ULEV(textShdr.sh_offset); |
---|
66 | } |
---|
67 | |
---|
68 | cxuint rodataIndex = SHN_UNDEF; |
---|
69 | try |
---|
70 | { rodataIndex = getSectionIndex(".rodata"); } |
---|
71 | catch(const Exception& ex) |
---|
72 | { } // ignore failed |
---|
73 | // find '.text' section |
---|
74 | if (rodataIndex!=SHN_UNDEF) |
---|
75 | { |
---|
76 | globalData = getSectionContent(rodataIndex); |
---|
77 | const Elf64_Shdr& rodataShdr = getSectionHeader(rodataIndex); |
---|
78 | globalDataSize = ULEV(rodataShdr.sh_size); |
---|
79 | } |
---|
80 | |
---|
81 | cxuint gpuConfigIndex = SHN_UNDEF; |
---|
82 | try |
---|
83 | { gpuConfigIndex = getSectionIndex(".AMDGPU.config"); } |
---|
84 | catch(const Exception& ex) |
---|
85 | { } // ignore failed |
---|
86 | newBinFormat = (gpuConfigIndex == SHN_UNDEF); |
---|
87 | |
---|
88 | cxuint relaDynIndex = SHN_UNDEF; |
---|
89 | try |
---|
90 | { relaDynIndex = getSectionIndex(".rela.dyn"); } |
---|
91 | catch(const Exception& ex) |
---|
92 | { } // ignore failed |
---|
93 | |
---|
94 | cxuint gotIndex = SHN_UNDEF; |
---|
95 | try |
---|
96 | { gotIndex = getSectionIndex(".got"); } |
---|
97 | catch(const Exception& ex) |
---|
98 | { } // ignore failed |
---|
99 | |
---|
100 | // counts regions (symbol or kernel) |
---|
101 | regionsNum = 0; |
---|
102 | const size_t symbolsNum = getSymbolsNum(); |
---|
103 | for (size_t i = 0; i < symbolsNum; i++) |
---|
104 | { |
---|
105 | // count regions number |
---|
106 | const Elf64_Sym& sym = getSymbol(i); |
---|
107 | const cxbyte symType = ELF64_ST_TYPE(sym.st_info); |
---|
108 | const cxbyte bind = ELF64_ST_BIND(sym.st_info); |
---|
109 | if (ULEV(sym.st_shndx)==textIndex && |
---|
110 | (symType==STT_GNU_IFUNC || (symType==STT_FUNC && !newBinFormat) || |
---|
111 | (bind==STB_GLOBAL && symType==STT_OBJECT))) |
---|
112 | regionsNum++; |
---|
113 | } |
---|
114 | if (code==nullptr && regionsNum!=0) |
---|
115 | throw BinException("No code if regions number is not zero"); |
---|
116 | regions.reset(new ROCmRegion[regionsNum]); |
---|
117 | size_t j = 0; |
---|
118 | typedef std::pair<uint64_t, size_t> RegionOffsetEntry; |
---|
119 | std::unique_ptr<RegionOffsetEntry[]> symOffsets(new RegionOffsetEntry[regionsNum]); |
---|
120 | |
---|
121 | // get regions info |
---|
122 | for (size_t i = 0; i < symbolsNum; i++) |
---|
123 | { |
---|
124 | const Elf64_Sym& sym = getSymbol(i); |
---|
125 | if (ULEV(sym.st_shndx)!=textIndex) |
---|
126 | continue; // if not in '.text' section |
---|
127 | const size_t value = ULEV(sym.st_value); |
---|
128 | if (value < codeOffset) |
---|
129 | throw BinException("Region offset is too small!"); |
---|
130 | const size_t size = ULEV(sym.st_size); |
---|
131 | |
---|
132 | const cxbyte symType = ELF64_ST_TYPE(sym.st_info); |
---|
133 | const cxbyte bind = ELF64_ST_BIND(sym.st_info); |
---|
134 | if (symType==STT_GNU_IFUNC || symType==STT_FUNC || |
---|
135 | (bind==STB_GLOBAL && symType==STT_OBJECT)) |
---|
136 | { |
---|
137 | ROCmRegionType type = ROCmRegionType::DATA; |
---|
138 | // if kernel |
---|
139 | if (symType==STT_GNU_IFUNC) |
---|
140 | type = ROCmRegionType::KERNEL; |
---|
141 | // if function kernel |
---|
142 | else if (symType==STT_FUNC) |
---|
143 | { |
---|
144 | if (newBinFormat) |
---|
145 | continue; |
---|
146 | type = ROCmRegionType::FKERNEL; |
---|
147 | } |
---|
148 | symOffsets[j] = std::make_pair(value, j); |
---|
149 | if (type!=ROCmRegionType::DATA && value+0x100 > codeOffset+codeSize) |
---|
150 | throw BinException("Kernel or code offset is too big!"); |
---|
151 | regions[j++] = { getSymbolName(i), size, value, type }; |
---|
152 | } |
---|
153 | } |
---|
154 | // sort regions by offset |
---|
155 | std::sort(symOffsets.get(), symOffsets.get()+regionsNum, |
---|
156 | [](const RegionOffsetEntry& a, const RegionOffsetEntry& b) |
---|
157 | { return a.first < b.first; }); |
---|
158 | // checking distance between regions |
---|
159 | for (size_t i = 1; i <= regionsNum; i++) |
---|
160 | { |
---|
161 | size_t end = (i<regionsNum) ? symOffsets[i].first : codeOffset+codeSize; |
---|
162 | ROCmRegion& region = regions[symOffsets[i-1].second]; |
---|
163 | if (region.type==ROCmRegionType::KERNEL && symOffsets[i-1].first+0x100 > end) |
---|
164 | throw BinException("Kernel size is too small!"); |
---|
165 | |
---|
166 | const size_t regSize = end - symOffsets[i-1].first; |
---|
167 | if (region.size==0) |
---|
168 | region.size = regSize; |
---|
169 | else |
---|
170 | region.size = std::min(regSize, region.size); |
---|
171 | } |
---|
172 | |
---|
173 | // load got symbols |
---|
174 | if (relaDynIndex != SHN_UNDEF && gotIndex != SHN_UNDEF) |
---|
175 | { |
---|
176 | const Elf64_Shdr& relaShdr = getSectionHeader(relaDynIndex); |
---|
177 | const Elf64_Shdr& gotShdr = getSectionHeader(gotIndex); |
---|
178 | |
---|
179 | size_t relaEntrySize = ULEV(relaShdr.sh_entsize); |
---|
180 | if (relaEntrySize==0) |
---|
181 | relaEntrySize = sizeof(Elf64_Rela); |
---|
182 | const size_t relaEntriesNum = ULEV(relaShdr.sh_size)/relaEntrySize; |
---|
183 | const size_t gotEntriesNum = ULEV(gotShdr.sh_size) >> 3; |
---|
184 | if (gotEntriesNum != relaEntriesNum) |
---|
185 | throw BinException("RelaDyn entries number and GOT entries " |
---|
186 | "number doesn't match!"); |
---|
187 | |
---|
188 | // initialize GOT symbols table |
---|
189 | gotSymbols.resize(gotEntriesNum); |
---|
190 | const cxbyte* relaDyn = getSectionContent(relaDynIndex); |
---|
191 | for (size_t i = 0; i < relaEntriesNum; i++) |
---|
192 | { |
---|
193 | const Elf64_Rela& rela = *reinterpret_cast<const Elf64_Rela*>( |
---|
194 | relaDyn + relaEntrySize*i); |
---|
195 | // check rela entry fields |
---|
196 | if (ULEV(rela.r_offset) != ULEV(gotShdr.sh_offset) + i*8) |
---|
197 | throw BinException("Wrong dyn relocation offset"); |
---|
198 | if (ULEV(rela.r_addend) != 0ULL) |
---|
199 | throw BinException("Wrong dyn relocation addend"); |
---|
200 | size_t symIndex = ELF64_R_SYM(ULEV(rela.r_info)); |
---|
201 | if (symIndex >= getDynSymbolsNum()) |
---|
202 | throw BinException("Dyn relocation symbol index out of range"); |
---|
203 | // just set in gotSymbols |
---|
204 | gotSymbols[i] = symIndex; |
---|
205 | } |
---|
206 | } |
---|
207 | |
---|
208 | // get metadata |
---|
209 | const size_t notesSize = getNotesSize(); |
---|
210 | const cxbyte* noteContent = (const cxbyte*)getNotes(); |
---|
211 | |
---|
212 | for (size_t offset = 0; offset < notesSize; ) |
---|
213 | { |
---|
214 | const Elf64_Nhdr* nhdr = (const Elf64_Nhdr*)(noteContent + offset); |
---|
215 | size_t namesz = ULEV(nhdr->n_namesz); |
---|
216 | size_t descsz = ULEV(nhdr->n_descsz); |
---|
217 | if (usumGt(offset, namesz+descsz, notesSize)) |
---|
218 | throw BinException("Note offset+size out of range"); |
---|
219 | |
---|
220 | if (namesz==4 && |
---|
221 | ::strcmp((const char*)noteContent+offset+ sizeof(Elf64_Nhdr), "AMD")==0) |
---|
222 | { |
---|
223 | const uint32_t noteType = ULEV(nhdr->n_type); |
---|
224 | if (noteType == 0xa) |
---|
225 | { |
---|
226 | metadata = (char*)(noteContent+offset+sizeof(Elf64_Nhdr) + 4); |
---|
227 | metadataSize = descsz; |
---|
228 | } |
---|
229 | else if (noteType == 0xb) |
---|
230 | target.assign((char*)(noteContent+offset+sizeof(Elf64_Nhdr) + 4), descsz); |
---|
231 | } |
---|
232 | size_t align = (((namesz+descsz)&3)!=0) ? 4-((namesz+descsz)&3) : 0; |
---|
233 | offset += sizeof(Elf64_Nhdr) + namesz + descsz + align; |
---|
234 | } |
---|
235 | |
---|
236 | if (hasRegionMap()) |
---|
237 | { |
---|
238 | // create region map |
---|
239 | regionsMap.resize(regionsNum); |
---|
240 | for (size_t i = 0; i < regionsNum; i++) |
---|
241 | regionsMap[i] = std::make_pair(regions[i].regionName, i); |
---|
242 | // sort region map |
---|
243 | mapSort(regionsMap.begin(), regionsMap.end()); |
---|
244 | } |
---|
245 | |
---|
246 | if ((creationFlags & ROCMBIN_CREATE_METADATAINFO) != 0 && |
---|
247 | metadata != nullptr && metadataSize != 0) |
---|
248 | { |
---|
249 | metadataInfo.reset(new ROCmMetadata()); |
---|
250 | parseROCmMetadata(metadataSize, metadata, *metadataInfo); |
---|
251 | |
---|
252 | if (hasKernelInfoMap()) |
---|
253 | { |
---|
254 | const std::vector<ROCmKernelMetadata>& kernels = metadataInfo->kernels; |
---|
255 | kernelInfosMap.resize(kernels.size()); |
---|
256 | for (size_t i = 0; i < kernelInfosMap.size(); i++) |
---|
257 | kernelInfosMap[i] = std::make_pair(kernels[i].name, i); |
---|
258 | // sort region map |
---|
259 | mapSort(kernelInfosMap.begin(), kernelInfosMap.end()); |
---|
260 | } |
---|
261 | } |
---|
262 | } |
---|
263 | |
---|
264 | /// determint GPU device from ROCm notes |
---|
265 | GPUDeviceType ROCmBinary::determineGPUDeviceType(uint32_t& outArchMinor, |
---|
266 | uint32_t& outArchStepping) const |
---|
267 | { |
---|
268 | uint32_t archMajor = 0; |
---|
269 | uint32_t archMinor = 0; |
---|
270 | uint32_t archStepping = 0; |
---|
271 | |
---|
272 | { |
---|
273 | const cxbyte* noteContent = (const cxbyte*)getNotes(); |
---|
274 | if (noteContent==nullptr) |
---|
275 | throw BinException("Missing notes in inner binary!"); |
---|
276 | size_t notesSize = getNotesSize(); |
---|
277 | // find note about AMDGPU |
---|
278 | for (size_t offset = 0; offset < notesSize; ) |
---|
279 | { |
---|
280 | const Elf64_Nhdr* nhdr = (const Elf64_Nhdr*)(noteContent + offset); |
---|
281 | size_t namesz = ULEV(nhdr->n_namesz); |
---|
282 | size_t descsz = ULEV(nhdr->n_descsz); |
---|
283 | if (usumGt(offset, namesz+descsz, notesSize)) |
---|
284 | throw BinException("Note offset+size out of range"); |
---|
285 | if (ULEV(nhdr->n_type) == 0x3 && namesz==4 && descsz>=0x1a && |
---|
286 | ::strcmp((const char*)noteContent+offset+sizeof(Elf64_Nhdr), "AMD")==0) |
---|
287 | { // AMDGPU type |
---|
288 | const uint32_t* content = (const uint32_t*) |
---|
289 | (noteContent+offset+sizeof(Elf64_Nhdr) + 4); |
---|
290 | archMajor = ULEV(content[1]); |
---|
291 | archMinor = ULEV(content[2]); |
---|
292 | archStepping = ULEV(content[3]); |
---|
293 | } |
---|
294 | size_t align = (((namesz+descsz)&3)!=0) ? 4-((namesz+descsz)&3) : 0; |
---|
295 | offset += sizeof(Elf64_Nhdr) + namesz + descsz + align; |
---|
296 | } |
---|
297 | } |
---|
298 | // determine device type |
---|
299 | GPUDeviceType deviceType = getGPUDeviceTypeFromArchVersion(archMajor, archMinor, |
---|
300 | archStepping); |
---|
301 | outArchMinor = archMinor; |
---|
302 | outArchStepping = archStepping; |
---|
303 | return deviceType; |
---|
304 | } |
---|
305 | |
---|
306 | const ROCmRegion& ROCmBinary::getRegion(const char* name) const |
---|
307 | { |
---|
308 | RegionMap::const_iterator it = binaryMapFind(regionsMap.begin(), |
---|
309 | regionsMap.end(), name); |
---|
310 | if (it == regionsMap.end()) |
---|
311 | throw BinException("Can't find region name"); |
---|
312 | return regions[it->second]; |
---|
313 | } |
---|
314 | |
---|
315 | const ROCmKernelMetadata& ROCmBinary::getKernelInfo(const char* name) const |
---|
316 | { |
---|
317 | if (!hasMetadataInfo()) |
---|
318 | throw BinException("Can't find kernel info name"); |
---|
319 | RegionMap::const_iterator it = binaryMapFind(kernelInfosMap.begin(), |
---|
320 | kernelInfosMap.end(), name); |
---|
321 | if (it == kernelInfosMap.end()) |
---|
322 | throw BinException("Can't find kernel info name"); |
---|
323 | return metadataInfo->kernels[it->second]; |
---|
324 | } |
---|
325 | |
---|
326 | // if ROCm binary |
---|
327 | bool CLRX::isROCmBinary(size_t binarySize, const cxbyte* binary) |
---|
328 | { |
---|
329 | if (!isElfBinary(binarySize, binary)) |
---|
330 | return false; |
---|
331 | if (binary[EI_CLASS] != ELFCLASS64) |
---|
332 | return false; |
---|
333 | const Elf64_Ehdr* ehdr = reinterpret_cast<const Elf64_Ehdr*>(binary); |
---|
334 | if (ULEV(ehdr->e_machine) != 0xe0) |
---|
335 | return false; |
---|
336 | return true; |
---|
337 | } |
---|
338 | |
---|
339 | |
---|
340 | void ROCmInput::addEmptyKernel(const char* kernelName) |
---|
341 | { |
---|
342 | symbols.push_back({ kernelName, 0, 0, ROCmRegionType::KERNEL }); |
---|
343 | } |
---|
344 | |
---|
345 | /* ROCm section generators */ |
---|
346 | |
---|
347 | class CLRX_INTERNAL ROCmGotGen: public ElfRegionContent |
---|
348 | { |
---|
349 | private: |
---|
350 | const ROCmInput* input; |
---|
351 | public: |
---|
352 | explicit ROCmGotGen(const ROCmInput* _input) : input(_input) |
---|
353 | { } |
---|
354 | |
---|
355 | void operator()(FastOutputBuffer& fob) const |
---|
356 | { |
---|
357 | fob.fill(input->gotSymbols.size()*8, 0); |
---|
358 | } |
---|
359 | }; |
---|
360 | |
---|
361 | class CLRX_INTERNAL ROCmRelaDynGen: public ElfRegionContent |
---|
362 | { |
---|
363 | private: |
---|
364 | size_t gotOffset; |
---|
365 | const ROCmInput* input; |
---|
366 | public: |
---|
367 | explicit ROCmRelaDynGen(const ROCmInput* _input) : gotOffset(0), input(_input) |
---|
368 | { } |
---|
369 | |
---|
370 | void setGotOffset(size_t _gotOffset) |
---|
371 | { gotOffset = _gotOffset; } |
---|
372 | |
---|
373 | void operator()(FastOutputBuffer& fob) const |
---|
374 | { |
---|
375 | for (size_t i = 0; i < input->gotSymbols.size(); i++) |
---|
376 | { |
---|
377 | size_t symIndex = input->gotSymbols[i]; |
---|
378 | Elf64_Rela rela{}; |
---|
379 | SLEV(rela.r_offset, gotOffset + 8*i); |
---|
380 | SLEV(rela.r_info, ELF64_R_INFO(symIndex + 1, 3)); |
---|
381 | rela.r_addend = 0; |
---|
382 | fob.writeObject(rela); |
---|
383 | } |
---|
384 | } |
---|
385 | }; |
---|
386 | |
---|
387 | /* |
---|
388 | * ROCm Binary Generator |
---|
389 | */ |
---|
390 | |
---|
391 | ROCmBinGenerator::ROCmBinGenerator() : manageable(false), input(nullptr) |
---|
392 | { } |
---|
393 | |
---|
394 | ROCmBinGenerator::ROCmBinGenerator(const ROCmInput* rocmInput) |
---|
395 | : manageable(false), input(rocmInput), rocmGotGen(nullptr), rocmRelaDynGen(nullptr) |
---|
396 | { } |
---|
397 | |
---|
398 | ROCmBinGenerator::ROCmBinGenerator(GPUDeviceType deviceType, |
---|
399 | uint32_t archMinor, uint32_t archStepping, size_t codeSize, const cxbyte* code, |
---|
400 | size_t globalDataSize, const cxbyte* globalData, |
---|
401 | const std::vector<ROCmSymbolInput>& symbols) : |
---|
402 | rocmGotGen(nullptr), rocmRelaDynGen(nullptr) |
---|
403 | { |
---|
404 | std::unique_ptr<ROCmInput> _input(new ROCmInput{}); |
---|
405 | _input->deviceType = deviceType; |
---|
406 | _input->archMinor = archMinor; |
---|
407 | _input->archStepping = archStepping; |
---|
408 | _input->eflags = 0; |
---|
409 | _input->newBinFormat = false; |
---|
410 | _input->globalDataSize = globalDataSize; |
---|
411 | _input->globalData = globalData; |
---|
412 | _input->symbols = symbols; |
---|
413 | _input->codeSize = codeSize; |
---|
414 | _input->code = code; |
---|
415 | _input->commentSize = 0; |
---|
416 | _input->comment = nullptr; |
---|
417 | _input->target = ""; |
---|
418 | _input->targetTripple = ""; |
---|
419 | _input->metadataSize = 0; |
---|
420 | _input->metadata = nullptr; |
---|
421 | _input->useMetadataInfo = false; |
---|
422 | _input->metadataInfo = ROCmMetadata{}; |
---|
423 | input = _input.release(); |
---|
424 | } |
---|
425 | |
---|
426 | ROCmBinGenerator::ROCmBinGenerator(GPUDeviceType deviceType, |
---|
427 | uint32_t archMinor, uint32_t archStepping, size_t codeSize, const cxbyte* code, |
---|
428 | size_t globalDataSize, const cxbyte* globalData, |
---|
429 | std::vector<ROCmSymbolInput>&& symbols) : |
---|
430 | rocmGotGen(nullptr), rocmRelaDynGen(nullptr) |
---|
431 | { |
---|
432 | std::unique_ptr<ROCmInput> _input(new ROCmInput{}); |
---|
433 | _input->deviceType = deviceType; |
---|
434 | _input->archMinor = archMinor; |
---|
435 | _input->archStepping = archStepping; |
---|
436 | _input->eflags = 0; |
---|
437 | _input->newBinFormat = false; |
---|
438 | _input->globalDataSize = globalDataSize; |
---|
439 | _input->globalData = globalData; |
---|
440 | _input->symbols = std::move(symbols); |
---|
441 | _input->codeSize = codeSize; |
---|
442 | _input->code = code; |
---|
443 | _input->commentSize = 0; |
---|
444 | _input->comment = nullptr; |
---|
445 | _input->target = ""; |
---|
446 | _input->targetTripple = ""; |
---|
447 | _input->metadataSize = 0; |
---|
448 | _input->metadata = nullptr; |
---|
449 | _input->useMetadataInfo = false; |
---|
450 | _input->metadataInfo = ROCmMetadata{}; |
---|
451 | input = _input.release(); |
---|
452 | } |
---|
453 | |
---|
454 | ROCmBinGenerator::~ROCmBinGenerator() |
---|
455 | { |
---|
456 | if (manageable) |
---|
457 | delete input; |
---|
458 | if (rocmGotGen!=nullptr) |
---|
459 | delete (ROCmGotGen*)rocmGotGen; |
---|
460 | if (rocmRelaDynGen!=nullptr) |
---|
461 | delete (ROCmRelaDynGen*)rocmRelaDynGen; |
---|
462 | } |
---|
463 | |
---|
464 | void ROCmBinGenerator::setInput(const ROCmInput* input) |
---|
465 | { |
---|
466 | if (manageable) |
---|
467 | delete input; |
---|
468 | manageable = false; |
---|
469 | this->input = input; |
---|
470 | } |
---|
471 | |
---|
472 | // ELF notes contents |
---|
473 | static const cxbyte noteDescType1[8] = |
---|
474 | { 2, 0, 0, 0, 1, 0, 0, 0 }; |
---|
475 | |
---|
476 | static const cxbyte noteDescType3[27] = |
---|
477 | { 4, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
---|
478 | 'A', 'M', 'D', 0, 'A', 'M', 'D', 'G', 'P', 'U', 0 }; |
---|
479 | |
---|
480 | static inline void addMainSectionToTable(cxuint& sectionsNum, uint16_t* builtinTable, |
---|
481 | cxuint elfSectId) |
---|
482 | { builtinTable[elfSectId - ELFSECTID_START] = sectionsNum++; } |
---|
483 | |
---|
484 | void ROCmBinGenerator::prepareBinaryGen() |
---|
485 | { |
---|
486 | AMDGPUArchVersion amdGpuArchValues = getGPUArchVersion(input->deviceType, |
---|
487 | GPUArchVersionTable::ROCM); |
---|
488 | if (input->archMinor!=UINT32_MAX) |
---|
489 | amdGpuArchValues.minor = input->archMinor; |
---|
490 | if (input->archStepping!=UINT32_MAX) |
---|
491 | amdGpuArchValues.stepping = input->archStepping; |
---|
492 | |
---|
493 | comment = "CLRX ROCmBinGenerator " CLRX_VERSION; |
---|
494 | commentSize = ::strlen(comment); |
---|
495 | if (input->comment!=nullptr) |
---|
496 | { |
---|
497 | // if comment, store comment section |
---|
498 | comment = input->comment; |
---|
499 | commentSize = input->commentSize; |
---|
500 | if (commentSize==0) |
---|
501 | commentSize = ::strlen(comment); |
---|
502 | } |
---|
503 | |
---|
504 | uint32_t eflags = input->newBinFormat ? 2 : 0; |
---|
505 | if (input->eflags != BINGEN_DEFAULT) |
---|
506 | eflags = input->eflags; |
---|
507 | |
---|
508 | std::fill(mainBuiltinSectTable, |
---|
509 | mainBuiltinSectTable + ROCMSECTID_MAX-ELFSECTID_START+1, SHN_UNDEF); |
---|
510 | mainSectionsNum = 1; |
---|
511 | |
---|
512 | // generate main builtin section table (for section id translation) |
---|
513 | if (input->newBinFormat) |
---|
514 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ROCMSECTID_NOTE); |
---|
515 | if (input->globalData != nullptr) |
---|
516 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ELFSECTID_RODATA); |
---|
517 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ELFSECTID_DYNSYM); |
---|
518 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ROCMSECTID_HASH); |
---|
519 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ELFSECTID_DYNSTR); |
---|
520 | if (!input->gotSymbols.empty()) |
---|
521 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ROCMSECTID_RELADYN); |
---|
522 | const cxuint execProgHeaderRegionIndex = mainSectionsNum; |
---|
523 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ELFSECTID_TEXT); |
---|
524 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ROCMSECTID_DYNAMIC); |
---|
525 | if (!input->gotSymbols.empty()) |
---|
526 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ROCMSECTID_GOT); |
---|
527 | if (!input->newBinFormat) |
---|
528 | { |
---|
529 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ROCMSECTID_NOTE); |
---|
530 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ROCMSECTID_GPUCONFIG); |
---|
531 | } |
---|
532 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ELFSECTID_COMMENT); |
---|
533 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ELFSECTID_SYMTAB); |
---|
534 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ELFSECTID_SHSTRTAB); |
---|
535 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ELFSECTID_STRTAB); |
---|
536 | |
---|
537 | elfBinGen64.reset(new ElfBinaryGen64({ 0U, 0U, 0x40, 0, ET_DYN, 0xe0, EV_CURRENT, |
---|
538 | cxuint(input->newBinFormat ? execProgHeaderRegionIndex : UINT_MAX), 0, eflags }, |
---|
539 | true, true, true, PHREGION_FILESTART)); |
---|
540 | |
---|
541 | static const int32_t dynTags[] = { |
---|
542 | DT_SYMTAB, DT_SYMENT, DT_STRTAB, DT_STRSZ, DT_HASH }; |
---|
543 | elfBinGen64->addDynamics(sizeof(dynTags)/sizeof(int32_t), dynTags); |
---|
544 | |
---|
545 | // elf program headers |
---|
546 | elfBinGen64->addProgramHeader({ PT_PHDR, PF_R, 0, 1, |
---|
547 | true, Elf64Types::nobase, Elf64Types::nobase, 0 }); |
---|
548 | elfBinGen64->addProgramHeader({ PT_LOAD, PF_R, PHREGION_FILESTART, |
---|
549 | execProgHeaderRegionIndex, |
---|
550 | true, Elf64Types::nobase, Elf64Types::nobase, 0, 0x1000 }); |
---|
551 | elfBinGen64->addProgramHeader({ PT_LOAD, PF_R|PF_X, execProgHeaderRegionIndex, 1, |
---|
552 | true, Elf64Types::nobase, Elf64Types::nobase, 0 }); |
---|
553 | elfBinGen64->addProgramHeader({ PT_LOAD, PF_R|PF_W, execProgHeaderRegionIndex+1, |
---|
554 | cxuint(1 + (!input->gotSymbols.empty())), |
---|
555 | true, Elf64Types::nobase, Elf64Types::nobase, 0 }); |
---|
556 | elfBinGen64->addProgramHeader({ PT_DYNAMIC, PF_R|PF_W, execProgHeaderRegionIndex+1, 1, |
---|
557 | true, Elf64Types::nobase, Elf64Types::nobase, 0, 8 }); |
---|
558 | elfBinGen64->addProgramHeader({ PT_GNU_RELRO, PF_R, execProgHeaderRegionIndex+1, |
---|
559 | cxuint(1 + (!input->gotSymbols.empty())), |
---|
560 | true, Elf64Types::nobase, Elf64Types::nobase, 0, 1 }); |
---|
561 | elfBinGen64->addProgramHeader({ PT_GNU_STACK, PF_R|PF_W, PHREGION_FILESTART, 0, |
---|
562 | true, 0, 0, 0 }); |
---|
563 | |
---|
564 | if (input->newBinFormat) |
---|
565 | // program header for note (new binary format) |
---|
566 | elfBinGen64->addProgramHeader({ PT_NOTE, PF_R, 1, 1, true, |
---|
567 | Elf64Types::nobase, Elf64Types::nobase, 0, 4 }); |
---|
568 | |
---|
569 | target = input->target.c_str(); |
---|
570 | if (target.empty() && !input->targetTripple.empty()) |
---|
571 | { |
---|
572 | target = input->targetTripple.c_str(); |
---|
573 | char dbuf[20]; |
---|
574 | snprintf(dbuf, 20, "-gfx%u%u%u", amdGpuArchValues.major, amdGpuArchValues.minor, |
---|
575 | amdGpuArchValues.stepping); |
---|
576 | target += dbuf; |
---|
577 | } |
---|
578 | // elf notes |
---|
579 | elfBinGen64->addNote({"AMD", sizeof noteDescType1, noteDescType1, 1U}); |
---|
580 | noteBuf.reset(new cxbyte[0x1b]); |
---|
581 | ::memcpy(noteBuf.get(), noteDescType3, 0x1b); |
---|
582 | SULEV(*(uint32_t*)(noteBuf.get()+4), amdGpuArchValues.major); |
---|
583 | SULEV(*(uint32_t*)(noteBuf.get()+8), amdGpuArchValues.minor); |
---|
584 | SULEV(*(uint32_t*)(noteBuf.get()+12), amdGpuArchValues.stepping); |
---|
585 | elfBinGen64->addNote({"AMD", 0x1b, noteBuf.get(), 3U}); |
---|
586 | if (!target.empty()) |
---|
587 | elfBinGen64->addNote({"AMD", target.size(), (const cxbyte*)target.c_str(), 0xbU}); |
---|
588 | |
---|
589 | metadataSize = input->metadataSize; |
---|
590 | metadata = input->metadata; |
---|
591 | if (input->useMetadataInfo) |
---|
592 | { |
---|
593 | // generate ROCm metadata |
---|
594 | std::vector<std::pair<CString, size_t> > symbolIndices(input->symbols.size()); |
---|
595 | // create sorted indices of symbols by its name |
---|
596 | for (size_t k = 0; k < input->symbols.size(); k++) |
---|
597 | symbolIndices[k] = std::make_pair(input->symbols[k].symbolName, k); |
---|
598 | mapSort(symbolIndices.begin(), symbolIndices.end()); |
---|
599 | |
---|
600 | const size_t mdKernelsNum = input->metadataInfo.kernels.size(); |
---|
601 | std::unique_ptr<const ROCmKernelConfig*[]> kernelConfigPtrs( |
---|
602 | new const ROCmKernelConfig*[mdKernelsNum]); |
---|
603 | // generate ROCm kernel config pointers |
---|
604 | for (size_t k = 0; k < mdKernelsNum; k++) |
---|
605 | { |
---|
606 | auto it = binaryMapFind(symbolIndices.begin(), symbolIndices.end(), |
---|
607 | input->metadataInfo.kernels[k].name); |
---|
608 | if (it == symbolIndices.end() || |
---|
609 | (input->symbols[it->second].type != ROCmRegionType::FKERNEL && |
---|
610 | input->symbols[it->second].type != ROCmRegionType::KERNEL)) |
---|
611 | throw BinGenException("Kernel in metadata doesn't exists in code"); |
---|
612 | kernelConfigPtrs[k] = reinterpret_cast<const ROCmKernelConfig*>( |
---|
613 | input->code + input->symbols[it->second].offset); |
---|
614 | } |
---|
615 | // just generate ROCm metadata from info |
---|
616 | generateROCmMetadata(input->metadataInfo, kernelConfigPtrs.get(), metadataStr); |
---|
617 | metadataSize = metadataStr.size(); |
---|
618 | metadata = metadataStr.c_str(); |
---|
619 | } |
---|
620 | |
---|
621 | if (metadataSize != 0) |
---|
622 | elfBinGen64->addNote({"AMD", metadataSize, (const cxbyte*)metadata, 0xaU}); |
---|
623 | |
---|
624 | /// region and sections |
---|
625 | elfBinGen64->addRegion(ElfRegion64::programHeaderTable()); |
---|
626 | if (input->newBinFormat) |
---|
627 | elfBinGen64->addRegion(ElfRegion64::noteSection()); |
---|
628 | if (input->globalData != nullptr) |
---|
629 | elfBinGen64->addRegion(ElfRegion64(input->globalDataSize, input->globalData, 4, |
---|
630 | ".rodata", SHT_PROGBITS, SHF_ALLOC, 0, 0, Elf64Types::nobase)); |
---|
631 | |
---|
632 | elfBinGen64->addRegion(ElfRegion64(0, (const cxbyte*)nullptr, 8, |
---|
633 | ".dynsym", SHT_DYNSYM, SHF_ALLOC, 0, BINGEN_DEFAULT, Elf64Types::nobase)); |
---|
634 | elfBinGen64->addRegion(ElfRegion64(0, (const cxbyte*)nullptr, 4, |
---|
635 | ".hash", SHT_HASH, SHF_ALLOC, |
---|
636 | mainBuiltinSectTable[ELFSECTID_DYNSYM-ELFSECTID_START], 0, |
---|
637 | Elf64Types::nobase)); |
---|
638 | elfBinGen64->addRegion(ElfRegion64(0, (const cxbyte*)nullptr, 1, ".dynstr", SHT_STRTAB, |
---|
639 | SHF_ALLOC, 0, 0, Elf64Types::nobase)); |
---|
640 | if (!input->gotSymbols.empty()) |
---|
641 | { |
---|
642 | ROCmRelaDynGen* sgen = new ROCmRelaDynGen(input); |
---|
643 | rocmRelaDynGen = (void*)sgen; |
---|
644 | elfBinGen64->addRegion(ElfRegion64(input->gotSymbols.size()*sizeof(Elf64_Rela), |
---|
645 | sgen, 8, ".rela.dyn", SHT_RELA, SHF_ALLOC, |
---|
646 | mainBuiltinSectTable[ELFSECTID_DYNSYM-ELFSECTID_START], 0, |
---|
647 | Elf64Types::nobase, sizeof(Elf64_Rela))); |
---|
648 | } |
---|
649 | // '.text' with alignment=4096 |
---|
650 | elfBinGen64->addRegion(ElfRegion64(input->codeSize, (const cxbyte*)input->code, |
---|
651 | 0x1000, ".text", SHT_PROGBITS, SHF_ALLOC|SHF_EXECINSTR, 0, 0, |
---|
652 | Elf64Types::nobase, 0, false, 256)); |
---|
653 | elfBinGen64->addRegion(ElfRegion64(0, (const cxbyte*)nullptr, 0x1000, |
---|
654 | ".dynamic", SHT_DYNAMIC, SHF_ALLOC|SHF_WRITE, |
---|
655 | mainBuiltinSectTable[ELFSECTID_DYNSTR-ELFSECTID_START], 0, |
---|
656 | Elf64Types::nobase, 0, false, 8)); |
---|
657 | if (!input->gotSymbols.empty()) |
---|
658 | { |
---|
659 | ROCmGotGen* sgen = new ROCmGotGen(input); |
---|
660 | rocmGotGen = (void*)sgen; |
---|
661 | elfBinGen64->addRegion(ElfRegion64(input->gotSymbols.size()*8, sgen, |
---|
662 | 8, ".got", SHT_PROGBITS, |
---|
663 | SHF_ALLOC|SHF_WRITE, 0, 0, Elf64Types::nobase)); |
---|
664 | } |
---|
665 | if (!input->newBinFormat) |
---|
666 | { |
---|
667 | elfBinGen64->addRegion(ElfRegion64::noteSection()); |
---|
668 | elfBinGen64->addRegion(ElfRegion64(0, (const cxbyte*)nullptr, 1, |
---|
669 | ".AMDGPU.config", SHT_PROGBITS, 0)); |
---|
670 | } |
---|
671 | elfBinGen64->addRegion(ElfRegion64(commentSize, (const cxbyte*)comment, 1, ".comment", |
---|
672 | SHT_PROGBITS, SHF_MERGE|SHF_STRINGS, 0, 0, 0, 1)); |
---|
673 | elfBinGen64->addRegion(ElfRegion64(0, (const cxbyte*)nullptr, 8, |
---|
674 | ".symtab", SHT_SYMTAB, 0, 0, BINGEN_DEFAULT)); |
---|
675 | elfBinGen64->addRegion(ElfRegion64::shstrtabSection()); |
---|
676 | elfBinGen64->addRegion(ElfRegion64::strtabSection()); |
---|
677 | elfBinGen64->addRegion(ElfRegion64::sectionHeaderTable()); |
---|
678 | |
---|
679 | /* extra sections */ |
---|
680 | for (const BinSection& section: input->extraSections) |
---|
681 | elfBinGen64->addRegion(ElfRegion64(section, mainBuiltinSectTable, |
---|
682 | ROCMSECTID_MAX, mainSectionsNum)); |
---|
683 | updateSymbols(); |
---|
684 | binarySize = elfBinGen64->countSize(); |
---|
685 | |
---|
686 | if (rocmRelaDynGen != nullptr) |
---|
687 | ((ROCmRelaDynGen*)rocmRelaDynGen)->setGotOffset( |
---|
688 | elfBinGen64->getRegionOffset( |
---|
689 | mainBuiltinSectTable[ROCMSECTID_GOT - ELFSECTID_START])); |
---|
690 | } |
---|
691 | |
---|
692 | void ROCmBinGenerator::updateSymbols() |
---|
693 | { |
---|
694 | elfBinGen64->clearSymbols(); |
---|
695 | elfBinGen64->clearDynSymbols(); |
---|
696 | // add symbols (kernels, function kernels and data symbols) |
---|
697 | elfBinGen64->addSymbol(ElfSymbol64("_DYNAMIC", |
---|
698 | mainBuiltinSectTable[ROCMSECTID_DYNAMIC-ELFSECTID_START], |
---|
699 | ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE), STV_HIDDEN, true, 0, 0)); |
---|
700 | const uint16_t textSectIndex = mainBuiltinSectTable[ELFSECTID_TEXT-ELFSECTID_START]; |
---|
701 | for (const ROCmSymbolInput& symbol: input->symbols) |
---|
702 | { |
---|
703 | ElfSymbol64 elfsym; |
---|
704 | switch (symbol.type) |
---|
705 | { |
---|
706 | case ROCmRegionType::KERNEL: |
---|
707 | elfsym = ElfSymbol64(symbol.symbolName.c_str(), textSectIndex, |
---|
708 | ELF64_ST_INFO(STB_GLOBAL, STT_GNU_IFUNC), 0, true, |
---|
709 | symbol.offset, symbol.size); |
---|
710 | break; |
---|
711 | case ROCmRegionType::FKERNEL: |
---|
712 | elfsym = ElfSymbol64(symbol.symbolName.c_str(), textSectIndex, |
---|
713 | ELF64_ST_INFO(STB_GLOBAL, STT_FUNC), 0, true, |
---|
714 | symbol.offset, symbol.size); |
---|
715 | break; |
---|
716 | case ROCmRegionType::DATA: |
---|
717 | elfsym = ElfSymbol64(symbol.symbolName.c_str(), textSectIndex, |
---|
718 | ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT), 0, true, |
---|
719 | symbol.offset, symbol.size); |
---|
720 | break; |
---|
721 | default: |
---|
722 | break; |
---|
723 | } |
---|
724 | // add to symbols and dynamic symbols table |
---|
725 | elfBinGen64->addSymbol(elfsym); |
---|
726 | elfBinGen64->addDynSymbol(elfsym); |
---|
727 | } |
---|
728 | /* extra symbols */ |
---|
729 | for (const BinSymbol& symbol: input->extraSymbols) |
---|
730 | { |
---|
731 | ElfSymbol64 sym(symbol, mainBuiltinSectTable, |
---|
732 | ROCMSECTID_MAX, mainSectionsNum); |
---|
733 | elfBinGen64->addSymbol(sym); |
---|
734 | elfBinGen64->addDynSymbol(sym); |
---|
735 | } |
---|
736 | } |
---|
737 | |
---|
738 | void ROCmBinGenerator::generateInternal(std::ostream* osPtr, std::vector<char>* vPtr, |
---|
739 | Array<cxbyte>* aPtr) |
---|
740 | { |
---|
741 | if (elfBinGen64 == nullptr) |
---|
742 | prepareBinaryGen(); |
---|
743 | /**** |
---|
744 | * prepare for write binary to output |
---|
745 | ****/ |
---|
746 | std::unique_ptr<std::ostream> outStreamHolder; |
---|
747 | std::ostream* os = nullptr; |
---|
748 | if (aPtr != nullptr) |
---|
749 | { |
---|
750 | aPtr->resize(binarySize); |
---|
751 | outStreamHolder.reset( |
---|
752 | new ArrayOStream(binarySize, reinterpret_cast<char*>(aPtr->data()))); |
---|
753 | os = outStreamHolder.get(); |
---|
754 | } |
---|
755 | else if (vPtr != nullptr) |
---|
756 | { |
---|
757 | vPtr->resize(binarySize); |
---|
758 | outStreamHolder.reset(new VectorOStream(*vPtr)); |
---|
759 | os = outStreamHolder.get(); |
---|
760 | } |
---|
761 | else // from argument |
---|
762 | os = osPtr; |
---|
763 | |
---|
764 | const std::ios::iostate oldExceptions = os->exceptions(); |
---|
765 | try |
---|
766 | { |
---|
767 | os->exceptions(std::ios::failbit | std::ios::badbit); |
---|
768 | /**** |
---|
769 | * write binary to output |
---|
770 | ****/ |
---|
771 | FastOutputBuffer bos(256, *os); |
---|
772 | elfBinGen64->generate(bos); |
---|
773 | assert(bos.getWritten() == binarySize); |
---|
774 | |
---|
775 | if (rocmGotGen != nullptr) |
---|
776 | { |
---|
777 | delete (ROCmGotGen*)rocmGotGen; |
---|
778 | rocmGotGen = nullptr; |
---|
779 | } |
---|
780 | if (rocmRelaDynGen != nullptr) |
---|
781 | { |
---|
782 | delete (ROCmGotGen*)rocmRelaDynGen; |
---|
783 | rocmRelaDynGen = nullptr; |
---|
784 | } |
---|
785 | } |
---|
786 | catch(...) |
---|
787 | { |
---|
788 | os->exceptions(oldExceptions); |
---|
789 | throw; |
---|
790 | } |
---|
791 | os->exceptions(oldExceptions); |
---|
792 | } |
---|
793 | |
---|
794 | void ROCmBinGenerator::generate(Array<cxbyte>& array) |
---|
795 | { |
---|
796 | generateInternal(nullptr, nullptr, &array); |
---|
797 | } |
---|
798 | |
---|
799 | void ROCmBinGenerator::generate(std::ostream& os) |
---|
800 | { |
---|
801 | generateInternal(&os, nullptr, nullptr); |
---|
802 | } |
---|
803 | |
---|
804 | void ROCmBinGenerator::generate(std::vector<char>& v) |
---|
805 | { |
---|
806 | generateInternal(nullptr, &v, nullptr); |
---|
807 | } |
---|