1 | /* |
---|
2 | * CLRadeonExtender - Unofficial OpenCL Radeon Extensions Library |
---|
3 | * Copyright (C) 2014-2018 Mateusz Szpakowski |
---|
4 | * |
---|
5 | * This library is free software; you can redistribute it and/or |
---|
6 | * modify it under the terms of the GNU Lesser General Public |
---|
7 | * License as published by the Free Software Foundation; either |
---|
8 | * version 2.1 of the License, or (at your option) any later version. |
---|
9 | * |
---|
10 | * This library is distributed in the hope that it will be useful, |
---|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
13 | * Lesser General Public License for more details. |
---|
14 | * |
---|
15 | * You should have received a copy of the GNU Lesser General Public |
---|
16 | * License along with this library; if not, write to the Free Software |
---|
17 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
---|
18 | */ |
---|
19 | |
---|
20 | #include <CLRX/Config.h> |
---|
21 | #include <cassert> |
---|
22 | #include <cstdio> |
---|
23 | #include <cstring> |
---|
24 | #include <cstdint> |
---|
25 | #include <string> |
---|
26 | #include <vector> |
---|
27 | #include <algorithm> |
---|
28 | #include <utility> |
---|
29 | #include <CLRX/amdbin/ElfBinaries.h> |
---|
30 | #include <CLRX/utils/Utilities.h> |
---|
31 | #include <CLRX/utils/MemAccess.h> |
---|
32 | #include <CLRX/utils/InputOutput.h> |
---|
33 | #include <CLRX/utils/Containers.h> |
---|
34 | #include <CLRX/amdbin/ROCmBinaries.h> |
---|
35 | |
---|
36 | using namespace CLRX; |
---|
37 | |
---|
38 | /* |
---|
39 | * ROCm metadata YAML parser |
---|
40 | */ |
---|
41 | |
---|
42 | ROCmKernelMetadata::ROCmKernelMetadata() : |
---|
43 | langVersion{ BINGEN_NOTSUPPLIED, BINGEN_NOTSUPPLIED }, |
---|
44 | reqdWorkGroupSize{ BINGEN_NOTSUPPLIED, BINGEN_NOTSUPPLIED, BINGEN_NOTSUPPLIED }, |
---|
45 | workGroupSizeHint{ BINGEN_NOTSUPPLIED, BINGEN_NOTSUPPLIED, BINGEN_NOTSUPPLIED }, |
---|
46 | kernargSegmentSize(BINGEN64_NOTSUPPLIED), |
---|
47 | groupSegmentFixedSize(BINGEN64_NOTSUPPLIED), |
---|
48 | privateSegmentFixedSize(BINGEN64_NOTSUPPLIED), |
---|
49 | kernargSegmentAlign(BINGEN64_NOTSUPPLIED), |
---|
50 | wavefrontSize(BINGEN_NOTSUPPLIED), |
---|
51 | sgprsNum(BINGEN_NOTSUPPLIED), vgprsNum(BINGEN_NOTSUPPLIED), |
---|
52 | maxFlatWorkGroupSize(BINGEN64_NOTSUPPLIED), |
---|
53 | fixedWorkGroupSize{ BINGEN_NOTSUPPLIED, BINGEN_NOTSUPPLIED, BINGEN_NOTSUPPLIED }, |
---|
54 | spilledSgprs(BINGEN_NOTSUPPLIED), |
---|
55 | spilledVgprs(BINGEN_NOTSUPPLIED) |
---|
56 | { } |
---|
57 | |
---|
58 | ROCmMetadata::ROCmMetadata() : version{ 0, 0 } |
---|
59 | { } |
---|
60 | |
---|
61 | // return trailing spaces |
---|
62 | static size_t skipSpacesAndComments(const char*& ptr, const char* end, size_t& lineNo) |
---|
63 | { |
---|
64 | const char* lineStart = ptr; |
---|
65 | while (ptr != end) |
---|
66 | { |
---|
67 | lineStart = ptr; |
---|
68 | while (ptr != end && *ptr!='\n' && isSpace(*ptr)) ptr++; |
---|
69 | if (ptr == end) |
---|
70 | break; // end of stream |
---|
71 | if (*ptr=='#') |
---|
72 | { |
---|
73 | // skip comment |
---|
74 | while (ptr != end && *ptr!='\n') ptr++; |
---|
75 | if (ptr == end) |
---|
76 | return 0; // no trailing spaces and end |
---|
77 | } |
---|
78 | else if (*ptr!='\n') |
---|
79 | break; // no comment and no end of line |
---|
80 | else |
---|
81 | { |
---|
82 | ptr++; |
---|
83 | lineNo++; // next line |
---|
84 | } |
---|
85 | } |
---|
86 | return ptr - lineStart; |
---|
87 | } |
---|
88 | |
---|
89 | static inline void skipSpacesToLineEnd(const char*& ptr, const char* end) |
---|
90 | { |
---|
91 | while (ptr != end && *ptr!='\n' && isSpace(*ptr)) ptr++; |
---|
92 | } |
---|
93 | |
---|
94 | static void skipSpacesToNextLine(const char*& ptr, const char* end, size_t& lineNo) |
---|
95 | { |
---|
96 | skipSpacesToLineEnd(ptr, end); |
---|
97 | if (ptr != end && *ptr != '\n' && *ptr!='#') |
---|
98 | throw ParseException(lineNo, "Garbages at line"); |
---|
99 | if (ptr != end && *ptr == '#') |
---|
100 | // skip comment at end of line |
---|
101 | while (ptr!=end && *ptr!='\n') ptr++; |
---|
102 | if (ptr!=end) |
---|
103 | { // newline |
---|
104 | ptr++; |
---|
105 | lineNo++; |
---|
106 | } |
---|
107 | } |
---|
108 | |
---|
109 | static size_t parseYAMLKey(const char*& ptr, const char* end, size_t lineNo, |
---|
110 | size_t keywordsNum, const char** keywords) |
---|
111 | { |
---|
112 | const char* keyPtr = ptr; |
---|
113 | while (ptr != end && (isAlnum(*ptr) || *ptr=='_')) ptr++; |
---|
114 | if (keyPtr == end) |
---|
115 | throw ParseException(lineNo, "Expected key name"); |
---|
116 | const char* keyEnd = ptr; |
---|
117 | while (ptr != end && *ptr!='\n' && isSpace(*ptr)) ptr++; |
---|
118 | if (ptr == end || *ptr!=':') |
---|
119 | throw ParseException(lineNo, "Expected colon"); |
---|
120 | ptr++; |
---|
121 | const char* afterColon = ptr; |
---|
122 | skipSpacesToLineEnd(ptr, end); |
---|
123 | if (afterColon == ptr && ptr != end && *ptr!='\n' && *ptr!='#') |
---|
124 | // only if not immediate newline or comment |
---|
125 | throw ParseException(lineNo, "After key and colon must be space"); |
---|
126 | CString keyword(keyPtr, keyEnd); |
---|
127 | const size_t index = binaryFind(keywords, keywords+keywordsNum, |
---|
128 | keyword.c_str(), CStringLess()) - keywords; |
---|
129 | return index; |
---|
130 | } |
---|
131 | |
---|
132 | template<typename T> |
---|
133 | static T parseYAMLIntValue(const char*& ptr, const char* end, size_t& lineNo, |
---|
134 | bool singleValue = false) |
---|
135 | { |
---|
136 | skipSpacesToLineEnd(ptr, end); |
---|
137 | if (ptr == end || *ptr=='\n') |
---|
138 | throw ParseException(lineNo, "Expected integer value"); |
---|
139 | T value = 0; |
---|
140 | try |
---|
141 | { value = cstrtovCStyle<T>(ptr, end, ptr); } |
---|
142 | catch(const ParseException& ex) |
---|
143 | { throw ParseException(lineNo, ex.what()); } |
---|
144 | |
---|
145 | if (singleValue) |
---|
146 | skipSpacesToNextLine(ptr, end, lineNo); |
---|
147 | return value; |
---|
148 | } |
---|
149 | |
---|
150 | static bool parseYAMLBoolValue(const char*& ptr, const char* end, size_t& lineNo, |
---|
151 | bool singleValue = false) |
---|
152 | { |
---|
153 | skipSpacesToLineEnd(ptr, end); |
---|
154 | if (ptr == end || *ptr=='\n') |
---|
155 | throw ParseException(lineNo, "Expected boolean value"); |
---|
156 | |
---|
157 | const char* wordPtr = ptr; |
---|
158 | while(ptr != end && isAlnum(*ptr)) ptr++; |
---|
159 | CString word(wordPtr, ptr); |
---|
160 | |
---|
161 | bool value = false; |
---|
162 | bool isSet = false; |
---|
163 | for (const char* v: { "1", "true", "t", "on", "yes", "y"}) |
---|
164 | if (::strcasecmp(word.c_str(), v) == 0) |
---|
165 | { |
---|
166 | isSet = true; |
---|
167 | value = true; |
---|
168 | break; |
---|
169 | } |
---|
170 | if (!isSet) |
---|
171 | for (const char* v: { "0", "false", "f", "off", "no", "n"}) |
---|
172 | if (::strcasecmp(word.c_str(), v) == 0) |
---|
173 | { |
---|
174 | isSet = true; |
---|
175 | value = false; |
---|
176 | break; |
---|
177 | } |
---|
178 | if (!isSet) |
---|
179 | throw ParseException(lineNo, "Is not boolean value"); |
---|
180 | |
---|
181 | if (singleValue) |
---|
182 | skipSpacesToNextLine(ptr, end, lineNo); |
---|
183 | return value; |
---|
184 | } |
---|
185 | |
---|
186 | static std::string trimStrSpaces(const std::string& str) |
---|
187 | { |
---|
188 | size_t i = 0; |
---|
189 | const size_t sz = str.size(); |
---|
190 | while (i!=sz && isSpace(str[i])) i++; |
---|
191 | if (i == sz) return ""; |
---|
192 | size_t j = sz-1; |
---|
193 | while (j>i && isSpace(str[j])) j--; |
---|
194 | return str.substr(i, j-i+1); |
---|
195 | } |
---|
196 | |
---|
197 | static std::string parseYAMLString(const char*& linePtr, const char* end, |
---|
198 | size_t& lineNo) |
---|
199 | { |
---|
200 | std::string strarray; |
---|
201 | if (linePtr == end || (*linePtr != '"' && *linePtr != '\'')) |
---|
202 | { |
---|
203 | while (linePtr != end && !isSpace(*linePtr) && *linePtr != ',') linePtr++; |
---|
204 | throw ParseException(lineNo, "Expected string"); |
---|
205 | } |
---|
206 | const char termChar = *linePtr; |
---|
207 | linePtr++; |
---|
208 | |
---|
209 | // main loop, where is character parsing |
---|
210 | while (linePtr != end && *linePtr != termChar) |
---|
211 | { |
---|
212 | if (*linePtr == '\\') |
---|
213 | { |
---|
214 | // escape |
---|
215 | linePtr++; |
---|
216 | uint16_t value; |
---|
217 | if (linePtr == end) |
---|
218 | throw ParseException(lineNo, "Unterminated character of string"); |
---|
219 | if (*linePtr == 'x') |
---|
220 | { |
---|
221 | // hex literal |
---|
222 | linePtr++; |
---|
223 | if (linePtr == end) |
---|
224 | throw ParseException(lineNo, "Unterminated character of string"); |
---|
225 | value = 0; |
---|
226 | if (isXDigit(*linePtr)) |
---|
227 | for (; linePtr != end; linePtr++) |
---|
228 | { |
---|
229 | cxuint digit; |
---|
230 | if (*linePtr >= '0' && *linePtr <= '9') |
---|
231 | digit = *linePtr-'0'; |
---|
232 | else if (*linePtr >= 'a' && *linePtr <= 'f') |
---|
233 | digit = *linePtr-'a'+10; |
---|
234 | else if (*linePtr >= 'A' && *linePtr <= 'F') |
---|
235 | digit = *linePtr-'A'+10; |
---|
236 | else |
---|
237 | break; |
---|
238 | value = (value<<4) + digit; |
---|
239 | } |
---|
240 | else |
---|
241 | throw ParseException(lineNo, "Expected hexadecimal character code"); |
---|
242 | value &= 0xff; |
---|
243 | } |
---|
244 | else if (isODigit(*linePtr)) |
---|
245 | { |
---|
246 | // octal literal |
---|
247 | value = 0; |
---|
248 | for (cxuint i = 0; linePtr != end && i < 3; i++, linePtr++) |
---|
249 | { |
---|
250 | if (!isODigit(*linePtr)) |
---|
251 | break; |
---|
252 | value = (value<<3) + uint64_t(*linePtr-'0'); |
---|
253 | // checking range |
---|
254 | if (value > 255) |
---|
255 | throw ParseException(lineNo, "Octal code out of range"); |
---|
256 | } |
---|
257 | } |
---|
258 | else |
---|
259 | { |
---|
260 | // normal escapes |
---|
261 | const char c = *linePtr++; |
---|
262 | switch (c) |
---|
263 | { |
---|
264 | case 'a': |
---|
265 | value = '\a'; |
---|
266 | break; |
---|
267 | case 'b': |
---|
268 | value = '\b'; |
---|
269 | break; |
---|
270 | case 'r': |
---|
271 | value = '\r'; |
---|
272 | break; |
---|
273 | case 'n': |
---|
274 | value = '\n'; |
---|
275 | break; |
---|
276 | case 'f': |
---|
277 | value = '\f'; |
---|
278 | break; |
---|
279 | case 'v': |
---|
280 | value = '\v'; |
---|
281 | break; |
---|
282 | case 't': |
---|
283 | value = '\t'; |
---|
284 | break; |
---|
285 | case '\\': |
---|
286 | value = '\\'; |
---|
287 | break; |
---|
288 | case '\'': |
---|
289 | value = '\''; |
---|
290 | break; |
---|
291 | case '\"': |
---|
292 | value = '\"'; |
---|
293 | break; |
---|
294 | default: |
---|
295 | value = c; |
---|
296 | } |
---|
297 | } |
---|
298 | strarray.push_back(value); |
---|
299 | } |
---|
300 | else // regular character |
---|
301 | { |
---|
302 | if (*linePtr=='\n') |
---|
303 | lineNo++; |
---|
304 | strarray.push_back(*linePtr++); |
---|
305 | } |
---|
306 | } |
---|
307 | if (linePtr == end) |
---|
308 | throw ParseException(lineNo, "Unterminated string"); |
---|
309 | linePtr++; |
---|
310 | return strarray; |
---|
311 | } |
---|
312 | |
---|
313 | static std::string parseYAMLStringValue(const char*& ptr, const char* end, size_t& lineNo, |
---|
314 | cxuint prevIndent, bool singleValue = false) |
---|
315 | { |
---|
316 | skipSpacesToLineEnd(ptr, end); |
---|
317 | if (ptr == end) |
---|
318 | return ""; |
---|
319 | std::string buf; |
---|
320 | if (*ptr=='"' || *ptr== '\'') |
---|
321 | buf = parseYAMLString(ptr, end, lineNo); |
---|
322 | // otherwise parse stream |
---|
323 | else if (*ptr == '|' || *ptr == '>') |
---|
324 | { |
---|
325 | // multiline |
---|
326 | bool newLineFold = *ptr=='>'; |
---|
327 | while (ptr != end && *ptr!='\n') ptr++; |
---|
328 | if (ptr == end) |
---|
329 | return ""; // end |
---|
330 | lineNo++; |
---|
331 | ptr++; // skip newline |
---|
332 | const char* lineStart = ptr; |
---|
333 | skipSpacesToLineEnd(ptr, end); |
---|
334 | size_t indent = ptr - lineStart; |
---|
335 | if (indent <= prevIndent) |
---|
336 | throw ParseException(lineNo, "Unindented string block"); |
---|
337 | |
---|
338 | std::string buf; |
---|
339 | while(ptr != end) |
---|
340 | { |
---|
341 | const char* strStart = ptr; |
---|
342 | while (ptr != end && *ptr!='\n') ptr++; |
---|
343 | buf.append(strStart, ptr); |
---|
344 | |
---|
345 | if (ptr != end) // if new line |
---|
346 | { |
---|
347 | lineNo++; |
---|
348 | ptr++; |
---|
349 | } |
---|
350 | else // end of stream |
---|
351 | break; |
---|
352 | |
---|
353 | const char* lineStart = ptr; |
---|
354 | skipSpacesToLineEnd(ptr, end); |
---|
355 | bool emptyLines = false; |
---|
356 | while (size_t(ptr - lineStart) <= indent) |
---|
357 | { |
---|
358 | if (ptr != end && *ptr=='\n') |
---|
359 | { |
---|
360 | // empty line |
---|
361 | buf.append("\n"); |
---|
362 | ptr++; |
---|
363 | lineNo++; |
---|
364 | lineStart = ptr; |
---|
365 | skipSpacesToLineEnd(ptr, end); |
---|
366 | emptyLines = true; |
---|
367 | continue; |
---|
368 | } |
---|
369 | // if smaller indent |
---|
370 | if (size_t(ptr - lineStart) < indent) |
---|
371 | { |
---|
372 | buf.append("\n"); // always add newline at last line |
---|
373 | if (ptr != end) |
---|
374 | ptr = lineStart; |
---|
375 | return buf; |
---|
376 | } |
---|
377 | else // if this same and not end of line |
---|
378 | break; |
---|
379 | } |
---|
380 | |
---|
381 | if (!emptyLines || !newLineFold) |
---|
382 | // add missing newline after line with text |
---|
383 | // only if no emptyLines or no newLineFold |
---|
384 | buf.append(newLineFold ? " " : "\n"); |
---|
385 | // to indent |
---|
386 | ptr = lineStart + indent; |
---|
387 | } |
---|
388 | return buf; |
---|
389 | } |
---|
390 | else |
---|
391 | { |
---|
392 | // single line string (unquoted) |
---|
393 | const char* strStart = ptr; |
---|
394 | // automatically trim spaces at ends |
---|
395 | const char* strEnd = ptr; |
---|
396 | while (ptr != end && *ptr!='\n' && *ptr!='#') |
---|
397 | { |
---|
398 | if (!isSpace(*ptr)) |
---|
399 | strEnd = ptr; // to trim at end |
---|
400 | ptr++; |
---|
401 | } |
---|
402 | if (strEnd != end && !isSpace(*strEnd)) |
---|
403 | strEnd++; |
---|
404 | |
---|
405 | buf.assign(strStart, strEnd); |
---|
406 | } |
---|
407 | |
---|
408 | if (singleValue) |
---|
409 | skipSpacesToNextLine(ptr, end, lineNo); |
---|
410 | return buf; |
---|
411 | } |
---|
412 | |
---|
413 | class CLRX_INTERNAL YAMLElemConsumer |
---|
414 | { |
---|
415 | public: |
---|
416 | virtual void consume(const char*& ptr, const char* end, size_t& lineNo, |
---|
417 | cxuint prevIndent, bool singleValue) = 0; |
---|
418 | }; |
---|
419 | |
---|
420 | static void parseYAMLValArray(const char*& ptr, const char* end, size_t& lineNo, |
---|
421 | size_t prevIndent, YAMLElemConsumer* elemConsumer, bool singleValue = false) |
---|
422 | { |
---|
423 | skipSpacesToLineEnd(ptr, end); |
---|
424 | if (ptr == end) |
---|
425 | return; |
---|
426 | |
---|
427 | if (*ptr == '[') |
---|
428 | { |
---|
429 | ptr++; |
---|
430 | skipSpacesAndComments(ptr, end, lineNo); |
---|
431 | while (ptr != end) |
---|
432 | { |
---|
433 | // parse in line |
---|
434 | elemConsumer->consume(ptr, end, lineNo, 0, false); |
---|
435 | skipSpacesAndComments(ptr, end, lineNo); |
---|
436 | if (ptr!=end && *ptr==']') |
---|
437 | // just end |
---|
438 | break; |
---|
439 | else if (ptr==end || *ptr!=',') |
---|
440 | throw ParseException(lineNo, "Expected ','"); |
---|
441 | ptr++; |
---|
442 | skipSpacesAndComments(ptr, end, lineNo); |
---|
443 | } |
---|
444 | if (ptr == end) |
---|
445 | throw ParseException(lineNo, "Unterminated array"); |
---|
446 | ptr++; |
---|
447 | |
---|
448 | if (singleValue) |
---|
449 | skipSpacesToNextLine(ptr, end, lineNo); |
---|
450 | return; |
---|
451 | } |
---|
452 | // sequence |
---|
453 | size_t oldLineNo = lineNo; |
---|
454 | size_t indent0 = skipSpacesAndComments(ptr, end, lineNo); |
---|
455 | if (ptr == end || lineNo == oldLineNo) |
---|
456 | throw ParseException(lineNo, "Expected sequence of values"); |
---|
457 | |
---|
458 | if (indent0 < prevIndent) |
---|
459 | throw ParseException(lineNo, "Unindented sequence of objects"); |
---|
460 | |
---|
461 | while (ptr != end) |
---|
462 | { |
---|
463 | if (*ptr != '-') |
---|
464 | throw ParseException(lineNo, "No '-' before element value"); |
---|
465 | ptr++; |
---|
466 | const char* afterMinus = ptr; |
---|
467 | skipSpacesToLineEnd(ptr, end); |
---|
468 | if (afterMinus == ptr) |
---|
469 | throw ParseException(lineNo, "No spaces after '-'"); |
---|
470 | elemConsumer->consume(ptr, end, lineNo, indent0+1 + ptr-afterMinus, true); |
---|
471 | |
---|
472 | size_t indent = skipSpacesAndComments(ptr, end, lineNo); |
---|
473 | if (indent < indent0) |
---|
474 | { |
---|
475 | // if parent level |
---|
476 | ptr -= indent; |
---|
477 | break; |
---|
478 | } |
---|
479 | if (indent != indent0) |
---|
480 | throw ParseException(lineNo, "Wrong indentation of element"); |
---|
481 | } |
---|
482 | } |
---|
483 | |
---|
484 | template<typename T> |
---|
485 | class CLRX_INTERNAL YAMLIntArrayConsumer: public YAMLElemConsumer |
---|
486 | { |
---|
487 | private: |
---|
488 | size_t elemsNum; |
---|
489 | size_t requiredElemsNum; |
---|
490 | public: |
---|
491 | T* array; |
---|
492 | |
---|
493 | YAMLIntArrayConsumer(size_t reqElemsNum, T* _array) |
---|
494 | : elemsNum(0), requiredElemsNum(reqElemsNum), array(_array) |
---|
495 | { } |
---|
496 | |
---|
497 | virtual void consume(const char*& ptr, const char* end, size_t& lineNo, |
---|
498 | cxuint prevIndent, bool singleValue) |
---|
499 | { |
---|
500 | if (elemsNum == requiredElemsNum) |
---|
501 | throw ParseException(lineNo, "Too many elements"); |
---|
502 | try |
---|
503 | { array[elemsNum] = cstrtovCStyle<T>(ptr, end, ptr); } |
---|
504 | catch(const ParseException& ex) |
---|
505 | { throw ParseException(lineNo, ex.what()); } |
---|
506 | elemsNum++; |
---|
507 | if (singleValue) |
---|
508 | skipSpacesToNextLine(ptr, end, lineNo); |
---|
509 | } |
---|
510 | }; |
---|
511 | |
---|
512 | // printf info string consumer |
---|
513 | |
---|
514 | class CLRX_INTERNAL YAMLPrintfVectorConsumer: public YAMLElemConsumer |
---|
515 | { |
---|
516 | public: |
---|
517 | std::vector<ROCmPrintfInfo>& printfInfos; |
---|
518 | |
---|
519 | YAMLPrintfVectorConsumer(std::vector<ROCmPrintfInfo>& _printInfos) |
---|
520 | : printfInfos(_printInfos) |
---|
521 | { } |
---|
522 | |
---|
523 | virtual void consume(const char*& ptr, const char* end, size_t& lineNo, |
---|
524 | cxuint prevIndent, bool singleValue) |
---|
525 | { |
---|
526 | const size_t oldLineNo = lineNo; |
---|
527 | std::string str = parseYAMLStringValue(ptr, end, lineNo, prevIndent, singleValue); |
---|
528 | // parse printf string |
---|
529 | ROCmPrintfInfo printfInfo{}; |
---|
530 | |
---|
531 | const char* ptr2 = str.c_str(); |
---|
532 | const char* end2 = str.c_str() + str.size(); |
---|
533 | skipSpacesToLineEnd(ptr2, end2); |
---|
534 | try |
---|
535 | { printfInfo.id = cstrtovCStyle<uint32_t>(ptr2, end2, ptr2); } |
---|
536 | catch(const ParseException& ex) |
---|
537 | { throw ParseException(oldLineNo, ex.what()); } |
---|
538 | skipSpacesToLineEnd(ptr2, end2); |
---|
539 | if (ptr2==end || *ptr2!=':') |
---|
540 | throw ParseException(oldLineNo, "No colon after printf callId"); |
---|
541 | ptr2++; |
---|
542 | skipSpacesToLineEnd(ptr2, end2); |
---|
543 | uint32_t argsNum = cstrtovCStyle<uint32_t>(ptr2, end2, ptr2); |
---|
544 | skipSpacesToLineEnd(ptr2, end2); |
---|
545 | if (ptr2==end || *ptr2!=':') |
---|
546 | throw ParseException(oldLineNo, "No colon after printf argsNum"); |
---|
547 | ptr2++; |
---|
548 | |
---|
549 | printfInfo.argSizes.resize(argsNum); |
---|
550 | |
---|
551 | // parse arg sizes |
---|
552 | for (size_t i = 0; i < argsNum; i++) |
---|
553 | { |
---|
554 | skipSpacesToLineEnd(ptr2, end2); |
---|
555 | printfInfo.argSizes[i] = cstrtovCStyle<uint32_t>(ptr2, end2, ptr2); |
---|
556 | skipSpacesToLineEnd(ptr2, end2); |
---|
557 | if (ptr2==end || *ptr2!=':') |
---|
558 | throw ParseException(lineNo, "No colon after printf argsNum"); |
---|
559 | ptr2++; |
---|
560 | } |
---|
561 | // format |
---|
562 | printfInfo.format.assign(ptr2, end2); |
---|
563 | |
---|
564 | printfInfos.push_back(printfInfo); |
---|
565 | } |
---|
566 | }; |
---|
567 | |
---|
568 | static void skipYAMLValue(const char* ptr, const char* end, size_t& lineNo, |
---|
569 | cxuint prevIndent) |
---|
570 | { |
---|
571 | skipSpacesToLineEnd(ptr, end); |
---|
572 | if (ptr == end || *ptr=='\n') |
---|
573 | return; |
---|
574 | if (ptr==end || (*ptr!='\'' && *ptr!='"' && *ptr!='|' && *ptr!='>' && *ptr !='[')) |
---|
575 | { |
---|
576 | skipSpacesToLineEnd(ptr, end); |
---|
577 | if (ptr!=end) ptr++; |
---|
578 | return; |
---|
579 | } |
---|
580 | // string |
---|
581 | if (*ptr=='\'' || *ptr=='"') |
---|
582 | { |
---|
583 | const char delim = *ptr++; |
---|
584 | bool escape = false; |
---|
585 | while(ptr!=end && (escape || *ptr!=delim)) |
---|
586 | { |
---|
587 | if (!escape && *ptr=='\\') |
---|
588 | escape = true; |
---|
589 | else if (escape) |
---|
590 | escape = false; |
---|
591 | if (*ptr=='\n') lineNo++; |
---|
592 | ptr++; |
---|
593 | } |
---|
594 | if (ptr==end) |
---|
595 | throw ParseException(lineNo, "Unterminated string"); |
---|
596 | ptr++; |
---|
597 | skipSpacesToNextLine(ptr, end, lineNo); |
---|
598 | } |
---|
599 | else if (*ptr=='[') |
---|
600 | { // otherwise [array] |
---|
601 | ptr++; |
---|
602 | skipSpacesAndComments(ptr, end, lineNo); |
---|
603 | while (ptr != end) |
---|
604 | { |
---|
605 | // parse in line |
---|
606 | skipYAMLValue(ptr, end, lineNo, 0); |
---|
607 | if (ptr!=end && *ptr==',') |
---|
608 | throw ParseException(lineNo, "Expected ','"); |
---|
609 | else if (ptr!=end && *ptr==']') |
---|
610 | // just end |
---|
611 | break; |
---|
612 | ptr++; |
---|
613 | skipSpacesAndComments(ptr, end, lineNo); |
---|
614 | } |
---|
615 | if (ptr == end) |
---|
616 | throw ParseException(lineNo, "Unterminated array"); |
---|
617 | ptr++; |
---|
618 | skipSpacesToNextLine(ptr, end, lineNo); |
---|
619 | } |
---|
620 | else |
---|
621 | { // block value |
---|
622 | if (ptr!=end && (*ptr=='|' || *ptr=='>')) |
---|
623 | ptr++; // skip '|' or '>' |
---|
624 | skipSpacesToLineEnd(ptr, end); |
---|
625 | if (ptr!=end && *ptr!='\n') |
---|
626 | throw ParseException(lineNo, "Garbages before block or children"); |
---|
627 | ptr++; |
---|
628 | lineNo++; |
---|
629 | // skip all lines indented beyound previous level |
---|
630 | while (ptr != end) |
---|
631 | { |
---|
632 | const char* lineStart = ptr; |
---|
633 | skipSpacesToLineEnd(ptr, end); |
---|
634 | if (ptr == end) |
---|
635 | { |
---|
636 | ptr++; |
---|
637 | lineNo++; |
---|
638 | continue; |
---|
639 | } |
---|
640 | if (ptr-lineStart < prevIndent) |
---|
641 | { |
---|
642 | ptr = lineStart; |
---|
643 | break; |
---|
644 | } |
---|
645 | } |
---|
646 | } |
---|
647 | } |
---|
648 | |
---|
// Indices of the top-level metadata keys; each enumerator equals the
// position of its name in mainMetadataKeywords.
enum {
    ROCMMT_MAIN_KERNELS = 0,
    ROCMMT_MAIN_PRINTF,
    ROCMMT_MAIN_VERSION
};

// Names of the top-level metadata keys. The table is searched by
// parseYAMLKey with binaryFind, so the entries must stay in sorted order.
static const char* mainMetadataKeywords[] =
{
    "Kernels",
    "Printf",
    "Version"
};

// number of entries in mainMetadataKeywords
static const size_t mainMetadataKeywordsNum =
        sizeof(mainMetadataKeywords) / sizeof(mainMetadataKeywords[0]);
---|
660 | |
---|
// Indices of the per-kernel metadata keys; each enumerator equals the
// position of its name in kernelMetadataKeywords.
enum {
    ROCMMT_KERNEL_ARGS = 0,
    ROCMMT_KERNEL_ATTRS,
    ROCMMT_KERNEL_CODEPROPS,
    ROCMMT_KERNEL_LANGUAGE,
    ROCMMT_KERNEL_LANGUAGE_VERSION,
    ROCMMT_KERNEL_NAME,
    ROCMMT_KERNEL_SYMBOLNAME
};

// Names of the per-kernel metadata keys, kept in sorted order
// (presumably searched via parseYAMLKey like the top-level table).
static const char* kernelMetadataKeywords[] =
{
    "Args",
    "Attrs",
    "CodeProps",
    "Language",
    "LanguageVersion",
    "Name",
    "SymbolName"
};

// number of entries in kernelMetadataKeywords
static const size_t kernelMetadataKeywordsNum =
        sizeof(kernelMetadataKeywords) / sizeof(kernelMetadataKeywords[0]);
---|
674 | |
---|
// Indices of the kernel attribute keys; each enumerator equals the position
// of its name in kernelAttrMetadataKeywords.
enum {
    ROCMMT_ATTRS_REQD_WORK_GROUP_SIZE = 0,
    ROCMMT_ATTRS_RUNTIME_HANDLE,
    ROCMMT_ATTRS_VECTYPEHINT,
    ROCMMT_ATTRS_WORK_GROUP_SIZE_HINT
};

// Names of the kernel attribute keys, kept in sorted order
// (presumably searched via parseYAMLKey like the top-level table).
static const char* kernelAttrMetadataKeywords[] =
{
    "ReqdWorkGroupSize",
    "RuntimeHandle",
    "VecTypeHint",
    "WorkGroupSizeHint"
};

// number of entries in kernelAttrMetadataKeywords
static const size_t kernelAttrMetadataKeywordsNum =
        sizeof(kernelAttrMetadataKeywords) / sizeof(kernelAttrMetadataKeywords[0]);
---|
687 | |
---|
// Indices of the kernel code property keys; each enumerator equals the
// position of its name in kernelCodePropsKeywords.
enum {
    ROCMMT_CODEPROPS_FIXED_WORK_GROUP_SIZE = 0,
    ROCMMT_CODEPROPS_GROUP_SEGMENT_FIXED_SIZE,
    ROCMMT_CODEPROPS_KERNARG_SEGMENT_ALIGN,
    ROCMMT_CODEPROPS_KERNARG_SEGMENT_SIZE,
    ROCMMT_CODEPROPS_MAX_FLAT_WORK_GROUP_SIZE,
    ROCMMT_CODEPROPS_NUM_SGPRS,
    ROCMMT_CODEPROPS_NUM_SPILLED_SGPRS,
    ROCMMT_CODEPROPS_NUM_SPILLED_VGPRS,
    ROCMMT_CODEPROPS_NUM_VGPRS,
    ROCMMT_CODEPROPS_PRIVATE_SEGMENT_FIXED_SIZE,
    ROCMMT_CODEPROPS_WAVEFRONT_SIZE
};

// Names of the kernel code property keys, kept in sorted order
// (presumably searched via parseYAMLKey like the top-level table).
static const char* kernelCodePropsKeywords[] =
{
    "FixedWorkGroupSize",
    "GroupSegmentFixedSize",
    "KernargSegmentAlign",
    "KernargSegmentSize",
    "MaxFlatWorkGroupSize",
    "NumSGPRs",
    "NumSpilledSGPRs",
    "NumSpilledVGPRs",
    "NumVGPRs",
    "PrivateSegmentFixedSize",
    "WavefrontSize"
};

// number of entries in kernelCodePropsKeywords
static const size_t kernelCodePropsKeywordsNum =
        sizeof(kernelCodePropsKeywords) / sizeof(kernelCodePropsKeywords[0]);
---|
707 | |
---|
// Indices of the kernel argument keys; each enumerator equals the position
// of its name in kernelArgInfosKeywords.
enum {
    ROCMMT_ARGS_ACCQUAL = 0,
    ROCMMT_ARGS_ACTUALACCQUAL,
    ROCMMT_ARGS_ADDRSPACEQUAL,
    ROCMMT_ARGS_ALIGN,
    ROCMMT_ARGS_ISCONST,
    ROCMMT_ARGS_ISPIPE,
    ROCMMT_ARGS_ISRESTRICT,
    ROCMMT_ARGS_ISVOLATILE,
    ROCMMT_ARGS_NAME,
    ROCMMT_ARGS_POINTEE_ALIGN,
    ROCMMT_ARGS_SIZE,
    ROCMMT_ARGS_TYPENAME,
    ROCMMT_ARGS_VALUEKIND,
    ROCMMT_ARGS_VALUETYPE
};

// Names of the kernel argument keys, kept in sorted order
// (presumably searched via parseYAMLKey like the top-level table).
static const char* kernelArgInfosKeywords[] =
{
    "AccQual",
    "ActualAccQual",
    "AddrSpaceQual",
    "Align",
    "IsConst",
    "IsPipe",
    "IsRestrict",
    "IsVolatile",
    "Name",
    "PointeeAlign",
    "Size",
    "TypeName",
    "ValueKind",
    "ValueType"
};

// number of entries in kernelArgInfosKeywords
static const size_t kernelArgInfosKeywordsNum =
        sizeof(kernelArgInfosKeywords) / sizeof(kernelArgInfosKeywords[0]);
---|
725 | |
---|
726 | static const std::pair<const char*, ROCmValueKind> rocmValueKindNames[] = |
---|
727 | { |
---|
728 | { "ByValue", ROCmValueKind::BY_VALUE }, |
---|
729 | { "DynamicSharedPointer", ROCmValueKind::DYN_SHARED_PTR }, |
---|
730 | { "GlobalBuffer", ROCmValueKind::GLOBAL_BUFFER }, |
---|
731 | { "HiddenCompletionAction", ROCmValueKind::HIDDEN_COMPLETION_ACTION }, |
---|
732 | { "HiddenDefaultQueue", ROCmValueKind::HIDDEN_DEFAULT_QUEUE }, |
---|
733 | { "HiddenGlobalOffsetX", ROCmValueKind::HIDDEN_GLOBAL_OFFSET_X }, |
---|
734 | { "HiddenGlobalOffsetY", ROCmValueKind::HIDDEN_GLOBAL_OFFSET_Y }, |
---|
735 | { "HiddenGlobalOffsetZ", ROCmValueKind::HIDDEN_GLOBAL_OFFSET_Z }, |
---|
736 | { "HiddenNone", ROCmValueKind::HIDDEN_NONE }, |
---|
737 | { "HiddenPrintfBuffer", ROCmValueKind::HIDDEN_PRINTF_BUFFER }, |
---|
738 | { "Image", ROCmValueKind::IMAGE }, |
---|
739 | { "Pipe", ROCmValueKind::PIPE }, |
---|
740 | { "Queue", ROCmValueKind::QUEUE }, |
---|
741 | { "Sampler", ROCmValueKind::SAMPLER } |
---|
742 | }; |
---|
743 | |
---|
744 | static const size_t rocmValueKindNamesNum = |
---|
745 | sizeof(rocmValueKindNames) / sizeof(std::pair<const char*, ROCmValueKind>); |
---|
746 | |
---|
747 | static const std::pair<const char*, ROCmValueType> rocmValueTypeNames[] = |
---|
748 | { |
---|
749 | { "F16", ROCmValueType::FLOAT16 }, |
---|
750 | { "F32", ROCmValueType::FLOAT32 }, |
---|
751 | { "F64", ROCmValueType::FLOAT64 }, |
---|
752 | { "I16", ROCmValueType::INT16 }, |
---|
753 | { "I32", ROCmValueType::INT32 }, |
---|
754 | { "I64", ROCmValueType::INT64 }, |
---|
755 | { "I8", ROCmValueType::INT8 }, |
---|
756 | { "Struct", ROCmValueType::STRUCTURE }, |
---|
757 | { "U16", ROCmValueType::UINT16 }, |
---|
758 | { "U32", ROCmValueType::UINT32 }, |
---|
759 | { "U64", ROCmValueType::UINT64 }, |
---|
760 | { "U8", ROCmValueType::UINT8 } |
---|
761 | }; |
---|
762 | |
---|
763 | static const size_t rocmValueTypeNamesNum = |
---|
764 | sizeof(rocmValueTypeNames) / sizeof(std::pair<const char*, ROCmValueType>); |
---|
765 | |
---|
// Textual names of address spaces.
// NOTE(review): the order presumably mirrors the numeric values of the
// corresponding address space enumeration - confirm before reordering.
static const char* rocmAddrSpaceTypesTbl[] =
{
    "Private",
    "Global",
    "Constant",
    "Local",
    "Generic",
    "Region"
};

// Textual names of access qualifiers.
// NOTE(review): the order presumably mirrors the numeric values of the
// corresponding access qualifier enumeration - confirm before reordering.
static const char* rocmAccessQualifierTbl[] =
{
    "Default",
    "ReadOnly",
    "WriteOnly",
    "ReadWrite"
};
---|
771 | |
---|
772 | static void parseROCmMetadata(size_t metadataSize, const char* metadata, |
---|
773 | ROCmMetadata& metadataInfo) |
---|
774 | { |
---|
775 | const char* ptr = metadata; |
---|
776 | const char* end = metadata + metadataSize; |
---|
777 | size_t lineNo = 1; |
---|
778 | // init metadata info object |
---|
779 | metadataInfo.kernels.clear(); |
---|
780 | metadataInfo.printfInfos.clear(); |
---|
781 | metadataInfo.version[0] = metadataInfo.version[1] = 0; |
---|
782 | |
---|
783 | std::vector<ROCmKernelMetadata>& kernels = metadataInfo.kernels; |
---|
784 | |
---|
785 | cxuint levels[6] = { UINT_MAX, UINT_MAX, UINT_MAX, UINT_MAX, UINT_MAX, UINT_MAX }; |
---|
786 | cxuint curLevel = 0; |
---|
787 | bool inKernels = false; |
---|
788 | bool inKernel = false; |
---|
789 | bool inKernelArgs = false; |
---|
790 | bool inKernelArg = false; |
---|
791 | bool inKernelCodeProps = false; |
---|
792 | bool inKernelAttrs = false; |
---|
793 | bool canToNextLevel = false; |
---|
794 | |
---|
795 | size_t oldLineNo = 0; |
---|
796 | while (ptr != end) |
---|
797 | { |
---|
798 | cxuint level = skipSpacesAndComments(ptr, end, lineNo); |
---|
799 | if (ptr == end || lineNo == oldLineNo) |
---|
800 | throw ParseException(lineNo, "Expected new line"); |
---|
801 | |
---|
802 | if (levels[curLevel] == UINT_MAX) |
---|
803 | levels[curLevel] = level; |
---|
804 | else if (levels[curLevel] < level) |
---|
805 | { |
---|
806 | if (canToNextLevel) |
---|
807 | // go to next nesting level |
---|
808 | levels[++curLevel] = level; |
---|
809 | else |
---|
810 | throw ParseException(lineNo, "Unexpected nesting level"); |
---|
811 | canToNextLevel = false; |
---|
812 | } |
---|
813 | else if (levels[curLevel] > level) |
---|
814 | { |
---|
815 | while (curLevel != UINT_MAX && levels[curLevel] > level) |
---|
816 | curLevel--; |
---|
817 | if (curLevel == UINT_MAX) |
---|
818 | throw ParseException(lineNo, "Indentation smaller than in main level"); |
---|
819 | |
---|
820 | // pop from previous level |
---|
821 | if (curLevel < 3) |
---|
822 | { |
---|
823 | if (inKernelArgs) |
---|
824 | { |
---|
825 | // leave from kernel args |
---|
826 | inKernelArgs = false; |
---|
827 | inKernelArg = false; |
---|
828 | } |
---|
829 | |
---|
830 | inKernelCodeProps = false; |
---|
831 | inKernelAttrs = false; |
---|
832 | } |
---|
833 | if (curLevel < 1 && inKernels) |
---|
834 | { |
---|
835 | // leave from kernels |
---|
836 | inKernels = false; |
---|
837 | inKernel = false; |
---|
838 | } |
---|
839 | |
---|
840 | if (levels[curLevel] != level) |
---|
841 | throw ParseException(lineNo, "Unexpected nesting level"); |
---|
842 | } |
---|
843 | |
---|
844 | oldLineNo = lineNo; |
---|
845 | if (curLevel == 0) |
---|
846 | { |
---|
847 | if (lineNo==1 && ptr+3 <= end && *ptr=='-' && ptr[1]=='-' && ptr[2]=='-' && |
---|
848 | (ptr+3==end || (ptr+3 < end && ptr[3]=='\n'))) |
---|
849 | { |
---|
850 | ptr += 3; |
---|
851 | if (ptr!=end) |
---|
852 | { |
---|
853 | lineNo++; |
---|
854 | ptr++; // to newline |
---|
855 | } |
---|
856 | continue; // skip document start |
---|
857 | } |
---|
858 | |
---|
859 | if (ptr+3 <= end && *ptr=='.' && ptr[1]=='.' && ptr[2]=='.' && |
---|
860 | (ptr+3==end || (ptr+3 < end && ptr[3]=='\n'))) |
---|
861 | break; // end of the document |
---|
862 | |
---|
863 | const size_t keyIndex = parseYAMLKey(ptr, end, lineNo, |
---|
864 | mainMetadataKeywordsNum, mainMetadataKeywords); |
---|
865 | |
---|
866 | switch(keyIndex) |
---|
867 | { |
---|
868 | case ROCMMT_MAIN_KERNELS: |
---|
869 | inKernels = true; |
---|
870 | canToNextLevel = true; |
---|
871 | break; |
---|
872 | case ROCMMT_MAIN_PRINTF: |
---|
873 | { |
---|
874 | YAMLPrintfVectorConsumer consumer(metadataInfo.printfInfos); |
---|
875 | parseYAMLValArray(ptr, end, lineNo, levels[curLevel], &consumer, true); |
---|
876 | break; |
---|
877 | } |
---|
878 | case ROCMMT_MAIN_VERSION: |
---|
879 | { |
---|
880 | YAMLIntArrayConsumer<uint32_t> consumer(2, metadataInfo.version); |
---|
881 | parseYAMLValArray(ptr, end, lineNo, levels[curLevel], &consumer, true); |
---|
882 | break; |
---|
883 | } |
---|
884 | default: |
---|
885 | skipYAMLValue(ptr, end, lineNo, level); |
---|
886 | break; |
---|
887 | } |
---|
888 | } |
---|
889 | |
---|
890 | if (curLevel==1 && inKernels) |
---|
891 | { |
---|
892 | // enter to kernel level |
---|
893 | if (ptr == end || *ptr != '-') |
---|
894 | throw ParseException(lineNo, "No '-' before kernel object"); |
---|
895 | ptr++; |
---|
896 | const char* afterMinus = ptr; |
---|
897 | skipSpacesToLineEnd(ptr, end); |
---|
898 | levels[++curLevel] = level + 1 + ptr-afterMinus; |
---|
899 | level = levels[curLevel]; |
---|
900 | inKernel = true; |
---|
901 | |
---|
902 | kernels.push_back(ROCmKernelMetadata()); |
---|
903 | } |
---|
904 | |
---|
905 | if (curLevel==2 && inKernel) |
---|
906 | { |
---|
907 | // in kernel |
---|
908 | const size_t keyIndex = parseYAMLKey(ptr, end, lineNo, |
---|
909 | kernelMetadataKeywordsNum, kernelMetadataKeywords); |
---|
910 | |
---|
911 | ROCmKernelMetadata& kernel = kernels.back(); |
---|
912 | switch(keyIndex) |
---|
913 | { |
---|
914 | case ROCMMT_KERNEL_ARGS: |
---|
915 | inKernelArgs = true; |
---|
916 | canToNextLevel = true; |
---|
917 | break; |
---|
918 | case ROCMMT_KERNEL_ATTRS: |
---|
919 | inKernelAttrs = true; |
---|
920 | canToNextLevel = true; |
---|
921 | break; |
---|
922 | case ROCMMT_KERNEL_CODEPROPS: |
---|
923 | kernel.kernargSegmentSize = BINGEN64_DEFAULT; |
---|
924 | kernel.groupSegmentFixedSize = BINGEN64_DEFAULT; |
---|
925 | kernel.privateSegmentFixedSize = BINGEN64_DEFAULT; |
---|
926 | kernel.kernargSegmentAlign = BINGEN64_DEFAULT; |
---|
927 | kernel.wavefrontSize = BINGEN_DEFAULT; |
---|
928 | kernel.sgprsNum = BINGEN_DEFAULT; |
---|
929 | kernel.vgprsNum = BINGEN_DEFAULT; |
---|
930 | kernel.maxFlatWorkGroupSize = BINGEN64_DEFAULT; |
---|
931 | inKernelCodeProps = true; |
---|
932 | canToNextLevel = true; |
---|
933 | break; |
---|
934 | case ROCMMT_KERNEL_LANGUAGE: |
---|
935 | kernel.language = parseYAMLStringValue(ptr, end, lineNo, level, true); |
---|
936 | break; |
---|
937 | case ROCMMT_KERNEL_LANGUAGE_VERSION: |
---|
938 | { |
---|
939 | YAMLIntArrayConsumer<uint32_t> consumer(2, kernel.langVersion); |
---|
940 | parseYAMLValArray(ptr, end, lineNo, levels[curLevel], &consumer); |
---|
941 | break; |
---|
942 | } |
---|
943 | case ROCMMT_KERNEL_NAME: |
---|
944 | kernel.name = parseYAMLStringValue(ptr, end, lineNo, level, true); |
---|
945 | break; |
---|
946 | case ROCMMT_KERNEL_SYMBOLNAME: |
---|
947 | kernel.symbolName = parseYAMLStringValue(ptr, end, lineNo, level, true); |
---|
948 | break; |
---|
949 | default: |
---|
950 | skipYAMLValue(ptr, end, lineNo, level); |
---|
951 | break; |
---|
952 | } |
---|
953 | } |
---|
954 | |
---|
955 | if (curLevel==3 && inKernelAttrs) |
---|
956 | { |
---|
957 | // in kernel attributes |
---|
958 | const size_t keyIndex = parseYAMLKey(ptr, end, lineNo, |
---|
959 | kernelAttrMetadataKeywordsNum, kernelAttrMetadataKeywords); |
---|
960 | |
---|
961 | ROCmKernelMetadata& kernel = kernels.back(); |
---|
962 | switch(keyIndex) |
---|
963 | { |
---|
964 | case ROCMMT_ATTRS_REQD_WORK_GROUP_SIZE: |
---|
965 | { |
---|
966 | YAMLIntArrayConsumer<cxuint> consumer(3, kernel.reqdWorkGroupSize); |
---|
967 | parseYAMLValArray(ptr, end, lineNo, level, &consumer); |
---|
968 | break; |
---|
969 | } |
---|
970 | case ROCMMT_ATTRS_RUNTIME_HANDLE: |
---|
971 | kernel.runtimeHandle = parseYAMLStringValue( |
---|
972 | ptr, end, lineNo, level, true); |
---|
973 | break; |
---|
974 | case ROCMMT_ATTRS_VECTYPEHINT: |
---|
975 | kernel.vecTypeHint = parseYAMLStringValue( |
---|
976 | ptr, end, lineNo, level, true); |
---|
977 | break; |
---|
978 | case ROCMMT_ATTRS_WORK_GROUP_SIZE_HINT: |
---|
979 | { |
---|
980 | YAMLIntArrayConsumer<cxuint> consumer(3, kernel.workGroupSizeHint); |
---|
981 | parseYAMLValArray(ptr, end, lineNo, level, &consumer, true); |
---|
982 | break; |
---|
983 | } |
---|
984 | default: |
---|
985 | skipYAMLValue(ptr, end, lineNo, level); |
---|
986 | break; |
---|
987 | } |
---|
988 | } |
---|
989 | |
---|
990 | if (curLevel==3 && inKernelCodeProps) |
---|
991 | { |
---|
992 | // in kernel codeProps |
---|
993 | const size_t keyIndex = parseYAMLKey(ptr, end, lineNo, |
---|
994 | kernelCodePropsKeywordsNum, kernelCodePropsKeywords); |
---|
995 | |
---|
996 | ROCmKernelMetadata& kernel = kernels.back(); |
---|
997 | switch(keyIndex) |
---|
998 | { |
---|
999 | case ROCMMT_CODEPROPS_FIXED_WORK_GROUP_SIZE: |
---|
1000 | { |
---|
1001 | YAMLIntArrayConsumer<cxuint> consumer(3, kernel.fixedWorkGroupSize); |
---|
1002 | parseYAMLValArray(ptr, end, lineNo, level, &consumer); |
---|
1003 | break; |
---|
1004 | } |
---|
1005 | case ROCMMT_CODEPROPS_GROUP_SEGMENT_FIXED_SIZE: |
---|
1006 | kernel.groupSegmentFixedSize = |
---|
1007 | parseYAMLIntValue<cxuint>(ptr, end, lineNo, true); |
---|
1008 | break; |
---|
1009 | case ROCMMT_CODEPROPS_KERNARG_SEGMENT_ALIGN: |
---|
1010 | kernel.kernargSegmentAlign = |
---|
1011 | parseYAMLIntValue<uint64_t>(ptr, end, lineNo, true); |
---|
1012 | break; |
---|
1013 | case ROCMMT_CODEPROPS_KERNARG_SEGMENT_SIZE: |
---|
1014 | kernel.kernargSegmentSize = |
---|
1015 | parseYAMLIntValue<uint64_t>(ptr, end, lineNo, true); |
---|
1016 | break; |
---|
1017 | case ROCMMT_CODEPROPS_MAX_FLAT_WORK_GROUP_SIZE: |
---|
1018 | kernel.maxFlatWorkGroupSize = |
---|
1019 | parseYAMLIntValue<uint64_t>(ptr, end, lineNo, true); |
---|
1020 | break; |
---|
1021 | case ROCMMT_CODEPROPS_NUM_SGPRS: |
---|
1022 | kernel.sgprsNum = parseYAMLIntValue<cxuint>(ptr, end, lineNo, true); |
---|
1023 | break; |
---|
1024 | case ROCMMT_CODEPROPS_NUM_SPILLED_SGPRS: |
---|
1025 | kernel.spilledSgprs = |
---|
1026 | parseYAMLIntValue<cxuint>(ptr, end, lineNo, true); |
---|
1027 | break; |
---|
1028 | case ROCMMT_CODEPROPS_NUM_SPILLED_VGPRS: |
---|
1029 | kernel.spilledVgprs = |
---|
1030 | parseYAMLIntValue<cxuint>(ptr, end, lineNo, true); |
---|
1031 | break; |
---|
1032 | case ROCMMT_CODEPROPS_NUM_VGPRS: |
---|
1033 | kernel.vgprsNum = parseYAMLIntValue<cxuint>(ptr, end, lineNo, true); |
---|
1034 | break; |
---|
1035 | case ROCMMT_CODEPROPS_PRIVATE_SEGMENT_FIXED_SIZE: |
---|
1036 | kernel.privateSegmentFixedSize = |
---|
1037 | parseYAMLIntValue<uint64_t>(ptr, end, lineNo, true); |
---|
1038 | break; |
---|
1039 | case ROCMMT_CODEPROPS_WAVEFRONT_SIZE: |
---|
1040 | kernel.wavefrontSize = |
---|
1041 | parseYAMLIntValue<cxuint>(ptr, end, lineNo, true); |
---|
1042 | break; |
---|
1043 | default: |
---|
1044 | skipYAMLValue(ptr, end, lineNo, level); |
---|
1045 | break; |
---|
1046 | } |
---|
1047 | } |
---|
1048 | |
---|
1049 | if (curLevel==3 && inKernelArgs) |
---|
1050 | { |
---|
1051 | // enter to kernel argument level |
---|
1052 | if (ptr == end || *ptr != '-') |
---|
1053 | throw ParseException(lineNo, "No '-' before argument object"); |
---|
1054 | ptr++; |
---|
1055 | const char* afterMinus = ptr; |
---|
1056 | skipSpacesToLineEnd(ptr, end); |
---|
1057 | levels[++curLevel] = level + 1 + ptr-afterMinus; |
---|
1058 | level = levels[curLevel]; |
---|
1059 | inKernelArg = true; |
---|
1060 | |
---|
1061 | kernels.back().argInfos.push_back(ROCmKernelArgInfo{}); |
---|
1062 | } |
---|
1063 | |
---|
1064 | if (curLevel==4 && inKernelArg) |
---|
1065 | { |
---|
1066 | // in kernel argument |
---|
1067 | const size_t keyIndex = parseYAMLKey(ptr, end, lineNo, |
---|
1068 | kernelArgInfosKeywordsNum, kernelArgInfosKeywords); |
---|
1069 | |
---|
1070 | ROCmKernelArgInfo& kernelArg = kernels.back().argInfos.back(); |
---|
1071 | |
---|
1072 | size_t valLineNo = lineNo; |
---|
1073 | switch(keyIndex) |
---|
1074 | { |
---|
1075 | case ROCMMT_ARGS_ACCQUAL: |
---|
1076 | case ROCMMT_ARGS_ACTUALACCQUAL: |
---|
1077 | { |
---|
1078 | const std::string acc = trimStrSpaces(parseYAMLStringValue( |
---|
1079 | ptr, end, lineNo, level, true)); |
---|
1080 | size_t accIndex = 0; |
---|
1081 | for (; accIndex < 6; accIndex++) |
---|
1082 | if (::strcmp(rocmAccessQualifierTbl[accIndex], acc.c_str())==0) |
---|
1083 | break; |
---|
1084 | if (accIndex == 4) |
---|
1085 | throw ParseException(lineNo, "Wrong access qualifier"); |
---|
1086 | if (keyIndex == ROCMMT_ARGS_ACCQUAL) |
---|
1087 | kernelArg.accessQual = ROCmAccessQual(accIndex); |
---|
1088 | else |
---|
1089 | kernelArg.actualAccessQual = ROCmAccessQual(accIndex); |
---|
1090 | break; |
---|
1091 | } |
---|
1092 | case ROCMMT_ARGS_ADDRSPACEQUAL: |
---|
1093 | { |
---|
1094 | const std::string aspace = trimStrSpaces(parseYAMLStringValue( |
---|
1095 | ptr, end, lineNo, level, true)); |
---|
1096 | size_t aspaceIndex = 0; |
---|
1097 | for (; aspaceIndex < 6; aspaceIndex++) |
---|
1098 | if (::strcmp(rocmAddrSpaceTypesTbl[aspaceIndex], |
---|
1099 | aspace.c_str())==0) |
---|
1100 | break; |
---|
1101 | if (aspaceIndex == 6) |
---|
1102 | throw ParseException(valLineNo, "Wrong address space"); |
---|
1103 | kernelArg.addressSpace = ROCmAddressSpace(aspaceIndex+1); |
---|
1104 | break; |
---|
1105 | } |
---|
1106 | case ROCMMT_ARGS_ALIGN: |
---|
1107 | kernelArg.align = parseYAMLIntValue<uint64_t>(ptr, end, lineNo, true); |
---|
1108 | break; |
---|
1109 | case ROCMMT_ARGS_ISCONST: |
---|
1110 | kernelArg.isConst = parseYAMLBoolValue(ptr, end, lineNo, true); |
---|
1111 | break; |
---|
1112 | case ROCMMT_ARGS_ISPIPE: |
---|
1113 | kernelArg.isPipe = parseYAMLBoolValue(ptr, end, lineNo, true); |
---|
1114 | break; |
---|
1115 | case ROCMMT_ARGS_ISRESTRICT: |
---|
1116 | kernelArg.isRestrict = parseYAMLBoolValue(ptr, end, lineNo, true); |
---|
1117 | break; |
---|
1118 | case ROCMMT_ARGS_ISVOLATILE: |
---|
1119 | kernelArg.isVolatile = parseYAMLBoolValue(ptr, end, lineNo, true); |
---|
1120 | break; |
---|
1121 | case ROCMMT_ARGS_NAME: |
---|
1122 | kernelArg.name = parseYAMLStringValue(ptr, end, lineNo, level, true); |
---|
1123 | break; |
---|
1124 | case ROCMMT_ARGS_POINTEE_ALIGN: |
---|
1125 | kernelArg.pointeeAlign = |
---|
1126 | parseYAMLIntValue<uint64_t>(ptr, end, lineNo, true); |
---|
1127 | break; |
---|
1128 | case ROCMMT_ARGS_SIZE: |
---|
1129 | kernelArg.size = parseYAMLIntValue<uint64_t>(ptr, end, lineNo); |
---|
1130 | break; |
---|
1131 | case ROCMMT_ARGS_TYPENAME: |
---|
1132 | kernelArg.typeName = |
---|
1133 | parseYAMLStringValue(ptr, end, lineNo, level, true); |
---|
1134 | break; |
---|
1135 | case ROCMMT_ARGS_VALUEKIND: |
---|
1136 | { |
---|
1137 | const std::string vkind = trimStrSpaces(parseYAMLStringValue( |
---|
1138 | ptr, end, lineNo, level, true)); |
---|
1139 | const size_t vkindIndex = binaryMapFind(rocmValueKindNames, |
---|
1140 | rocmValueKindNames + rocmValueKindNamesNum, vkind.c_str(), |
---|
1141 | CStringLess()) - rocmValueKindNames; |
---|
1142 | // if unknown kind |
---|
1143 | if (vkindIndex == rocmValueKindNamesNum) |
---|
1144 | throw ParseException(valLineNo, "Wrong argument value kind"); |
---|
1145 | kernelArg.valueKind = rocmValueKindNames[vkindIndex].second; |
---|
1146 | break; |
---|
1147 | } |
---|
1148 | case ROCMMT_ARGS_VALUETYPE: |
---|
1149 | { |
---|
1150 | const std::string vtype = trimStrSpaces(parseYAMLStringValue( |
---|
1151 | ptr, end, lineNo, level, true)); |
---|
1152 | const size_t vtypeIndex = binaryMapFind(rocmValueTypeNames, |
---|
1153 | rocmValueTypeNames + rocmValueTypeNamesNum, vtype.c_str(), |
---|
1154 | CStringLess()) - rocmValueTypeNames; |
---|
1155 | // if unknown type |
---|
1156 | if (vtypeIndex == rocmValueTypeNamesNum) |
---|
1157 | throw ParseException(valLineNo, "Wrong argument value type"); |
---|
1158 | kernelArg.valueType = rocmValueTypeNames[vtypeIndex].second; |
---|
1159 | break; |
---|
1160 | } |
---|
1161 | default: |
---|
1162 | skipYAMLValue(ptr, end, lineNo, level); |
---|
1163 | break; |
---|
1164 | } |
---|
1165 | } |
---|
1166 | } |
---|
1167 | } |
---|
1168 | |
---|
1169 | /* |
---|
1170 | * ROCm binary reader and generator |
---|
1171 | */ |
---|
1172 | |
---|
1173 | /* TODO: add support for various kernel code offset (now only 256 is supported) */ |
---|
1174 | |
---|
1175 | ROCmBinary::ROCmBinary(size_t binaryCodeSize, cxbyte* binaryCode, Flags creationFlags) |
---|
1176 | : ElfBinary64(binaryCodeSize, binaryCode, creationFlags), |
---|
1177 | regionsNum(0), codeSize(0), code(nullptr), |
---|
1178 | globalDataSize(0), globalData(nullptr), metadataSize(0), metadata(nullptr), |
---|
1179 | newBinFormat(false) |
---|
1180 | { |
---|
1181 | cxuint textIndex = SHN_UNDEF; |
---|
1182 | try |
---|
1183 | { textIndex = getSectionIndex(".text"); } |
---|
1184 | catch(const Exception& ex) |
---|
1185 | { } // ignore failed |
---|
1186 | uint64_t codeOffset = 0; |
---|
1187 | // find '.text' section |
---|
1188 | if (textIndex!=SHN_UNDEF) |
---|
1189 | { |
---|
1190 | code = getSectionContent(textIndex); |
---|
1191 | const Elf64_Shdr& textShdr = getSectionHeader(textIndex); |
---|
1192 | codeSize = ULEV(textShdr.sh_size); |
---|
1193 | codeOffset = ULEV(textShdr.sh_offset); |
---|
1194 | } |
---|
1195 | |
---|
1196 | cxuint rodataIndex = SHN_UNDEF; |
---|
1197 | try |
---|
1198 | { rodataIndex = getSectionIndex(".rodata"); } |
---|
1199 | catch(const Exception& ex) |
---|
1200 | { } // ignore failed |
---|
1201 | // find '.text' section |
---|
1202 | if (rodataIndex!=SHN_UNDEF) |
---|
1203 | { |
---|
1204 | globalData = getSectionContent(rodataIndex); |
---|
1205 | const Elf64_Shdr& rodataShdr = getSectionHeader(rodataIndex); |
---|
1206 | globalDataSize = ULEV(rodataShdr.sh_size); |
---|
1207 | } |
---|
1208 | |
---|
1209 | cxuint gpuConfigIndex = SHN_UNDEF; |
---|
1210 | try |
---|
1211 | { gpuConfigIndex = getSectionIndex(".AMDGPU.config"); } |
---|
1212 | catch(const Exception& ex) |
---|
1213 | { } // ignore failed |
---|
1214 | newBinFormat = (gpuConfigIndex == SHN_UNDEF); |
---|
1215 | |
---|
1216 | // counts regions (symbol or kernel) |
---|
1217 | regionsNum = 0; |
---|
1218 | const size_t symbolsNum = getSymbolsNum(); |
---|
1219 | for (size_t i = 0; i < symbolsNum; i++) |
---|
1220 | { |
---|
1221 | // count regions number |
---|
1222 | const Elf64_Sym& sym = getSymbol(i); |
---|
1223 | const cxbyte symType = ELF64_ST_TYPE(sym.st_info); |
---|
1224 | const cxbyte bind = ELF64_ST_BIND(sym.st_info); |
---|
1225 | if (ULEV(sym.st_shndx)==textIndex && |
---|
1226 | (symType==STT_GNU_IFUNC || symType==STT_FUNC || |
---|
1227 | (bind==STB_GLOBAL && symType==STT_OBJECT))) |
---|
1228 | regionsNum++; |
---|
1229 | } |
---|
1230 | if (code==nullptr && regionsNum!=0) |
---|
1231 | throw BinException("No code if regions number is not zero"); |
---|
1232 | regions.reset(new ROCmRegion[regionsNum]); |
---|
1233 | size_t j = 0; |
---|
1234 | typedef std::pair<uint64_t, size_t> RegionOffsetEntry; |
---|
1235 | std::unique_ptr<RegionOffsetEntry[]> symOffsets(new RegionOffsetEntry[regionsNum]); |
---|
1236 | |
---|
1237 | // get regions info |
---|
1238 | for (size_t i = 0; i < symbolsNum; i++) |
---|
1239 | { |
---|
1240 | const Elf64_Sym& sym = getSymbol(i); |
---|
1241 | if (ULEV(sym.st_shndx)!=textIndex) |
---|
1242 | continue; // if not in '.text' section |
---|
1243 | const size_t value = ULEV(sym.st_value); |
---|
1244 | if (value < codeOffset) |
---|
1245 | throw BinException("Region offset is too small!"); |
---|
1246 | const size_t size = ULEV(sym.st_size); |
---|
1247 | |
---|
1248 | const cxbyte symType = ELF64_ST_TYPE(sym.st_info); |
---|
1249 | const cxbyte bind = ELF64_ST_BIND(sym.st_info); |
---|
1250 | if (symType==STT_GNU_IFUNC || symType==STT_FUNC || |
---|
1251 | (bind==STB_GLOBAL && symType==STT_OBJECT)) |
---|
1252 | { |
---|
1253 | ROCmRegionType type = ROCmRegionType::DATA; |
---|
1254 | // if kernel |
---|
1255 | if (symType==STT_GNU_IFUNC) |
---|
1256 | type = ROCmRegionType::KERNEL; |
---|
1257 | // if function kernel |
---|
1258 | else if (symType==STT_FUNC) |
---|
1259 | type = ROCmRegionType::FKERNEL; |
---|
1260 | symOffsets[j] = std::make_pair(value, j); |
---|
1261 | if (type!=ROCmRegionType::DATA && value+0x100 > codeOffset+codeSize) |
---|
1262 | throw BinException("Kernel or code offset is too big!"); |
---|
1263 | regions[j++] = { getSymbolName(i), size, value, type }; |
---|
1264 | } |
---|
1265 | } |
---|
1266 | // sort regions by offset |
---|
1267 | std::sort(symOffsets.get(), symOffsets.get()+regionsNum, |
---|
1268 | [](const RegionOffsetEntry& a, const RegionOffsetEntry& b) |
---|
1269 | { return a.first < b.first; }); |
---|
1270 | // checking distance between regions |
---|
1271 | for (size_t i = 1; i <= regionsNum; i++) |
---|
1272 | { |
---|
1273 | size_t end = (i<regionsNum) ? symOffsets[i].first : codeOffset+codeSize; |
---|
1274 | ROCmRegion& region = regions[symOffsets[i-1].second]; |
---|
1275 | if (region.type==ROCmRegionType::KERNEL && symOffsets[i-1].first+0x100 > end) |
---|
1276 | throw BinException("Kernel size is too small!"); |
---|
1277 | |
---|
1278 | const size_t regSize = end - symOffsets[i-1].first; |
---|
1279 | if (region.size==0) |
---|
1280 | region.size = regSize; |
---|
1281 | else |
---|
1282 | region.size = std::min(regSize, region.size); |
---|
1283 | } |
---|
1284 | |
---|
1285 | // get metadata |
---|
1286 | const size_t notesSize = getNotesSize(); |
---|
1287 | const cxbyte* noteContent = (const cxbyte*)getNotes(); |
---|
1288 | |
---|
1289 | for (size_t offset = 0; offset < notesSize; ) |
---|
1290 | { |
---|
1291 | const Elf64_Nhdr* nhdr = (const Elf64_Nhdr*)(noteContent + offset); |
---|
1292 | size_t namesz = ULEV(nhdr->n_namesz); |
---|
1293 | size_t descsz = ULEV(nhdr->n_descsz); |
---|
1294 | if (usumGt(offset, namesz+descsz, notesSize)) |
---|
1295 | throw BinException("Note offset+size out of range"); |
---|
1296 | |
---|
1297 | if (namesz==4 && |
---|
1298 | ::strcmp((const char*)noteContent+offset+ sizeof(Elf64_Nhdr), "AMD")==0) |
---|
1299 | { |
---|
1300 | const uint32_t noteType = ULEV(nhdr->n_type); |
---|
1301 | if (noteType == 0xa) |
---|
1302 | { |
---|
1303 | metadata = (char*)(noteContent+offset+sizeof(Elf64_Nhdr) + 4); |
---|
1304 | metadataSize = descsz; |
---|
1305 | } |
---|
1306 | else if (noteType == 0xb) |
---|
1307 | target.assign((char*)(noteContent+offset+sizeof(Elf64_Nhdr) + 4), descsz); |
---|
1308 | } |
---|
1309 | size_t align = (((namesz+descsz)&3)!=0) ? 4-((namesz+descsz)&3) : 0; |
---|
1310 | offset += sizeof(Elf64_Nhdr) + namesz + descsz + align; |
---|
1311 | } |
---|
1312 | |
---|
1313 | if (hasRegionMap()) |
---|
1314 | { |
---|
1315 | // create region map |
---|
1316 | regionsMap.resize(regionsNum); |
---|
1317 | for (size_t i = 0; i < regionsNum; i++) |
---|
1318 | regionsMap[i] = std::make_pair(regions[i].regionName, i); |
---|
1319 | // sort region map |
---|
1320 | mapSort(regionsMap.begin(), regionsMap.end()); |
---|
1321 | } |
---|
1322 | |
---|
1323 | if ((creationFlags & ROCMBIN_CREATE_METADATAINFO) != 0 && |
---|
1324 | metadata != nullptr && metadataSize != 0) |
---|
1325 | { |
---|
1326 | metadataInfo.reset(new ROCmMetadata()); |
---|
1327 | parseROCmMetadata(metadataSize, metadata, *metadataInfo); |
---|
1328 | |
---|
1329 | if (hasKernelInfoMap()) |
---|
1330 | { |
---|
1331 | const std::vector<ROCmKernelMetadata>& kernels = metadataInfo->kernels; |
---|
1332 | kernelInfosMap.resize(kernels.size()); |
---|
1333 | for (size_t i = 0; i < kernelInfosMap.size(); i++) |
---|
1334 | kernelInfosMap[i] = std::make_pair(kernels[i].name, i); |
---|
1335 | // sort region map |
---|
1336 | mapSort(kernelInfosMap.begin(), kernelInfosMap.end()); |
---|
1337 | } |
---|
1338 | } |
---|
1339 | } |
---|
1340 | |
---|
1341 | /// determint GPU device from ROCm notes |
---|
1342 | GPUDeviceType ROCmBinary::determineGPUDeviceType(uint32_t& outArchMinor, |
---|
1343 | uint32_t& outArchStepping) const |
---|
1344 | { |
---|
1345 | uint32_t archMajor = 0; |
---|
1346 | uint32_t archMinor = 0; |
---|
1347 | uint32_t archStepping = 0; |
---|
1348 | |
---|
1349 | { |
---|
1350 | const cxbyte* noteContent = (const cxbyte*)getNotes(); |
---|
1351 | if (noteContent==nullptr) |
---|
1352 | throw BinException("Missing notes in inner binary!"); |
---|
1353 | size_t notesSize = getNotesSize(); |
---|
1354 | // find note about AMDGPU |
---|
1355 | for (size_t offset = 0; offset < notesSize; ) |
---|
1356 | { |
---|
1357 | const Elf64_Nhdr* nhdr = (const Elf64_Nhdr*)(noteContent + offset); |
---|
1358 | size_t namesz = ULEV(nhdr->n_namesz); |
---|
1359 | size_t descsz = ULEV(nhdr->n_descsz); |
---|
1360 | if (usumGt(offset, namesz+descsz, notesSize)) |
---|
1361 | throw BinException("Note offset+size out of range"); |
---|
1362 | if (ULEV(nhdr->n_type) == 0x3 && namesz==4 && descsz>=0x1a && |
---|
1363 | ::strcmp((const char*)noteContent+offset+sizeof(Elf64_Nhdr), "AMD")==0) |
---|
1364 | { // AMDGPU type |
---|
1365 | const uint32_t* content = (const uint32_t*) |
---|
1366 | (noteContent+offset+sizeof(Elf64_Nhdr) + 4); |
---|
1367 | archMajor = ULEV(content[1]); |
---|
1368 | archMinor = ULEV(content[2]); |
---|
1369 | archStepping = ULEV(content[3]); |
---|
1370 | } |
---|
1371 | size_t align = (((namesz+descsz)&3)!=0) ? 4-((namesz+descsz)&3) : 0; |
---|
1372 | offset += sizeof(Elf64_Nhdr) + namesz + descsz + align; |
---|
1373 | } |
---|
1374 | } |
---|
1375 | // determine device type |
---|
1376 | GPUDeviceType deviceType = getGPUDeviceTypeFromArchVersion(archMajor, archMinor, |
---|
1377 | archStepping); |
---|
1378 | outArchMinor = archMinor; |
---|
1379 | outArchStepping = archStepping; |
---|
1380 | return deviceType; |
---|
1381 | } |
---|
1382 | |
---|
1383 | const ROCmRegion& ROCmBinary::getRegion(const char* name) const |
---|
1384 | { |
---|
1385 | RegionMap::const_iterator it = binaryMapFind(regionsMap.begin(), |
---|
1386 | regionsMap.end(), name); |
---|
1387 | if (it == regionsMap.end()) |
---|
1388 | throw BinException("Can't find region name"); |
---|
1389 | return regions[it->second]; |
---|
1390 | } |
---|
1391 | |
---|
1392 | const ROCmKernelMetadata& ROCmBinary::getKernelInfo(const char* name) const |
---|
1393 | { |
---|
1394 | if (!hasMetadataInfo()) |
---|
1395 | throw BinException("Can't find kernel info name"); |
---|
1396 | RegionMap::const_iterator it = binaryMapFind(kernelInfosMap.begin(), |
---|
1397 | kernelInfosMap.end(), name); |
---|
1398 | if (it == kernelInfosMap.end()) |
---|
1399 | throw BinException("Can't find kernel info name"); |
---|
1400 | return metadataInfo->kernels[it->second]; |
---|
1401 | } |
---|
1402 | |
---|
1403 | // if ROCm binary |
---|
1404 | bool CLRX::isROCmBinary(size_t binarySize, const cxbyte* binary) |
---|
1405 | { |
---|
1406 | if (!isElfBinary(binarySize, binary)) |
---|
1407 | return false; |
---|
1408 | if (binary[EI_CLASS] != ELFCLASS64) |
---|
1409 | return false; |
---|
1410 | const Elf64_Ehdr* ehdr = reinterpret_cast<const Elf64_Ehdr*>(binary); |
---|
1411 | if (ULEV(ehdr->e_machine) != 0xe0) |
---|
1412 | return false; |
---|
1413 | return true; |
---|
1414 | } |
---|
1415 | |
---|
1416 | |
---|
1417 | void ROCmInput::addEmptyKernel(const char* kernelName) |
---|
1418 | { |
---|
1419 | symbols.push_back({ kernelName, 0, 0, ROCmRegionType::KERNEL }); |
---|
1420 | } |
---|
1421 | |
---|
1422 | /* |
---|
1423 | * ROCm Binary Generator |
---|
1424 | */ |
---|
1425 | |
---|
1426 | ROCmBinGenerator::ROCmBinGenerator() : manageable(false), input(nullptr) |
---|
1427 | { } |
---|
1428 | |
---|
1429 | ROCmBinGenerator::ROCmBinGenerator(const ROCmInput* rocmInput) |
---|
1430 | : manageable(false), input(rocmInput) |
---|
1431 | { } |
---|
1432 | |
---|
1433 | ROCmBinGenerator::ROCmBinGenerator(GPUDeviceType deviceType, |
---|
1434 | uint32_t archMinor, uint32_t archStepping, size_t codeSize, const cxbyte* code, |
---|
1435 | size_t globalDataSize, const cxbyte* globalData, |
---|
1436 | const std::vector<ROCmSymbolInput>& symbols) |
---|
1437 | { |
---|
1438 | input = new ROCmInput{ deviceType, archMinor, archStepping, 0, false, |
---|
1439 | globalDataSize, globalData, symbols, codeSize, code }; |
---|
1440 | } |
---|
1441 | |
---|
1442 | ROCmBinGenerator::ROCmBinGenerator(GPUDeviceType deviceType, |
---|
1443 | uint32_t archMinor, uint32_t archStepping, size_t codeSize, const cxbyte* code, |
---|
1444 | size_t globalDataSize, const cxbyte* globalData, |
---|
1445 | std::vector<ROCmSymbolInput>&& symbols) |
---|
1446 | { |
---|
1447 | input = new ROCmInput{ deviceType, archMinor, archStepping, 0, false, |
---|
1448 | globalDataSize, globalData, std::move(symbols), codeSize, code }; |
---|
1449 | } |
---|
1450 | |
---|
1451 | ROCmBinGenerator::~ROCmBinGenerator() |
---|
1452 | { |
---|
1453 | if (manageable) |
---|
1454 | delete input; |
---|
1455 | } |
---|
1456 | |
---|
1457 | void ROCmBinGenerator::setInput(const ROCmInput* input) |
---|
1458 | { |
---|
1459 | if (manageable) |
---|
1460 | delete input; |
---|
1461 | manageable = false; |
---|
1462 | this->input = input; |
---|
1463 | } |
---|
1464 | |
---|
1465 | // ELF notes contents |
---|
1466 | static const cxbyte noteDescType1[8] = |
---|
1467 | { 2, 0, 0, 0, 1, 0, 0, 0 }; |
---|
1468 | |
---|
1469 | static const cxbyte noteDescType3[27] = |
---|
1470 | { 4, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
---|
1471 | 'A', 'M', 'D', 0, 'A', 'M', 'D', 'G', 'P', 'U', 0 }; |
---|
1472 | |
---|
1473 | static inline void addMainSectionToTable(cxuint& sectionsNum, uint16_t* builtinTable, |
---|
1474 | cxuint elfSectId) |
---|
1475 | { builtinTable[elfSectId - ELFSECTID_START] = sectionsNum++; } |
---|
1476 | |
---|
1477 | void ROCmBinGenerator::generateInternal(std::ostream* osPtr, std::vector<char>* vPtr, |
---|
1478 | Array<cxbyte>* aPtr) const |
---|
1479 | { |
---|
1480 | AMDGPUArchVersion amdGpuArchValues = getGPUArchVersion(input->deviceType, |
---|
1481 | GPUArchVersionTable::ROCM); |
---|
1482 | if (input->archMinor!=UINT32_MAX) |
---|
1483 | amdGpuArchValues.minor = input->archMinor; |
---|
1484 | if (input->archStepping!=UINT32_MAX) |
---|
1485 | amdGpuArchValues.stepping = input->archStepping; |
---|
1486 | |
---|
1487 | const char* comment = "CLRX ROCmBinGenerator " CLRX_VERSION; |
---|
1488 | uint32_t commentSize = ::strlen(comment); |
---|
1489 | if (input->comment!=nullptr) |
---|
1490 | { |
---|
1491 | // if comment, store comment section |
---|
1492 | comment = input->comment; |
---|
1493 | commentSize = input->commentSize; |
---|
1494 | if (commentSize==0) |
---|
1495 | commentSize = ::strlen(comment); |
---|
1496 | } |
---|
1497 | |
---|
1498 | uint32_t eflags = input->newBinFormat ? 2 : 0; |
---|
1499 | if (input->eflags != BINGEN_DEFAULT) |
---|
1500 | eflags = input->eflags; |
---|
1501 | |
---|
1502 | ElfBinaryGen64 elfBinGen64({ 0U, 0U, 0x40, 0, ET_DYN, |
---|
1503 | 0xe0, EV_CURRENT, UINT_MAX, 0, eflags }, |
---|
1504 | true, true, true, PHREGION_FILESTART); |
---|
1505 | |
---|
1506 | uint16_t mainBuiltinSectTable[ROCMSECTID_MAX-ELFSECTID_START+1]; |
---|
1507 | std::fill(mainBuiltinSectTable, |
---|
1508 | mainBuiltinSectTable + ROCMSECTID_MAX-ELFSECTID_START+1, SHN_UNDEF); |
---|
1509 | cxuint mainSectionsNum = 1; |
---|
1510 | |
---|
1511 | // generate main builtin section table (for section id translation) |
---|
1512 | if (input->newBinFormat) |
---|
1513 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ROCMSECTID_NOTE); |
---|
1514 | if (input->globalData != nullptr) |
---|
1515 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ELFSECTID_RODATA); |
---|
1516 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ELFSECTID_DYNSYM); |
---|
1517 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ROCMSECTID_HASH); |
---|
1518 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ELFSECTID_DYNSTR); |
---|
1519 | const cxuint execProgHeaderRegionIndex = mainSectionsNum; |
---|
1520 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ELFSECTID_TEXT); |
---|
1521 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ROCMSECTID_DYNAMIC); |
---|
1522 | if (!input->newBinFormat) |
---|
1523 | { |
---|
1524 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ROCMSECTID_NOTE); |
---|
1525 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ROCMSECTID_GPUCONFIG); |
---|
1526 | } |
---|
1527 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ELFSECTID_COMMENT); |
---|
1528 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ELFSECTID_SYMTAB); |
---|
1529 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ELFSECTID_SHSTRTAB); |
---|
1530 | addMainSectionToTable(mainSectionsNum, mainBuiltinSectTable, ELFSECTID_STRTAB); |
---|
1531 | |
---|
1532 | // add symbols (kernels, function kernels and data symbols) |
---|
1533 | elfBinGen64.addSymbol(ElfSymbol64("_DYNAMIC", |
---|
1534 | mainBuiltinSectTable[ROCMSECTID_DYNAMIC-ELFSECTID_START], |
---|
1535 | ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE), STV_HIDDEN, true, 0, 0)); |
---|
1536 | const uint16_t textSectIndex = mainBuiltinSectTable[ELFSECTID_TEXT-ELFSECTID_START]; |
---|
1537 | for (const ROCmSymbolInput& symbol: input->symbols) |
---|
1538 | { |
---|
1539 | ElfSymbol64 elfsym; |
---|
1540 | switch (symbol.type) |
---|
1541 | { |
---|
1542 | case ROCmRegionType::KERNEL: |
---|
1543 | elfsym = ElfSymbol64(symbol.symbolName.c_str(), textSectIndex, |
---|
1544 | ELF64_ST_INFO(STB_GLOBAL, STT_GNU_IFUNC), 0, true, |
---|
1545 | symbol.offset, symbol.size); |
---|
1546 | break; |
---|
1547 | case ROCmRegionType::FKERNEL: |
---|
1548 | elfsym = ElfSymbol64(symbol.symbolName.c_str(), textSectIndex, |
---|
1549 | ELF64_ST_INFO(STB_GLOBAL, STT_FUNC), 0, true, |
---|
1550 | symbol.offset, symbol.size); |
---|
1551 | break; |
---|
1552 | case ROCmRegionType::DATA: |
---|
1553 | elfsym = ElfSymbol64(symbol.symbolName.c_str(), textSectIndex, |
---|
1554 | ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT), 0, true, |
---|
1555 | symbol.offset, symbol.size); |
---|
1556 | break; |
---|
1557 | default: |
---|
1558 | break; |
---|
1559 | } |
---|
1560 | // add to symbols and dynamic symbols table |
---|
1561 | elfBinGen64.addSymbol(elfsym); |
---|
1562 | elfBinGen64.addDynSymbol(elfsym); |
---|
1563 | } |
---|
1564 | |
---|
1565 | static const int32_t dynTags[] = { |
---|
1566 | DT_SYMTAB, DT_SYMENT, DT_STRTAB, DT_STRSZ, DT_HASH }; |
---|
1567 | elfBinGen64.addDynamics(sizeof(dynTags)/sizeof(int32_t), dynTags); |
---|
1568 | |
---|
1569 | // elf program headers |
---|
1570 | elfBinGen64.addProgramHeader({ PT_PHDR, PF_R, 0, 1, |
---|
1571 | true, Elf64Types::nobase, Elf64Types::nobase, 0 }); |
---|
1572 | elfBinGen64.addProgramHeader({ PT_LOAD, PF_R, PHREGION_FILESTART, |
---|
1573 | execProgHeaderRegionIndex, |
---|
1574 | true, Elf64Types::nobase, Elf64Types::nobase, 0, 0x1000 }); |
---|
1575 | elfBinGen64.addProgramHeader({ PT_LOAD, PF_R|PF_X, execProgHeaderRegionIndex, 1, |
---|
1576 | true, Elf64Types::nobase, Elf64Types::nobase, 0 }); |
---|
1577 | elfBinGen64.addProgramHeader({ PT_LOAD, PF_R|PF_W, execProgHeaderRegionIndex+1, 1, |
---|
1578 | true, Elf64Types::nobase, Elf64Types::nobase, 0 }); |
---|
1579 | elfBinGen64.addProgramHeader({ PT_DYNAMIC, PF_R|PF_W, execProgHeaderRegionIndex+1, 1, |
---|
1580 | true, Elf64Types::nobase, Elf64Types::nobase, 0, 8 }); |
---|
1581 | elfBinGen64.addProgramHeader({ PT_GNU_RELRO, PF_R, execProgHeaderRegionIndex+1, 1, |
---|
1582 | true, Elf64Types::nobase, Elf64Types::nobase, 0, 1 }); |
---|
1583 | elfBinGen64.addProgramHeader({ PT_GNU_STACK, PF_R|PF_W, PHREGION_FILESTART, 0, |
---|
1584 | true, 0, 0, 0 }); |
---|
1585 | |
---|
1586 | if (input->newBinFormat) |
---|
1587 | // program header for note (new binary format) |
---|
1588 | elfBinGen64.addProgramHeader({ PT_NOTE, PF_R, 1, 1, true, |
---|
1589 | Elf64Types::nobase, Elf64Types::nobase, 0, 4 }); |
---|
1590 | |
---|
1591 | std::string target = input->target.c_str(); |
---|
1592 | if (target.empty() && !input->targetTripple.empty()) |
---|
1593 | { |
---|
1594 | target = input->targetTripple.c_str(); |
---|
1595 | char dbuf[20]; |
---|
1596 | snprintf(dbuf, 20, "-gfx%u%u%u", amdGpuArchValues.major, amdGpuArchValues.minor, |
---|
1597 | amdGpuArchValues.stepping); |
---|
1598 | target += dbuf; |
---|
1599 | } |
---|
1600 | // elf notes |
---|
1601 | elfBinGen64.addNote({"AMD", sizeof noteDescType1, noteDescType1, 1U}); |
---|
1602 | std::unique_ptr<cxbyte[]> noteBuf(new cxbyte[0x1b]); |
---|
1603 | ::memcpy(noteBuf.get(), noteDescType3, 0x1b); |
---|
1604 | SULEV(*(uint32_t*)(noteBuf.get()+4), amdGpuArchValues.major); |
---|
1605 | SULEV(*(uint32_t*)(noteBuf.get()+8), amdGpuArchValues.minor); |
---|
1606 | SULEV(*(uint32_t*)(noteBuf.get()+12), amdGpuArchValues.stepping); |
---|
1607 | elfBinGen64.addNote({"AMD", 0x1b, noteBuf.get(), 3U}); |
---|
1608 | if (!target.empty()) |
---|
1609 | elfBinGen64.addNote({"AMD", target.size(), (const cxbyte*)target.c_str(), 0xbU}); |
---|
1610 | if (input->metadataSize != 0) |
---|
1611 | elfBinGen64.addNote({"AMD", input->metadataSize, |
---|
1612 | (const cxbyte*)input->metadata, 0xaU}); |
---|
1613 | |
---|
1614 | /// region and sections |
---|
1615 | elfBinGen64.addRegion(ElfRegion64::programHeaderTable()); |
---|
1616 | if (input->newBinFormat) |
---|
1617 | elfBinGen64.addRegion(ElfRegion64::noteSection()); |
---|
1618 | if (input->globalData != nullptr) |
---|
1619 | elfBinGen64.addRegion(ElfRegion64(input->globalDataSize, input->globalData, 4, |
---|
1620 | ".rodata", SHT_PROGBITS, SHF_ALLOC, 0, 0, Elf64Types::nobase)); |
---|
1621 | |
---|
1622 | elfBinGen64.addRegion(ElfRegion64(0, (const cxbyte*)nullptr, 8, |
---|
1623 | ".dynsym", SHT_DYNSYM, SHF_ALLOC, 0, 1, Elf64Types::nobase)); |
---|
1624 | elfBinGen64.addRegion(ElfRegion64(0, (const cxbyte*)nullptr, 4, |
---|
1625 | ".hash", SHT_HASH, SHF_ALLOC, |
---|
1626 | mainBuiltinSectTable[ELFSECTID_DYNSYM-ELFSECTID_START], 0, |
---|
1627 | Elf64Types::nobase)); |
---|
1628 | elfBinGen64.addRegion(ElfRegion64(0, (const cxbyte*)nullptr, 1, ".dynstr", SHT_STRTAB, |
---|
1629 | SHF_ALLOC, 0, 0, Elf64Types::nobase)); |
---|
1630 | // '.text' with alignment=4096 |
---|
1631 | elfBinGen64.addRegion(ElfRegion64(input->codeSize, (const cxbyte*)input->code, |
---|
1632 | 0x1000, ".text", SHT_PROGBITS, SHF_ALLOC|SHF_EXECINSTR, 0, 0, |
---|
1633 | Elf64Types::nobase, 0, false, 256)); |
---|
1634 | elfBinGen64.addRegion(ElfRegion64(0, (const cxbyte*)nullptr, 0x1000, |
---|
1635 | ".dynamic", SHT_DYNAMIC, SHF_ALLOC|SHF_WRITE, |
---|
1636 | mainBuiltinSectTable[ELFSECTID_DYNSTR-ELFSECTID_START], 0, |
---|
1637 | Elf64Types::nobase, 0, false, 8)); |
---|
1638 | if (!input->newBinFormat) |
---|
1639 | { |
---|
1640 | elfBinGen64.addRegion(ElfRegion64::noteSection()); |
---|
1641 | elfBinGen64.addRegion(ElfRegion64(0, (const cxbyte*)nullptr, 1, |
---|
1642 | ".AMDGPU.config", SHT_PROGBITS, 0)); |
---|
1643 | } |
---|
1644 | elfBinGen64.addRegion(ElfRegion64(commentSize, (const cxbyte*)comment, 1, ".comment", |
---|
1645 | SHT_PROGBITS, SHF_MERGE|SHF_STRINGS, 0, 0, 0, 1)); |
---|
1646 | elfBinGen64.addRegion(ElfRegion64(0, (const cxbyte*)nullptr, 8, |
---|
1647 | ".symtab", SHT_SYMTAB, 0, 0, 2)); |
---|
1648 | elfBinGen64.addRegion(ElfRegion64::shstrtabSection()); |
---|
1649 | elfBinGen64.addRegion(ElfRegion64::strtabSection()); |
---|
1650 | elfBinGen64.addRegion(ElfRegion64::sectionHeaderTable()); |
---|
1651 | |
---|
1652 | /* extra sections */ |
---|
1653 | for (const BinSection& section: input->extraSections) |
---|
1654 | elfBinGen64.addRegion(ElfRegion64(section, mainBuiltinSectTable, |
---|
1655 | ROCMSECTID_MAX, mainSectionsNum)); |
---|
1656 | /* extra symbols */ |
---|
1657 | for (const BinSymbol& symbol: input->extraSymbols) |
---|
1658 | elfBinGen64.addSymbol(ElfSymbol64(symbol, mainBuiltinSectTable, |
---|
1659 | ROCMSECTID_MAX, mainSectionsNum)); |
---|
1660 | |
---|
1661 | size_t binarySize = elfBinGen64.countSize(); |
---|
1662 | /**** |
---|
1663 | * prepare for write binary to output |
---|
1664 | ****/ |
---|
1665 | std::unique_ptr<std::ostream> outStreamHolder; |
---|
1666 | std::ostream* os = nullptr; |
---|
1667 | if (aPtr != nullptr) |
---|
1668 | { |
---|
1669 | aPtr->resize(binarySize); |
---|
1670 | outStreamHolder.reset( |
---|
1671 | new ArrayOStream(binarySize, reinterpret_cast<char*>(aPtr->data()))); |
---|
1672 | os = outStreamHolder.get(); |
---|
1673 | } |
---|
1674 | else if (vPtr != nullptr) |
---|
1675 | { |
---|
1676 | vPtr->resize(binarySize); |
---|
1677 | outStreamHolder.reset(new VectorOStream(*vPtr)); |
---|
1678 | os = outStreamHolder.get(); |
---|
1679 | } |
---|
1680 | else // from argument |
---|
1681 | os = osPtr; |
---|
1682 | |
---|
1683 | const std::ios::iostate oldExceptions = os->exceptions(); |
---|
1684 | try |
---|
1685 | { |
---|
1686 | os->exceptions(std::ios::failbit | std::ios::badbit); |
---|
1687 | /**** |
---|
1688 | * write binary to output |
---|
1689 | ****/ |
---|
1690 | FastOutputBuffer bos(256, *os); |
---|
1691 | elfBinGen64.generate(bos); |
---|
1692 | assert(bos.getWritten() == binarySize); |
---|
1693 | } |
---|
1694 | catch(...) |
---|
1695 | { |
---|
1696 | os->exceptions(oldExceptions); |
---|
1697 | throw; |
---|
1698 | } |
---|
1699 | os->exceptions(oldExceptions); |
---|
1700 | } |
---|
1701 | |
---|
1702 | void ROCmBinGenerator::generate(Array<cxbyte>& array) const |
---|
1703 | { |
---|
1704 | generateInternal(nullptr, nullptr, &array); |
---|
1705 | } |
---|
1706 | |
---|
1707 | void ROCmBinGenerator::generate(std::ostream& os) const |
---|
1708 | { |
---|
1709 | generateInternal(&os, nullptr, nullptr); |
---|
1710 | } |
---|
1711 | |
---|
1712 | void ROCmBinGenerator::generate(std::vector<char>& v) const |
---|
1713 | { |
---|
1714 | generateInternal(nullptr, &v, nullptr); |
---|
1715 | } |
---|