| 581 | <p>A sample with metadata info for two kernels:</p> |
| 582 | <p><code>.rocm |
| 583 | .gpu Fiji |
| 584 | .arch_minor 0 |
| 585 | .arch_stepping 4 |
| 586 | .eflags 2 |
| 587 | .newbinfmt |
| 588 | .tripple "amdgcn-amd-amdhsa-amdgizcl" |
| 589 | .md_version 1, 0 |
| 590 | .kernel vectorAdd |
| 591 | .config |
| 592 | .dims x |
| 593 | .codeversion 1, 1 |
| 594 | .use_private_segment_buffer |
| 595 | .use_dispatch_ptr |
| 596 | .use_kernarg_segment_ptr |
| 597 | .private_elem_size 4 |
| 598 | .use_ptr64 |
| 599 | .kernarg_segment_align 16 |
| 600 | .group_segment_align 16 |
| 601 | .private_segment_align 16 |
| 602 | .control_directive |
| 603 | .fill 128, 1, 0x00 |
| 604 | .config |
| 605 | .md_language "OpenCL", 1, 2 |
| 606 | .arg n, "uint", 4, , value, u32 |
| 607 | .arg a, "float*", 8, , globalbuf, f32, global, default const volatile |
| 608 | .arg b, "float*", 8, , globalbuf, f32, global, default const |
| 609 | .arg c, "float*", 8, , globalbuf, f32, global, default |
| 610 | .arg , "", 8, , gox, i64 |
| 611 | .arg , "", 8, , goy, i64 |
| 612 | .arg , "", 8, , goz, i64 |
| 613 | .arg , "", 8, , printfbuf, i8 |
| 614 | .kernel vectorAdd2 |
| 615 | .config |
| 616 | .dims x |
| 617 | .codeversion 1, 1 |
| 618 | .use_private_segment_buffer |
| 619 | .use_dispatch_ptr |
| 620 | .use_kernarg_segment_ptr |
| 621 | .private_elem_size 4 |
| 622 | .use_ptr64 |
| 623 | .kernarg_segment_align 16 |
| 624 | .group_segment_align 16 |
| 625 | .private_segment_align 16 |
| 626 | .control_directive |
| 627 | .fill 128, 1, 0x00 |
| 628 | .config |
| 629 | .md_language "OpenCL", 1, 2 |
| 630 | .arg n, "uint", 4, , value, u32 |
| 631 | .arg a, "float*", 8, , globalbuf, f32, global, default const volatile |
| 632 | .arg b, "float*", 8, , globalbuf, f32, global, default const |
| 633 | .arg c, "float*", 8, , globalbuf, f32, global, default |
| 634 | .arg , "", 8, , gox, i64 |
| 635 | .arg , "", 8, , goy, i64 |
| 636 | .arg , "", 8, , goz, i64 |
| 637 | .arg , "", 8, , printfbuf, i8 |
| 638 | .text |
| 639 | vectorAdd: |
| 640 | .skip 256 # skip ROCm kernel configuration (required) |
| 641 | s_mov_b32 s8, s1 |
| 642 | ... |
| 643 | ... |
| 644 | s_endpgm |
| 645 | .p2align 8 # important alignment to 256-byte boundary |
| 646 | vectorAdd2: |
| 647 | .skip 256 |
| 648 | s_mov_b32 s8, s1 |
| 649 | ... |
| 650 | ... |
| 651 | s_endpgm</code></p> |