| | 253 | <h2>Sample code</h2> |
| | 254 | <p>This is an example of the kernel setup:</p> |
| | 255 | <p><code>.rocm |
| | 256 | .gpu Carrizo |
| | 257 | .arch_minor 0 |
| | 258 | .arch_stepping 1 |
| | 259 | .kernel test1 |
| | 260 | .kernel test2 |
| | 261 | .text |
| | 262 | test1: |
| | 263 | .byte 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 |
| | 264 | .byte 0x01, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00 |
| | 265 | .byte 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 |
| | 266 | .fill 24, 1, 0x00 |
| | 267 | .byte 0x41, 0x00, 0x2c, 0x00, 0x90, 0x00, 0x00, 0x00 |
| | 268 | .byte 0x0b, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00 |
| | 269 | .fill 8, 1, 0x00 |
| | 270 | .byte 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 |
| | 271 | .byte 0x00, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x07, 0x00 |
| | 272 | .fill 8, 1, 0x00 |
| | 273 | .byte 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x06 |
| | 274 | .fill 152, 1, 0x00 |
| | 275 | /*c0020082 00000004*/ s_load_dword s2, s[4:5], 0x4 |
| | 276 | /*c0060003 00000000*/ s_load_dwordx2 s[0:1], s[6:7], 0x0 |
| | 277 | ...</code></p> |
| | 278 | <p>The same kernel setup, written with a kernel configuration (<code>.config</code>) section:</p> |
| | 279 | <p><code>.rocm |
| | 280 | .gpu Carrizo |
| | 281 | .arch_minor 0 |
| | 282 | .arch_stepping 1 |
| | 283 | .kernel test1 |
| | 284 | .config |
| | 285 | .dims x |
| | 286 | .sgprsnum 16 |
| | 287 | .vgprsnum 8 |
| | 288 | .dx10clamp |
| | 289 | .floatmode 0xc0 |
| | 290 | .priority 0 |
| | 291 | .userdatanum 8 |
| | 292 | .pgmrsrc1 0x002c0041 |
| | 293 | .pgmrsrc2 0x00000090 |
| | 294 | .codeversion 1, 0 |
| | 295 | .machine 1, 8, 0, 1 |
| | 296 | .kernel_code_entry_offset 0x100 |
| | 297 | .use_private_segment_buffer |
| | 298 | .use_dispatch_ptr |
| | 299 | .use_kernarg_segment_ptr |
| | 300 | .private_elem_size 4 |
| | 301 | .use_ptr64 |
| | 302 | .kernarg_segment_size 8 |
| | 303 | .wavefront_sgpr_count 15 |
| | 304 | .workitem_vgpr_count 7 |
| | 305 | .kernarg_segment_align 16 |
| | 306 | .group_segment_align 16 |
| | 307 | .private_segment_align 16 |
| | 308 | .wavefront_size 64 |
| | 309 | .call_convention 0x0 |
| | 310 | .control_directive # optional |
| | 311 | .fill 128, 1, 0x00 |
| | 312 | .text |
| | 313 | test1: |
| | 314 | .skip 256 # skip ROCm kernel configuration (required) |
| | 315 | /*c0020082 00000004*/ s_load_dword s2, s[4:5], 0x4 |
| | 316 | /*c0060003 00000000*/ s_load_dwordx2 s[0:1], s[6:7], 0x0 |
| | 317 | /*bf8c007f */ s_waitcnt lgkmcnt(0) |
| | 318 | /*8602ff02 0000ffff*/ s_and_b32 s2, s2, 0xffff |
| | 319 | /*92020802 */ s_mul_i32 s2, s2, s8 |
| | 320 | /*32000002 */ v_add_u32 v0, vcc, s2, v0 |
| | 321 | /*2202009f */ v_ashrrev_i32 v1, 31, v0 |
| | 322 | /*d28f0001 00020082*/ v_lshlrev_b64 v[1:2], 2, v[0:1] |
| | 323 | /*32060200 */ v_add_u32 v3, vcc, s0, v1 |
| | 324 | ...</code></p> |