| 253 | <h2>Sample code</h2> |
| 254 | <p>This is a sample kernel setup in which the 256-byte kernel configuration is written out as raw bytes:</p> |
| 255 | <p><code>.rocm |
| 256 | .gpu Carrizo |
| 257 | .arch_minor 0 |
| 258 | .arch_stepping 1 |
| 259 | .kernel test1 |
| 260 | .kernel test2 |
| 261 | .text |
| 262 | test1: |
| 263 | .byte 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 |
| 264 | .byte 0x01, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00 |
| 265 | .byte 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 |
| 266 | .fill 24, 1, 0x00 |
| 267 | .byte 0x41, 0x00, 0x2c, 0x00, 0x90, 0x00, 0x00, 0x00 |
| 268 | .byte 0x0b, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00 |
| 269 | .fill 8, 1, 0x00 |
| 270 | .byte 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 |
| 271 | .byte 0x00, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x07, 0x00 |
| 272 | .fill 8, 1, 0x00 |
| 273 | .byte 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x06 |
| 274 | .fill 152, 1, 0x00 |
| 275 | /*c0020082 00000004*/ s_load_dword s2, s[4:5], 0x4 |
| 276 | /*c0060003 00000000*/ s_load_dwordx2 s[0:1], s[6:7], 0x0 |
| 277 | ....</code></p> |
| 278 | <p>The same kind of kernel setup, this time with the kernel configuration given in a <code>.config</code> section:</p> |
| 279 | <p><code>.rocm |
| 280 | .gpu Carrizo |
| 281 | .arch_minor 0 |
| 282 | .arch_stepping 1 |
| 283 | .kernel test1 |
| 284 | .config |
| 285 | .dims x |
| 286 | .sgprsnum 16 |
| 287 | .vgprsnum 8 |
| 288 | .dx10clamp |
| 289 | .floatmode 0xc0 |
| 290 | .priority 0 |
| 291 | .userdatanum 8 |
| 292 | .pgmrsrc1 0x002c0041 |
| 293 | .pgmrsrc2 0x00000090 |
| 294 | .codeversion 1, 0 |
| 295 | .machine 1, 8, 0, 1 |
| 296 | .kernel_code_entry_offset 0x100 |
| 297 | .use_private_segment_buffer |
| 298 | .use_dispatch_ptr |
| 299 | .use_kernarg_segment_ptr |
| 300 | .private_elem_size 4 |
| 301 | .use_ptr64 |
| 302 | .kernarg_segment_size 8 |
| 303 | .wavefront_sgpr_count 15 |
| 304 | .workitem_vgpr_count 7 |
| 305 | .kernarg_segment_align 16 |
| 306 | .group_segment_align 16 |
| 307 | .private_segment_align 16 |
| 308 | .wavefront_size 64 |
| 309 | .call_convention 0x0 |
| 310 | .control_directive # optional |
| 311 | .fill 128, 1, 0x00 |
| 312 | .text |
| 313 | test1: |
| 314 | .skip 256 # skip ROCm kernel configuration (required) |
| 315 | /*c0020082 00000004*/ s_load_dword s2, s[4:5], 0x4 |
| 316 | /*c0060003 00000000*/ s_load_dwordx2 s[0:1], s[6:7], 0x0 |
| 317 | /*bf8c007f */ s_waitcnt lgkmcnt(0) |
| 318 | /*8602ff02 0000ffff*/ s_and_b32 s2, s2, 0xffff |
| 319 | /*92020802 */ s_mul_i32 s2, s2, s8 |
| 320 | /*32000002 */ v_add_u32 v0, vcc, s2, v0 |
| 321 | /*2202009f */ v_ashrrev_i32 v1, 31, v0 |
| 322 | /*d28f0001 00020082*/ v_lshlrev_b64 v[1:2], 2, v[0:1] |
| 323 | /*32060200 */ v_add_u32 v3, vcc, s0, v1 |
| 324 | ...</code></p> |