Changeset 3267 in CLRX


Ignore:
Timestamp:
Aug 31, 2017, 1:31:44 PM (13 months ago)
Author:
matszpk
Message:

CLRadeonExtender: Write ReverseBits code for GalliumCompute with LLVM 4.0 or later.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • CLRadeonExtender/trunk/samples/ReverseBits.cpp

    r3157 r3267  
    258 258    .if LLVM_VERSION>=40000
    259 259        .skip 256
    260     .endif
     260        s_load_dword s2, s[4:5], 1*SMUL         # s2 - local_size(0)
     261        s_load_dword s3, s[6:7], 0              # s3 - n
     262        s_load_dword s1, s[6:7], 7*SMUL         # s1 - global_offset(0)
     263        s_load_dwordx2 s[10:11], s[6:7], 2*SMUL   # s[10:11] - input pointer
     264        s_load_dwordx2 s[6:7], s[6:7], 4*SMUL  # s[6:7] - output pointer
     265        s_waitcnt lgkmcnt(0)            # wait for results
     266        s_and_b32 s2, s2, 0xffff        # only local_size(0)
     267        s_mul_i32 s0, s2, s8            # s0 - local_size(0)*group_id(0)
     268        s_mov_b64 s[8:9], s[10:11]      # move input pointer to proper place
     269        s_add_u32 s0, s0, s1            # s0 - local_size(0)*group_id(0)+global_offset(0)
     270        v_add_i32 v0, vcc, s0, v0       # v0 - s0+local_id(0) -> global_id(0)
     271        v_cmp_gt_u32 vcc, s3, v0                # global_id(0) < n
     272        s_and_saveexec_b64 s[0:1], vcc          # lock all threads with id>=n
     273        s_cbranch_execz end                     # no active threads, we jump to end
     274    .else
    261 275        s_load_dword s2, s[0:1], 6*SMUL         # s2 - local_size(0)
    262 276        s_load_dword s3, s[0:1], 9*SMUL         # s3 - n
     
    271 285        s_and_saveexec_b64 s[0:1], vcc          # lock all threads with id>=n
    272 286        s_cbranch_execz end                     # no active threads, we jump to end
     287    .endif
    273 288    .ifnarch GCN1.2
    274 289        s_mov_b32 s4, s6
Note: See TracChangeset for help on using the changeset viewer.