Changes between Version 30 and Version 31 of GcnInstrsVop2


Ignore:
Timestamp:
11/24/17 22:00:27 (6 years ago)
Author:
trac
Comment:

--

Legend:

Unmodified
Added
Removed
Modified
  • GcnInstrsVop2

    v30 v31  
    532532</tbody>
    533533</table>
     534<p>List of the instructions by opcode (GCN 1.4):</p>
     535<table>
     536<thead>
     537<tr>
     538<th>Opcode</th>
     539<th>Opcode(VOP3)</th>
     540<th>Mnemonic (GCN 1.4)</th>
     541</tr>
     542</thead>
     543<tbody>
     544<tr>
     545<td>0 (0x0)</td>
     546<td>256 (0x100)</td>
     547<td>V_CNDMASK_B32</td>
     548</tr>
     549<tr>
     550<td>1 (0x1)</td>
     551<td>257 (0x101)</td>
     552<td>V_ADD_F32</td>
     553</tr>
     554<tr>
     555<td>2 (0x2)</td>
     556<td>258 (0x102)</td>
     557<td>V_SUB_F32</td>
     558</tr>
     559<tr>
     560<td>3 (0x3)</td>
     561<td>259 (0x103)</td>
     562<td>V_SUBREV_F32</td>
     563</tr>
     564<tr>
     565<td>4 (0x4)</td>
     566<td>260 (0x104)</td>
     567<td>V_MUL_LEGACY_F32</td>
     568</tr>
     569<tr>
     570<td>5 (0x5)</td>
     571<td>261 (0x105)</td>
     572<td>V_MUL_F32</td>
     573</tr>
     574<tr>
     575<td>6 (0x6)</td>
     576<td>262 (0x106)</td>
     577<td>V_MUL_I32_I24</td>
     578</tr>
     579<tr>
     580<td>7 (0x7)</td>
     581<td>263 (0x107)</td>
     582<td>V_MUL_HI_I32_I24</td>
     583</tr>
     584<tr>
     585<td>8 (0x8)</td>
     586<td>264 (0x108)</td>
     587<td>V_MUL_U32_U24</td>
     588</tr>
     589<tr>
     590<td>9 (0x9)</td>
     591<td>265 (0x109)</td>
     592<td>V_MUL_HI_U32_U24</td>
     593</tr>
     594<tr>
     595<td>10 (0xa)</td>
     596<td>266 (0x10a)</td>
     597<td>V_MIN_F32</td>
     598</tr>
     599<tr>
     600<td>11 (0xb)</td>
     601<td>267 (0x10b)</td>
     602<td>V_MAX_F32</td>
     603</tr>
     604<tr>
     605<td>12 (0xc)</td>
     606<td>268 (0x10c)</td>
     607<td>V_MIN_I32</td>
     608</tr>
     609<tr>
     610<td>13 (0xd)</td>
     611<td>269 (0x10d)</td>
     612<td>V_MAX_I32</td>
     613</tr>
     614<tr>
     615<td>14 (0xe)</td>
     616<td>270 (0x10e)</td>
     617<td>V_MIN_U32</td>
     618</tr>
     619<tr>
     620<td>15 (0xf)</td>
     621<td>271 (0x10f)</td>
     622<td>V_MAX_U32</td>
     623</tr>
     624<tr>
     625<td>16 (0x10)</td>
     626<td>272 (0x110)</td>
     627<td>V_LSHRREV_B32</td>
     628</tr>
     629<tr>
     630<td>17 (0x11)</td>
     631<td>273 (0x111)</td>
     632<td>V_ASHRREV_I32</td>
     633</tr>
     634<tr>
     635<td>18 (0x12)</td>
     636<td>274 (0x112)</td>
     637<td>V_LSHLREV_B32</td>
     638</tr>
     639<tr>
     640<td>19 (0x13)</td>
     641<td>275 (0x113)</td>
     642<td>V_AND_B32</td>
     643</tr>
     644<tr>
     645<td>20 (0x14)</td>
     646<td>276 (0x114)</td>
     647<td>V_OR_B32</td>
     648</tr>
     649<tr>
     650<td>21 (0x15)</td>
     651<td>277 (0x115)</td>
     652<td>V_XOR_B32</td>
     653</tr>
     654<tr>
     655<td>22 (0x16)</td>
     656<td>278 (0x116)</td>
     657<td>V_MAC_F32</td>
     658</tr>
     659<tr>
     660<td>23 (0x17)</td>
     661<td>279 (0x117)</td>
     662<td>V_MADMK_F32</td>
     663</tr>
     664<tr>
     665<td>24 (0x18)</td>
     666<td>280 (0x118)</td>
     667<td>V_MADAK_F32</td>
     668</tr>
     669<tr>
     670<td>25 (0x19)</td>
     671<td>281 (0x119)</td>
     672<td>V_ADD_CO_U32 (VOP3B)</td>
     673</tr>
     674<tr>
     675<td>26 (0x1a)</td>
     676<td>282 (0x11a)</td>
     677<td>V_SUB_CO_U32 (VOP3B)</td>
     678</tr>
     679<tr>
     680<td>27 (0x1b)</td>
     681<td>283 (0x11b)</td>
     682<td>V_SUBREV_CO_U32 (VOP3B)</td>
     683</tr>
     684<tr>
     685<td>28 (0x1c)</td>
     686<td>284 (0x11c)</td>
     687<td>V_ADDC_CO_U32 (VOP3B)</td>
     688</tr>
     689<tr>
     690<td>29 (0x1d)</td>
     691<td>285 (0x11d)</td>
     692<td>V_SUBB_CO_U32 (VOP3B)</td>
     693</tr>
     694<tr>
     695<td>30 (0x1e)</td>
     696<td>286 (0x11e)</td>
     697<td>V_SUBBREV_CO_U32 (VOP3B)</td>
     698</tr>
     699<tr>
     700<td>31 (0x1f)</td>
     701<td>287 (0x11f)</td>
     702<td>V_ADD_F16</td>
     703</tr>
     704<tr>
     705<td>32 (0x20)</td>
     706<td>288 (0x120)</td>
     707<td>V_SUB_F16</td>
     708</tr>
     709<tr>
     710<td>33 (0x21)</td>
     711<td>289 (0x121)</td>
     712<td>V_SUBREV_F16</td>
     713</tr>
     714<tr>
     715<td>34 (0x22)</td>
     716<td>290 (0x122)</td>
     717<td>V_MUL_F16</td>
     718</tr>
     719<tr>
     720<td>35 (0x23)</td>
     721<td>291 (0x123)</td>
     722<td>V_MAC_F16</td>
     723</tr>
     724<tr>
     725<td>36 (0x24)</td>
     726<td>292 (0x124)</td>
     727<td>V_MADMK_F16</td>
     728</tr>
     729<tr>
     730<td>37 (0x25)</td>
     731<td>293 (0x125)</td>
     732<td>V_MADAK_F16</td>
     733</tr>
     734<tr>
     735<td>38 (0x26)</td>
     736<td>294 (0x126)</td>
     737<td>V_ADD_U16</td>
     738</tr>
     739<tr>
     740<td>39 (0x27)</td>
     741<td>295 (0x127)</td>
     742<td>V_SUB_U16</td>
     743</tr>
     744<tr>
     745<td>40 (0x28)</td>
     746<td>296 (0x128)</td>
     747<td>V_SUBREV_U16</td>
     748</tr>
     749<tr>
     750<td>41 (0x29)</td>
     751<td>297 (0x129)</td>
     752<td>V_MUL_LO_U16</td>
     753</tr>
     754<tr>
     755<td>42 (0x2a)</td>
     756<td>298 (0x12a)</td>
     757<td>V_LSHLREV_B16</td>
     758</tr>
     759<tr>
     760<td>43 (0x2b)</td>
     761<td>299 (0x12b)</td>
     762<td>V_LSHRREV_B16</td>
     763</tr>
     764<tr>
     765<td>44 (0x2c)</td>
     766<td>300 (0x12c)</td>
     767<td>V_ASHRREV_I16</td>
     768</tr>
     769<tr>
     770<td>45 (0x2d)</td>
     771<td>301 (0x12d)</td>
     772<td>V_MAX_F16</td>
     773</tr>
     774<tr>
     775<td>46 (0x2e)</td>
     776<td>302 (0x12e)</td>
     777<td>V_MIN_F16</td>
     778</tr>
     779<tr>
     780<td>47 (0x2f)</td>
     781<td>303 (0x12f)</td>
     782<td>V_MAX_U16</td>
     783</tr>
     784<tr>
     785<td>48 (0x30)</td>
     786<td>304 (0x130)</td>
     787<td>V_MAX_I16</td>
     788</tr>
     789<tr>
     790<td>49 (0x31)</td>
     791<td>305 (0x131)</td>
     792<td>V_MIN_U16</td>
     793</tr>
     794<tr>
     795<td>50 (0x32)</td>
     796<td>306 (0x132)</td>
     797<td>V_MIN_I16</td>
     798</tr>
     799<tr>
     800<td>51 (0x33)</td>
     801<td>307 (0x133)</td>
     802<td>V_LDEXP_F16</td>
     803</tr>
     804<tr>
     805<td>52 (0x34)</td>
     806<td>308 (0x134)</td>
     807<td>V_ADD_U32</td>
     808</tr>
     809<tr>
     810<td>53 (0x35)</td>
     811<td>309 (0x135)</td>
     812<td>V_SUB_U32</td>
     813</tr>
     814<tr>
     815<td>54 (0x36)</td>
     816<td>310 (0x136)</td>
     817<td>V_SUBREV_U32</td>
     818</tr>
     819</tbody>
     820</table>
    534821<h3>Instruction set</h3>
    535822<p>Alphabetically sorted instruction list:</p>
     
    548835Operation:<br />
    549836<code>VDST = ASFLOAT(SRC0) + ASFLOAT(SRC1)</code></p>
    550 <h4>V_ADD_I32, V_ADD_U32</h4>
     837<h4>V_ADD_CO_U32</h4>
     838<p>Opcode VOP2: 25 (0x19) for GCN 1.4<br />
     839Opcode VOP3B: 281 (0x119) for GCN 1.4<br />
     840Syntax VOP2: V_ADD_CO_U32 VDST, VCC, SRC0, SRC1<br />
     841Syntax VOP3B: V_ADD_CO_U32 VDST, SDST(2), SRC0, SRC1<br />
     842Description: Add SRC0 to SRC1, store the result to VDST, and store the carry flag to the
     843SDST (or VCC) bit whose number equals the lane id. SDST is 64-bit.
     844Bits for inactive threads in SDST are always zeroed.<br />
     845Operation:<br />
     846<code>UINT64 temp = (UINT64)SRC0 + (UINT64)SRC1
     847VDST = temp
     848SDST = 0
     849UINT64 mask = (1ULL&lt;&lt;LANEID)
     850SDST = (SDST&amp;~mask) | ((temp &gt;&gt; 32) ? mask : 0)</code></p>
     851<h4>V_ADD_I32, V_ADD_U32 (GCN 1.0/1.1/1.2)</h4>
    551852<p>Opcode VOP2: 37 (0x25) for GCN 1.0/1.1; 25 (0x19) for GCN 1.2<br />
    552853Opcode VOP3B: 293 (0x125) for GCN 1.0/1.1; 281 (0x119) for GCN 1.2<br />
     
    572873Operation:<br />
    573874<code>VDST = (SRC0 + SRC1) &amp; 0xffff</code></p>
    574 <h4>V_ADDC_U32</h4>
     875<h4>V_ADD_U32 (GCN 1.4)</h4>
     876<p>Opcode VOP2: 52 (0x34) for GCN 1.4<br />
     877Opcode VOP3A: 308 (0x134) for GCN 1.4<br />
     878Syntax: V_ADD_U32 VDST, SRC0, SRC1<br />
     879Description: Add SRC0 to SRC1 and store result to VDST.<br />
     880Operation:<br />
     881<code>VDST = SRC0 + SRC1</code></p>
     882<h4>V_ADDC_CO_U32</h4>
     883<p>Opcode VOP2: 28 (0x1c) for GCN 1.4<br />
     884Opcode VOP3B: 284 (0x11c) for GCN 1.4<br />
     885Syntax VOP2: V_ADDC_CO_U32 VDST, VCC, SRC0, SRC1, VCC<br />
     886Syntax VOP3B: V_ADDC_CO_U32 VDST, SDST(2), SRC0, SRC1, SSRC2(2)<br />
     887Description: Add SRC0 to SRC1 plus the carry taken from the SSRC2 bit whose number equals the lane id,
     888store the result to VDST, and store the carry flag to the SDST (or VCC) bit whose number
     889equals the lane id. SDST and SSRC2 are 64-bit.
     890Bits for inactive threads in SDST are always zeroed.<br />
     891Operation:<br />
     892<code>UINT64 mask = (1ULL&lt;&lt;LANEID)
     893UINT8 CC = ((SSRC2&amp;mask) ? 1 : 0)
     894UINT64 temp = (UINT64)SRC0 + (UINT64)SRC1 + CC
     895SDST = 0
     896VDST = temp
     897SDST = (SDST&amp;~mask) | ((temp &gt;&gt; 32) ? mask : 0)</code></p>
     898<h4>V_ADDC_U32 (GCN 1.0/1.1/1.2)</h4>
    575899<p>Opcode VOP2: 40 (0x28) for GCN 1.0/1.1; 28 (0x1c) for GCN 1.2<br />
    576900Opcode VOP3B: 296 (0x128) for GCN 1.0/1.1; 284 (0x11c) for GCN 1.2<br />
    577 Syntax VOP2 GCN 1.0/1.1: V_ADDC_U32 VDST, VCC, SRC0, SRC1, VCC<br />
    578 Syntax VOP3B GCN 1.2: V_ADDC_U32 VDST, SDST(2), SRC0, SRC1, SSRC2(2)<br />
     901Syntax VOP2: V_ADDC_U32 VDST, VCC, SRC0, SRC1, VCC<br />
     902Syntax VOP3B: V_ADDC_U32 VDST, SDST(2), SRC0, SRC1, SSRC2(2)<br />
    579903Description: Add SRC0 to SRC1 with carry stored in SSRC2 bit with number that equal lane id,
    580904and store result to VDST and store carry flag to SDST (or VCC) bit with number
     
    10861410Operation:<br />
    10871411<code>VDST = (SRC0 - SRC1) &amp; 0xffff</code></p>
    1088 <h4>V_SUB_I32, V_SUB_U32</h4>
     1412<h4>V_SUB_CO_U32</h4>
     1413<p>Opcode VOP2: 26 (0x1a) for GCN 1.4<br />
     1414Opcode VOP3B: 282 (0x11a) for GCN 1.4<br />
     1415Syntax VOP2: V_SUB_CO_U32 VDST, VCC, SRC0, SRC1<br />
     1416Syntax VOP3B: V_SUB_CO_U32 VDST, SDST(2), SRC0, SRC1<br />
     1417Description: Subtract SRC1 from SRC0, store the result to VDST, and store the borrow flag to the
     1418SDST (or VCC) bit whose number equals the lane id. SDST is 64-bit.
     1419Bits for inactive threads in SDST are always zeroed.<br />
     1420Operation:<br />
     1421<code>UINT64 temp = (UINT64)SRC0 - (UINT64)SRC1
     1422VDST = temp
     1423SDST = 0
     1424UINT64 mask = (1ULL&lt;&lt;LANEID)
     1425SDST = (SDST&amp;~mask) | ((temp&gt;&gt;32) ? mask : 0)</code></p>
     1426<h4>V_SUB_I32, V_SUB_U32 (GCN 1.0/1.1/1.2)</h4>
    10891427<p>Opcode VOP2: 38 (0x26) for GCN 1.0/1.1; 26 (0x1a) for GCN 1.2<br />
    10901428Opcode VOP3B: 294 (0x126) for GCN 1.0/1.1; 282 (0x11a) for GCN 1.2<br />
     
    11021440UINT64 mask = (1ULL&lt;&lt;LANEID)
    11031441SDST = (SDST&amp;~mask) | ((temp&gt;&gt;32) ? mask : 0)</code></p>
    1104 <h4>V_SUBB_U32</h4>
    1105 <p>Opcode VOP2: 41 (0x29) for GCN 1.0/1.1; 29 (0x1d) for GCN 1.2<br />
    1106 Opcode VOP3B: 297 (0x129) for GCN 1.0/1.1; 285 (0x11d) for GCN 1.2<br />
    1107 Syntax VOP2 GCN 1.0/1.1: V_SUBB_U32 VDST, VCC, SRC0, SRC1, VCC<br />
    1108 Syntax VOP3B GCN 1.2: V_SUBB_U32 VDST, SDST(2), SRC0, SRC1, SSRC2(2)<br />
     1442<h4>V_SUB_U32 (GCN 1.4)</h4>
     1443<p>Opcode VOP2: 53 (0x35) for GCN 1.4<br />
     1444Opcode VOP3A: 309 (0x135) for GCN 1.4<br />
     1445Syntax: V_SUB_U32 VDST, SRC0, SRC1<br />
     1446Description: Subtract SRC1 from SRC0 and store the result to VDST.<br />
     1447Operation:<br />
     1448<code>VDST = SRC0 - SRC1</code></p>
     1449<h4>V_SUBB_CO_U32</h4>
     1450<p>Opcode VOP2: 29 (0x1d) for GCN 1.4<br />
     1451Opcode VOP3B: 285 (0x11d) for GCN 1.4<br />
     1452Syntax VOP2: V_SUBB_CO_U32 VDST, VCC, SRC0, SRC1, VCC<br />
     1453Syntax VOP3B: V_SUBB_CO_U32 VDST, SDST(2), SRC0, SRC1, SSRC2(2)<br />
    11091454Description: Subtract SRC1 with borrow from SRC0,
    11101455and store result to VDST and store carry flag to SDST (or VCC) bit with number
     
    11181463VDST = temp
    11191464SDST = (SDST&amp;~mask) | ((temp &gt;&gt; 32) ? mask : 0)</code></p>
    1120 <h4>V_SUBBREV_U32</h4>
     1465<h4>V_SUBB_U32 (GCN 1.0/1.1/1.2)</h4>
     1466<p>Opcode VOP2: 41 (0x29) for GCN 1.0/1.1; 29 (0x1d) for GCN 1.2<br />
     1467Opcode VOP3B: 297 (0x129) for GCN 1.0/1.1; 285 (0x11d) for GCN 1.2<br />
     1468Syntax VOP2: V_SUBB_U32 VDST, VCC, SRC0, SRC1, VCC<br />
     1469Syntax VOP3B: V_SUBB_U32 VDST, SDST(2), SRC0, SRC1, SSRC2(2)<br />
     1470Description: Subtract SRC1 and the borrow from SRC0,
     1471store the result to VDST, and store the borrow flag to the SDST (or VCC) bit whose number
     1472equals the lane id. The input borrow is read from the SSRC2 bit whose number equals the lane id.
     1473SDST and SSRC2 are 64-bit. Bits for inactive threads in SDST are always zeroed.<br />
     1474Operation:<br />
     1475<code>UINT64 mask = (1ULL&lt;&lt;LANEID)
     1476UINT8 CC = ((SSRC2&amp;mask) ? 1 : 0)
     1477UINT64 temp = (UINT64)SRC0 - (UINT64)SRC1 - CC
     1478SDST = 0
     1479VDST = temp
     1480SDST = (SDST&amp;~mask) | ((temp &gt;&gt; 32) ? mask : 0)</code></p>
     1481<h4>V_SUBBREV_CO_U32</h4>
     1482<p>Opcode VOP2: 30 (0x1e) for GCN 1.4<br />
     1483Opcode VOP3B: 286 (0x11e) for GCN 1.4<br />
     1484Syntax VOP2: V_SUBBREV_CO_U32 VDST, VCC, SRC0, SRC1, VCC<br />
     1485Syntax VOP3B: V_SUBBREV_CO_U32 VDST, SDST(2), SRC0, SRC1, SSRC2(2)<br />
     1486Description: Subtract SRC0 and the borrow from SRC1,
     1487store the result to VDST, and store the borrow flag to the SDST (or VCC) bit whose number
     1488equals the lane id. The input borrow is read from the SSRC2 bit whose number equals the lane id.
     1489SDST and SSRC2 are 64-bit. Bits for inactive threads in SDST are always zeroed.<br />
     1490Operation:<br />
     1491<code>UINT64 mask = (1ULL&lt;&lt;LANEID)
     1492UINT8 CC = ((SSRC2&amp;mask) ? 1 : 0)
     1493UINT64 temp = (UINT64)SRC1 - (UINT64)SRC0 - CC
     1494SDST = 0
     1495VDST = temp
     1496SDST = (SDST&amp;~mask) | ((temp &gt;&gt; 32) ? mask : 0)</code></p>
     1497<h4>V_SUBBREV_U32 (GCN 1.0/1.1/1.2)</h4>
    11211498<p>Opcode VOP2: 42 (0x2a) for GCN 1.0/1.1; 30 (0x1e) for GCN 1.2<br />
    11221499Opcode VOP3B: 298 (0x12a) for GCN 1.0/1.1; 286 (0x11e) for GCN 1.2<br />
    1123 Syntax VOP2 GCN 1.0/1.1: V_SUBBREV_U32 VDST, VCC, SRC0, SRC1, VCC<br />
    1124 Syntax VOP3B GCN 1.2: V_SUBBREV_U32 VDST, SDST(2), SRC0, SRC1, SSRC2(2)<br />
     1500Syntax VOP2: V_SUBBREV_U32 VDST, VCC, SRC0, SRC1, VCC<br />
     1501Syntax VOP3B: V_SUBBREV_U32 VDST, SDST(2), SRC0, SRC1, SSRC2(2)<br />
    11251502Description: Subtract SRC0 with borrow from SRC1,
    11261503and store result to VDST and store carry flag to SDST (or VCC) bit with number
     
    11481525Operation:<br />
    11491526<code>VDST = ASFLOAT(SRC1) - ASFLOAT(SRC0)</code></p>
    1150 <h4>V_SUBREV_I32, V_SUBREV_U32</h4>
     1527<h4>V_SUBREV_I32, V_SUBREV_U32 (GCN 1.0/1.1/1.2)</h4>
    11511528<p>Opcode VOP2: 39 (0x27) for GCN 1.0/1.1; 27 (0x1b) for GCN 1.2<br />
    11521529Opcode VOP3B: 295 (0x127) for GCN 1.0/1.1; 283 (0x11b) for GCN 1.2<br />
     
    11721549Operation:<br />
    11731550<code>VDST = (SRC1 - SRC0) &amp; 0xffff</code></p>
     1551<h4>V_SUBREV_U32 (GCN 1.4)</h4>
     1552<p>Opcode VOP2: 54 (0x36) for GCN 1.4<br />
     1553Opcode VOP3A: 310 (0x136) for GCN 1.4<br />
     1554Syntax: V_SUBREV_U32 VDST, SRC0, SRC1<br />
     1555Description: Subtract SRC0 from SRC1 and store the result to VDST.<br />
     1556Operation:<br />
     1557<code>VDST = SRC1 - SRC0</code></p>
    11741558<h4>V_XOR_B32</h4>
    11751559<p>Opcode: VOP2: 29 (0x1d) for GCN 1.0/1.1; 21 (0x15) for GCN 1.2<br />