Changes between Version 28 and Version 29 of GcnInstrsVop3


Ignore:
Timestamp:
11/25/17 14:00:31 (6 years ago)
Author:
trac
Comment:

--

Legend:

Unmodified
Added
Removed
Modified
  • GcnInstrsVop3

    v28 v29  
    463463</tbody>
    464464</table>
    465 <p>List of the instructions by opcode (GCN 1.2):</p>
     465<p>List of the instructions by opcode (GCN 1.2/1.4):</p>
    466466<table>
    467467<thead>
    468468<tr>
    469469<th>Opcode</th>
     470<th>GCN 1.2</th>
     471<th>GCN 1.4</th>
    470472<th>Mnemonic</th>
    471473</tr>
     
    474476<tr>
    475477<td>448 (0x1c0)</td>
     478<td>✓</td>
     479<td>✓</td>
    476480<td>V_MAD_LEGACY_F32</td>
    477481</tr>
    478482<tr>
    479483<td>449 (0x1c1)</td>
     484<td>✓</td>
     485<td>✓</td>
    480486<td>V_MAD_F32</td>
    481487</tr>
    482488<tr>
    483489<td>450 (0x1c2)</td>
     490<td>✓</td>
     491<td>✓</td>
    484492<td>V_MAD_I32_I24</td>
    485493</tr>
    486494<tr>
    487495<td>451 (0x1c3)</td>
     496<td>✓</td>
     497<td>✓</td>
    488498<td>V_MAD_U32_U24</td>
    489499</tr>
    490500<tr>
    491501<td>452 (0x1c4)</td>
     502<td>✓</td>
     503<td>✓</td>
    492504<td>V_CUBEID_F32</td>
    493505</tr>
    494506<tr>
    495507<td>453 (0x1c5)</td>
     508<td>✓</td>
     509<td>✓</td>
    496510<td>V_CUBESC_F32</td>
    497511</tr>
    498512<tr>
    499513<td>454 (0x1c6)</td>
     514<td>✓</td>
     515<td>✓</td>
    500516<td>V_CUBETC_F32</td>
    501517</tr>
    502518<tr>
    503519<td>455 (0x1c7)</td>
     520<td>✓</td>
     521<td>✓</td>
    504522<td>V_CUBEMA_F32</td>
    505523</tr>
    506524<tr>
    507525<td>456 (0x1c8)</td>
     526<td>✓</td>
     527<td>✓</td>
    508528<td>V_BFE_U32</td>
    509529</tr>
    510530<tr>
    511531<td>457 (0x1c9)</td>
     532<td>✓</td>
     533<td>✓</td>
    512534<td>V_BFE_I32</td>
    513535</tr>
    514536<tr>
    515537<td>458 (0x1ca)</td>
     538<td>✓</td>
     539<td>✓</td>
    516540<td>V_BFI_B32</td>
    517541</tr>
    518542<tr>
    519543<td>459 (0x1cb)</td>
     544<td>✓</td>
     545<td>✓</td>
    520546<td>V_FMA_F32</td>
    521547</tr>
    522548<tr>
    523549<td>460 (0x1cc)</td>
     550<td>✓</td>
     551<td>✓</td>
    524552<td>V_FMA_F64</td>
    525553</tr>
    526554<tr>
    527555<td>461 (0x1cd)</td>
     556<td>✓</td>
     557<td>✓</td>
    528558<td>V_LERP_U8</td>
    529559</tr>
    530560<tr>
    531561<td>462 (0x1ce)</td>
     562<td>✓</td>
     563<td>✓</td>
    532564<td>V_ALIGNBIT_B32</td>
    533565</tr>
    534566<tr>
    535567<td>463 (0x1cf)</td>
     568<td>✓</td>
     569<td>✓</td>
    536570<td>V_ALIGNBYTE_B32</td>
    537571</tr>
    538572<tr>
    539573<td>464 (0x1d0)</td>
     574<td>✓</td>
     575<td>✓</td>
    540576<td>V_MIN3_F32</td>
    541577</tr>
    542578<tr>
    543579<td>465 (0x1d1)</td>
     580<td>✓</td>
     581<td>✓</td>
    544582<td>V_MIN3_I32</td>
    545583</tr>
    546584<tr>
    547585<td>466 (0x1d2)</td>
     586<td>✓</td>
     587<td>✓</td>
    548588<td>V_MIN3_U32</td>
    549589</tr>
    550590<tr>
    551591<td>467 (0x1d3)</td>
     592<td>✓</td>
     593<td>✓</td>
    552594<td>V_MAX3_F32</td>
    553595</tr>
    554596<tr>
    555597<td>468 (0x1d4)</td>
     598<td>✓</td>
     599<td>✓</td>
    556600<td>V_MAX3_I32</td>
    557601</tr>
    558602<tr>
    559603<td>469 (0x1d5)</td>
     604<td>✓</td>
     605<td>✓</td>
    560606<td>V_MAX3_U32</td>
    561607</tr>
    562608<tr>
    563609<td>470 (0x1d6)</td>
     610<td>✓</td>
     611<td>✓</td>
    564612<td>V_MED3_F32</td>
    565613</tr>
    566614<tr>
    567615<td>471 (0x1d7)</td>
     616<td>✓</td>
     617<td>✓</td>
    568618<td>V_MED3_I32</td>
    569619</tr>
    570620<tr>
    571621<td>472 (0x1d8)</td>
     622<td>✓</td>
     623<td>✓</td>
    572624<td>V_MED3_U32</td>
    573625</tr>
    574626<tr>
    575627<td>473 (0x1d9)</td>
     628<td>✓</td>
     629<td>✓</td>
    576630<td>V_SAD_U8</td>
    577631</tr>
    578632<tr>
    579633<td>474 (0x1da)</td>
     634<td>✓</td>
     635<td>✓</td>
    580636<td>V_SAD_HI_U8</td>
    581637</tr>
    582638<tr>
    583639<td>475 (0x1db)</td>
     640<td>✓</td>
     641<td>✓</td>
    584642<td>V_SAD_U16</td>
    585643</tr>
    586644<tr>
    587645<td>476 (0x1dc)</td>
     646<td>✓</td>
     647<td>✓</td>
    588648<td>V_SAD_U32</td>
    589649</tr>
    590650<tr>
    591651<td>477 (0x1dd)</td>
     652<td>✓</td>
     653<td>✓</td>
    592654<td>V_CVT_PK_U8_F32</td>
    593655</tr>
    594656<tr>
    595657<td>478 (0x1de)</td>
     658<td>✓</td>
     659<td>✓</td>
    596660<td>V_DIV_FIXUP_F32</td>
    597661</tr>
    598662<tr>
    599663<td>479 (0x1df)</td>
     664<td>✓</td>
     665<td>✓</td>
    600666<td>V_DIV_FIXUP_F64</td>
    601667</tr>
    602668<tr>
    603669<td>480 (0x1e0)</td>
     670<td>✓</td>
     671<td>✓</td>
    604672<td>V_DIV_SCALE_F32 (VOP3B)</td>
    605673</tr>
    606674<tr>
    607675<td>481 (0x1e1)</td>
     676<td>✓</td>
     677<td>✓</td>
    608678<td>V_DIV_SCALE_F64 (VOP3B)</td>
    609679</tr>
    610680<tr>
    611681<td>482 (0x1e2)</td>
     682<td>✓</td>
     683<td>✓</td>
    612684<td>V_DIV_FMAS_F32</td>
    613685</tr>
    614686<tr>
    615687<td>483 (0x1e3)</td>
     688<td>✓</td>
     689<td>✓</td>
    616690<td>V_DIV_FMAS_F64</td>
    617691</tr>
    618692<tr>
    619693<td>484 (0x1e4)</td>
     694<td>✓</td>
     695<td>✓</td>
    620696<td>V_MSAD_U8</td>
    621697</tr>
    622698<tr>
    623699<td>485 (0x1e5)</td>
     700<td>✓</td>
     701<td>✓</td>
    624702<td>V_QSAD_PK_U16_U8</td>
    625703</tr>
    626704<tr>
    627705<td>486 (0x1e6)</td>
     706<td>✓</td>
     707<td>✓</td>
    628708<td>V_MQSAD_PK_U16_U8</td>
    629709</tr>
    630710<tr>
    631711<td>487 (0x1e7)</td>
     712<td>✓</td>
     713<td>✓</td>
    632714<td>V_MQSAD_U32_U8</td>
    633715</tr>
    634716<tr>
    635717<td>488 (0x1e8)</td>
     718<td>✓</td>
     719<td>✓</td>
    636720<td>V_MAD_U64_U32 (VOP3B)</td>
    637721</tr>
    638722<tr>
    639723<td>489 (0x1e9)</td>
     724<td>✓</td>
     725<td>✓</td>
    640726<td>V_MAD_I64_I32 (VOP3B)</td>
    641727</tr>
    642728<tr>
    643729<td>490 (0x1ea)</td>
     730<td>✓</td>
     731<td>✓</td>
    644732<td>V_MAD_F16</td>
    645733</tr>
    646734<tr>
    647735<td>491 (0x1eb)</td>
     736<td>✓</td>
     737<td>✓</td>
    648738<td>V_MAD_U16</td>
    649739</tr>
    650740<tr>
    651741<td>492 (0x1ec)</td>
     742<td>✓</td>
     743<td>✓</td>
    652744<td>V_MAD_I16</td>
    653745</tr>
    654746<tr>
    655747<td>493 (0x1ed)</td>
     748<td>✓</td>
     749<td>✓</td>
    656750<td>V_PERM_B32</td>
    657751</tr>
    658752<tr>
    659753<td>494 (0x1ee)</td>
     754<td>✓</td>
     755<td>✓</td>
    660756<td>V_FMA_F16</td>
    661757</tr>
    662758<tr>
    663759<td>495 (0x1ef)</td>
     760<td>✓</td>
     761<td>✓</td>
    664762<td>V_DIV_FIXUP_F16</td>
    665763</tr>
    666764<tr>
    667765<td>496 (0x1f0)</td>
     766<td>✓</td>
     767<td>✓</td>
    668768<td>V_CVT_PKACCUM_U8_F32</td>
    669769</tr>
    670770<tr>
    671771<td>624 (0x270)</td>
     772<td>✓</td>
     773<td>✓</td>
    672774<td>V_INTERP_P1_F32 (VINTRP)</td>
    673775</tr>
    674776<tr>
    675777<td>625 (0x271)</td>
     778<td>✓</td>
     779<td>✓</td>
    676780<td>V_INTERP_P2_F32 (VINTRP)</td>
    677781</tr>
    678782<tr>
    679783<td>626 (0x272)</td>
     784<td>✓</td>
     785<td>✓</td>
    680786<td>V_INTERP_MOV_F32 (VINTRP)</td>
    681787</tr>
    682788<tr>
    683789<td>627 (0x273)</td>
     790<td>✓</td>
     791<td>✓</td>
    684792<td>V_INTERP_P1LL_F16 (VINTRP)</td>
    685793</tr>
    686794<tr>
    687795<td>628 (0x274)</td>
     796<td>✓</td>
     797<td>✓</td>
    688798<td>V_INTERP_P1LV_F16 (VINTRP)</td>
    689799</tr>
    690800<tr>
    691801<td>629 (0x275)</td>
     802<td>✓</td>
     803<td>✓</td>
    692804<td>V_INTERP_P2_F16 (VINTRP)</td>
    693805</tr>
    694806<tr>
    695807<td>640 (0x280)</td>
     808<td>✓</td>
     809<td>✓</td>
    696810<td>V_ADD_F64</td>
    697811</tr>
    698812<tr>
    699813<td>641 (0x281)</td>
     814<td>✓</td>
     815<td>✓</td>
    700816<td>V_MUL_F64</td>
    701817</tr>
    702818<tr>
    703819<td>642 (0x282)</td>
     820<td>✓</td>
     821<td>✓</td>
    704822<td>V_MIN_F64</td>
    705823</tr>
    706824<tr>
    707825<td>643 (0x283)</td>
     826<td>✓</td>
     827<td>✓</td>
    708828<td>V_MAX_F64</td>
    709829</tr>
    710830<tr>
    711831<td>644 (0x284)</td>
     832<td>✓</td>
     833<td>✓</td>
    712834<td>V_LDEXP_F64</td>
    713835</tr>
    714836<tr>
    715837<td>645 (0x285)</td>
     838<td>✓</td>
     839<td>✓</td>
    716840<td>V_MUL_LO_U32</td>
    717841</tr>
    718842<tr>
    719843<td>646 (0x286)</td>
     844<td>✓</td>
     845<td>✓</td>
    720846<td>V_MUL_HI_U32</td>
    721847</tr>
    722848<tr>
    723849<td>647 (0x287)</td>
     850<td>✓</td>
     851<td>✓</td>
    724852<td>V_MUL_HI_I32</td>
    725853</tr>
    726854<tr>
    727855<td>648 (0x288)</td>
     856<td>✓</td>
     857<td>✓</td>
    728858<td>V_LDEXP_F32</td>
    729859</tr>
    730860<tr>
    731861<td>649 (0x289)</td>
     862<td>✓</td>
     863<td>✓</td>
    732864<td>V_READLANE_B32</td>
    733865</tr>
    734866<tr>
    735867<td>650 (0x28a)</td>
     868<td>✓</td>
     869<td>✓</td>
    736870<td>V_WRITELANE_B32</td>
    737871</tr>
    738872<tr>
    739873<td>651 (0x28b)</td>
     874<td>✓</td>
     875<td>✓</td>
    740876<td>V_BCNT_U32_B32</td>
    741877</tr>
    742878<tr>
    743879<td>652 (0x28c)</td>
     880<td>✓</td>
     881<td>✓</td>
    744882<td>V_MBCNT_LO_U32_B32</td>
    745883</tr>
    746884<tr>
    747885<td>653 (0x28d)</td>
     886<td>✓</td>
     887<td>✓</td>
    748888<td>V_MBCNT_HI_U32_B32</td>
    749889</tr>
    750890<tr>
    751891<td>654 (0x28e)</td>
     892<td>✓</td>
     893<td>✓</td>
    752894<td>V_MAC_LEGACY_F32</td>
    753895</tr>
    754896<tr>
    755897<td>655 (0x28f)</td>
     898<td>✓</td>
     899<td>✓</td>
    756900<td>V_LSHLREV_B64</td>
    757901</tr>
    758902<tr>
    759903<td>656 (0x290)</td>
     904<td>✓</td>
     905<td>✓</td>
    760906<td>V_LSHRREV_B64</td>
    761907</tr>
    762908<tr>
    763909<td>657 (0x291)</td>
     910<td>✓</td>
     911<td>✓</td>
    764912<td>V_ASHRREV_I64</td>
    765913</tr>
    766914<tr>
    767915<td>658 (0x292)</td>
     916<td>✓</td>
     917<td>✓</td>
    768918<td>V_TRIG_PREOP_F64</td>
    769919</tr>
    770920<tr>
    771921<td>659 (0x293)</td>
     922<td>✓</td>
     923<td>✓</td>
    772924<td>V_BFM_B32</td>
    773925</tr>
    774926<tr>
    775927<td>660 (0x294)</td>
     928<td>✓</td>
     929<td>✓</td>
    776930<td>V_CVT_PKNORM_I16_F32</td>
    777931</tr>
    778932<tr>
    779933<td>661 (0x295)</td>
     934<td>✓</td>
     935<td>✓</td>
    780936<td>V_CVT_PKNORM_U16_F32</td>
    781937</tr>
    782938<tr>
    783939<td>662 (0x296)</td>
     940<td>✓</td>
     941<td>✓</td>
    784942<td>V_CVT_PKRTZ_F16_F32</td>
    785943</tr>
    786944<tr>
    787945<td>663 (0x297)</td>
     946<td>✓</td>
     947<td>✓</td>
    788948<td>V_CVT_PK_U16_U32</td>
    789949</tr>
    790950<tr>
    791951<td>664 (0x298)</td>
     952<td>✓</td>
     953<td>✓</td>
    792954<td>V_CVT_PK_I16_I32</td>
     955</tr>
     956<tr>
     957<td>665 (0x299)</td>
     958<td></td>
     959<td>✓</td>
     960<td>V_CVT_PKNORM_I16_F16</td>
     961</tr>
     962<tr>
     963<td>666 (0x29a)</td>
     964<td></td>
     965<td>✓</td>
     966<td>V_CVT_PKNORM_U16_F16</td>
    793967</tr>
    794968</tbody>
     
    9871161    VAL8 = (UINT8)MAX(MIN(f, 255.0), 0.0)
    9881162VDST = (VDST&amp;~mask) | (((UINT32)VAL8) &lt;&lt; shift)</code></p>
     1163<h4>V_CVT_PKNORM_I16_F16</h4>
     1164<p>Opcode: 665 (0x299) for GCN 1.4<br />
     1165Syntax: V_CVT_PKNORM_I16_F16 VDST, SRC0, SRC1<br />
     1166Description: Convert normalized half FP values from SRC0 and SRC1 to
     1167signed 16-bit integers with rounding to nearest even (??),
     1168and store the first value in the low 16 bits and the second in the high 16 bits of VDST.<br />
     1169Operation:<br />
     1170<code>INT16 roundNorm(HALF S)
     1171{
     1172    FLOAT f = RNDNEINT(S*32767)
     1173    if (ISNAN(f))
     1174        return 0
     1175    return (INT16)MAX(MIN(f, 32767.0), -32767.0)
     1176}
     1177VDST = roundNorm(ASHALF(SRC0)) | ((UINT32)roundNorm(ASHALF(SRC1)) &lt;&lt; 16)</code></p>
    9891178<h4>V_CVT_PKNORM_I16_F32</h4>
    9901179<p>Opcode: 660 (0x294) for GCN 1.2<br />
     
    10021191}
    10031192VDST = roundNorm(ASFLOAT(SRC0)) | ((UINT32)roundNorm(ASFLOAT(SRC1)) &lt;&lt; 16)</code></p>
     1193<h4>V_CVT_PKNORM_U16_F16</h4>
     1194<p>Opcode: 666 (0x29a) for GCN 1.4<br />
     1195Syntax: V_CVT_PKNORM_U16_F16 VDST, SRC0, SRC1<br />
     1196Description: Convert normalized half FP values from SRC0 and SRC1 to
     1197unsigned 16-bit integers with rounding to nearest even (??),
     1198and store the first value in the low 16 bits and the second in the high 16 bits of VDST.<br />
     1199Operation:<br />
     1200<code>UINT16 roundNorm(HALF S)
     1201{
     1202    HALF f = RNDNEINT(S*65535.0)
     1203    if (ISNAN(f))
     1204        return 0
     1205    return (UINT16)MAX(MIN(f, 65535.0), 0.0)
     1206}
     1207VDST = roundNorm(ASHALF(SRC0)) | ((UINT32)roundNorm(ASHALF(SRC1)) &lt;&lt; 16)</code></p>
    10041208<h4>V_CVT_PKNORM_U16_F32</h4>
    10051209<p>Opcode: 661 (0x295) for GCN 1.2<br />