Changes between Version 25 and Version 26 of GcnTimings


Ignore:
Timestamp:
12/06/17 20:00:32 (6 years ago)
Author:
trac
Comment:

--

Legend:

Unmodified
Added
Removed
Modified
  • GcnTimings

    v25 v26  
    258258<td>V_BFREV_B32</td>
    259259<td>4</td>
    260 <td>V_FREXP_EXP_I32_F32</td>
    261 <td>4</td>
     260<td>V_FREXP_EXP_I32_F64</td>
     261<td>DPFACTOR*4</td>
    262262</tr>
    263263<tr>
    264264<td>V_CEIL_F16</td>
    265265<td>4</td>
    266 <td>V_FREXP_EXP_I32_F64</td>
    267 <td>DPFACTOR*4</td>
     266<td>V_FREXP_MANT_F16</td>
     267<td>4</td>
    268268</tr>
    269269<tr>
    270270<td>V_CEIL_F32</td>
    271271<td>4</td>
    272 <td>V_FREXP_MANT_F16</td>
     272<td>V_FREXP_MANT_F32</td>
    273273<td>4</td>
    274274</tr>
     
    276276<td>V_CEIL_F64</td>
    277277<td>DPFACTOR*4</td>
    278 <td>V_FREXP_MANT_F32</td>
    279 <td>4</td>
     278<td>V_FREXP_MANT_F64</td>
     279<td>DPFACTOR*4</td>
    280280</tr>
    281281<tr>
    282282<td>V_CLREXCP</td>
    283283<td>4</td>
    284 <td>V_FREXP_MANT_F64</td>
    285 <td>DPFACTOR*4</td>
     284<td>V_LOG_CLAMP_F32</td>
     285<td>16</td>
    286286</tr>
    287287<tr>
    288288<td>V_COS_F16</td>
    289289<td>16</td>
    290 <td>V_LOG_CLAMP_F32</td>
     290<td>V_LOG_F16</td>
    291291<td>16</td>
    292292</tr>
     
    294294<td>V_COS_F32</td>
    295295<td>16</td>
    296 <td>V_LOG_F16</td>
     296<td>V_LOG_F32</td>
    297297<td>16</td>
    298298</tr>
     
    300300<td>V_CVT_F16_F32</td>
    301301<td>4</td>
    302 <td>V_LOG_F32</td>
     302<td>V_LOG_LEGACY_F32</td>
    303303<td>16</td>
    304304</tr>
     
    306306<td>V_CVT_F16_I16</td>
    307307<td>4</td>
    308 <td>V_LOG_LEGACY_F32</td>
    309 <td>16</td>
     308<td>V_MBCNT_LO_U32_B32</td>
     309<td>4</td>
    310310</tr>
    311311<tr>
    312312<td>V_CVT_F16_U16</td>
    313313<td>4</td>
    314 <td>V_MBCNT_LO_U32_B32</td>
     314<td>V_MBCNT_HI_U32_B32</td>
    315315<td>4</td>
    316316</tr>
     
    318318<td>V_CVT_F32_F16</td>
    319319<td>4</td>
    320 <td>V_MBCNT_HI_U32_B32</td>
     320<td>V_MOVRELD_B32</td>
    321321<td>4</td>
    322322</tr>
     
    324324<td>V_CVT_F32_F64</td>
    325325<td>DPFACTOR*4</td>
    326 <td>V_MOVRELD_B32</td>
     326<td>V_MOVRELSD_B32</td>
    327327<td>4</td>
    328328</tr>
     
    330330<td>V_CVT_F32_I32</td>
    331331<td>4</td>
    332 <td>V_MOVRELSD_B32</td>
     332<td>V_MOVRELS_B32</td>
    333333<td>4</td>
    334334</tr>
     
    336336<td>V_CVT_F32_U32</td>
    337337<td>4</td>
    338 <td>V_MOVRELS_B32</td>
     338<td>V_MOV_B32</td>
    339339<td>4</td>
    340340</tr>
     
    342342<td>V_CVT_F32_UBYTE0</td>
    343343<td>4</td>
    344 <td>V_MOV_B32</td>
     344<td>V_MOV_FED_B32</td>
    345345<td>4</td>
    346346</tr>
     
    348348<td>V_CVT_F32_UBYTE1</td>
    349349<td>4</td>
    350 <td>V_MOV_FED_B32</td>
     350<td>V_MOV_PRSV_B32</td>
    351351<td>4</td>
    352352</tr>
     
    406406</tr>
    407407<tr>
     408<td>V_CVT_NORM_I16_F16</td>
     409<td>4</td>
     410<td>V_READFIRSTLANE_B32</td>
     411<td>4</td>
     412</tr>
     413<tr>
     414<td>V_CVT_NORM_U16_F16</td>
     415<td>4</td>
     416<td>V_RNDNE_F16</td>
     417<td>4</td>
     418</tr>
     419<tr>
    408420<td>V_CVT_OFF_F32_I4</td>
    409421<td>4</td>
    410 <td>V_READFIRSTLANE_B32</td>
     422<td>V_RNDNE_F32</td>
    411423<td>4</td>
    412424</tr>
     
    414426<td>V_CVT_RPI_I32_F32</td>
    415427<td>4</td>
    416 <td>V_RNDNE_F16</td>
    417 <td>4</td>
     428<td>V_RNDNE_F64</td>
     429<td>DPFACTOR*4</td>
    418430</tr>
    419431<tr>
    420432<td>V_CVT_U16_F16</td>
    421433<td>4</td>
    422 <td>V_RNDNE_F32</td>
    423 <td>4</td>
     434<td>V_RSQ_CLAMP_F32</td>
     435<td>16</td>
    424436</tr>
    425437<tr>
    426438<td>V_CVT_U32_F32</td>
    427439<td>4</td>
    428 <td>V_RNDNE_F64</td>
    429 <td>DPFACTOR*4</td>
    430 </tr>
    431 <tr>
    432 <td>V_CVT_U32_F64</td>
    433 <td>DPFACTOR*4</td>
    434 <td>V_RSQ_CLAMP_F32</td>
    435 <td>16</td>
    436 </tr>
    437 <tr>
    438 <td>V_EXP_F16</td>
    439 <td>16</td>
    440440<td>V_RSQ_CLAMP_F64</td>
    441441<td>DPFACTOR*8</td>
    442442</tr>
    443443<tr>
     444<td>V_CVT_U32_F64</td>
     445<td>DPFACTOR*4</td>
     446<td>V_RSQ_F16</td>
     447<td>16</td>
     448</tr>
     449<tr>
     450<td>V_EXP_F16</td>
     451<td>16</td>
     452<td>V_RSQ_F32</td>
     453<td>16</td>
     454</tr>
     455<tr>
    444456<td>V_EXP_F32</td>
    445457<td>16</td>
    446 <td>V_RSQ_F16</td>
    447 <td>16</td>
    448 </tr>
    449 <tr>
    450 <td>V_EXP_LEGACY_F32</td>
    451 <td>16</td>
    452 <td>V_RSQ_F32</td>
    453 <td>16</td>
    454 </tr>
    455 <tr>
    456 <td>V_FFBH_I32</td>
    457 <td>4</td>
    458458<td>V_RSQ_F64</td>
    459459<td>DPFACTOR*8</td>
    460460</tr>
    461461<tr>
     462<td>V_EXP_LEGACY_F32</td>
     463<td>16</td>
     464<td>V_RSQ_LEGACY_F32</td>
     465<td>16</td>
     466</tr>
     467<tr>
     468<td>V_FFBH_I32</td>
     469<td>4</td>
     470<td>V_SAT_PK_U8_I16</td>
     471<td>4</td>
     472</tr>
     473<tr>
    462474<td>V_FFBH_U32</td>
    463475<td>4</td>
    464 <td>V_RSQ_LEGACY_F32</td>
    465 <td>16</td>
     476<td>V_SCREEN_PARTITION_4SE_B32</td>
     477<td>4</td>
    466478</tr>
    467479<tr>
     
    498510<td>V_FRACT_F32</td>
    499511<td>4</td>
     512<td>V_SWAP_B32</td>
     513<td>8</td>
     514</tr>
     515<tr>
     516<td>V_FRACT_F64</td>
     517<td>DPFACTOR*4</td>
    500518<td>V_TRUNC_F16</td>
    501519<td>4</td>
    502520</tr>
    503521<tr>
    504 <td>V_FRACT_F64</td>
    505 <td>DPFACTOR*4</td>
     522<td>V_FREXP_EXP_I16_F16</td>
     523<td>4</td>
    506524<td>V_TRUNC_F32</td>
    507525<td>4</td>
    508526</tr>
    509527<tr>
    510 <td>V_FREXP_EXP_I16_F16</td>
     528<td>V_FREXP_EXP_I32_F32</td>
    511529<td>4</td>
    512530<td>V_TRUNC_F64</td>
     
    537555<tbody>
    538556<tr>
     557<td>V_ADD3_U32</td>
     558<td>4</td>
     559<td>V_MAD_LEGACY_U16</td>
     560<td>4</td>
     561</tr>
     562<tr>
    539563<td>V_ADD_F64</td>
    540564<td>DPFACTOR*4</td>
    541 <td>V_MAD_LEGACY_F32</td>
     565<td>V_MAD_U16</td>
     566<td>4</td>
     567</tr>
     568<tr>
     569<td>V_ADD_LSHL_U32</td>
     570<td>4</td>
     571<td>V_MAD_U32_U16</td>
    542572<td>4</td>
    543573</tr>
     
    545575<td>V_ALIGNBIT_B32</td>
    546576<td>4</td>
    547 <td>V_MAD_U16</td>
     577<td>V_MAD_U32_U24</td>
    548578<td>4</td>
    549579</tr>
     
    551581<td>V_ALIGNBYTE_B32</td>
    552582<td>4</td>
    553 <td>V_MAD_U32_U24</td>
     583<td>V_MAD_U64_U32</td>
     584<td>16</td>
     585</tr>
     586<tr>
     587<td>V_AND_OR_B32</td>
     588<td>4</td>
     589<td>V_MAX3_F16</td>
    554590<td>4</td>
    555591</tr>
     
    557593<td>V_ASHR_I64</td>
    558594<td>DPFACTOR*4</td>
    559 <td>V_MAD_U64_U32</td>
    560 <td>16</td>
     595<td>V_MAX3_F32</td>
     596<td>4</td>
    561597</tr>
    562598<tr>
    563599<td>V_ASHRREV_I64</td>
    564600<td>DPFACTOR*4</td>
    565 <td>V_MAX3_F32</td>
     601<td>V_MAX3_I16</td>
    566602<td>4</td>
    567603</tr>
     
    575611<td>V_BFE_U32</td>
    576612<td>4</td>
     613<td>V_MAX3_U16</td>
     614<td>4</td>
     615</tr>
     616<tr>
     617<td>V_BFI_B32</td>
     618<td>4</td>
    577619<td>V_MAX3_U32</td>
    578620<td>4</td>
    579621</tr>
    580622<tr>
    581 <td>V_BFI_B32</td>
     623<td>V_CUBEID_F32</td>
    582624<td>4</td>
    583625<td>V_MAX_F64</td>
     
    585627</tr>
    586628<tr>
    587 <td>V_CUBEID_F32</td>
     629<td>V_CUBEMA_F32</td>
     630<td>4</td>
     631<td>V_MED3_F16</td>
     632<td>4</td>
     633</tr>
     634<tr>
     635<td>V_CUBESC_F32</td>
    588636<td>4</td>
    589637<td>V_MED3_F32</td>
     
    591639</tr>
    592640<tr>
    593 <td>V_CUBEMA_F32</td>
     641<td>V_CUBETC_F32</td>
     642<td>4</td>
     643<td>V_MED3_I16</td>
     644<td>4</td>
     645</tr>
     646<tr>
     647<td>V_CVT_PK_U8_F32</td>
    594648<td>4</td>
    595649<td>V_MED3_I32</td>
     
    597651</tr>
    598652<tr>
    599 <td>V_CUBESC_F32</td>
    600 <td>4</td>
     653<td>V_DIV_FIXUP_F16</td>
     654<td>4</td>
     655<td>V_MED3_U16</td>
     656<td>4</td>
     657</tr>
     658<tr>
     659<td>V_DIV_FIXUP_F32</td>
     660<td>16</td>
    601661<td>V_MED3_U32</td>
    602662<td>4</td>
    603663</tr>
    604664<tr>
    605 <td>V_CUBETC_F32</td>
    606 <td>4</td>
     665<td>V_DIV_FIXUP_F64</td>
     666<td>DPFACTOR*4</td>
     667<td>V_MIN3_F16</td>
     668<td>4</td>
     669</tr>
     670<tr>
     671<td>V_DIV_FMAS_F32</td>
     672<td>16</td>
    607673<td>V_MIN3_F32</td>
    608674<td>4</td>
    609 </tr>
    610 <tr>
    611 <td>V_CVT_PK_U8_F32</td>
    612 <td>4</td>
    613 <td>V_MIN3_I32</td>
    614 <td>4</td>
    615 </tr>
    616 <tr>
    617 <td>V_DIV_FIXUP_F32</td>
    618 <td>16</td>
    619 <td>V_MIN3_U32</td>
    620 <td>4</td>
    621 </tr>
    622 <tr>
    623 <td>V_DIV_FIXUP_F64</td>
    624 <td>DPFACTOR*4</td>
    625 <td>V_MIN_F64</td>
    626 <td>DPFACTOR*4</td>
    627 </tr>
    628 <tr>
    629 <td>V_DIV_FMAS_F32</td>
    630 <td>16</td>
    631 <td>V_MQSAD_PK_U16_U8</td>
    632 <td>16</td>
    633675</tr>
    634676<tr>
    635677<td>V_DIV_FMAS_F64</td>
    636678<td>DPFACTOR*8</td>
    637 <td>V_MQSAD_U32_U8</td>
    638 <td>16</td>
     679<td>V_MIN3_I16</td>
     680<td>4</td>
    639681</tr>
    640682<tr>
    641683<td>V_DIV_SCALE_F32</td>
    642684<td>16</td>
    643 <td>V_MQSAD_U8</td>
    644 <td>16</td>
     685<td>V_MIN3_I32</td>
     686<td>4</td>
    645687</tr>
    646688<tr>
    647689<td>V_DIV_SCALE_F64</td>
    648690<td>DPFACTOR*4</td>
    649 <td>V_MSAD_U8</td>
     691<td>V_MIN3_U16</td>
    650692<td>4</td>
    651693</tr>
     
    653695<td>V_MAD_F16</td>
    654696<td>4</td>
    655 <td>V_MULLIT_F32</td>
     697<td>V_MIN3_U32</td>
    656698<td>4</td>
    657699</tr>
     
    659701<td>V_FMA_F32</td>
    660702<td>4 or 16 (1)</td>
     703<td>V_MIN_F64</td>
     704<td>DPFACTOR*4</td>
     705</tr>
     706<tr>
     707<td>V_FMA_F64</td>
     708<td>DPFACTOR*8</td>
     709<td>V_MQSAD_PK_U16_U8</td>
     710<td>16</td>
     711</tr>
     712<tr>
     713<td>V_FMA_LEGACY_F16</td>
     714<td>4</td>
     715<td>V_MQSAD_U32_U8</td>
     716<td>16</td>
     717</tr>
     718<tr>
     719<td>V_LDEXP_F64</td>
     720<td>DPFACTOR*4</td>
     721<td>V_MQSAD_U8</td>
     722<td>16</td>
     723</tr>
     724<tr>
     725<td>V_LERP_U8</td>
     726<td>4</td>
     727<td>V_MSAD_U8</td>
     728<td>4</td>
     729</tr>
     730<tr>
     731<td>V_LSHL_ADD_U32</td>
     732<td>4</td>
     733<td>V_MULLIT_F32</td>
     734<td>4</td>
     735</tr>
     736<tr>
     737<td>V_LSHL_B64</td>
     738<td>DPFACTOR*4</td>
    661739<td>V_MUL_F64</td>
    662740<td>DPFACTOR*8</td>
    663741</tr>
    664742<tr>
    665 <td>V_FMA_F64</td>
    666 <td>DPFACTOR*8</td>
     743<td>V_LSHL_OR_B32</td>
     744<td>4</td>
    667745<td>V_MUL_HI_I32</td>
    668746<td>16</td>
    669747</tr>
    670748<tr>
    671 <td>V_LDEXP_F64</td>
     749<td>V_LSHLREV_B64</td>
    672750<td>DPFACTOR*4</td>
    673751<td>V_MUL_HI_U32</td>
     
    675753</tr>
    676754<tr>
    677 <td>V_LERP_U8</td>
    678 <td>4</td>
     755<td>V_LSHR_B64</td>
     756<td>DPFACTOR*4</td>
    679757<td>V_MUL_LO_I32</td>
    680758<td>16</td>
    681759</tr>
    682760<tr>
    683 <td>V_LSHL_B64</td>
     761<td>V_LSHRREV_B64</td>
    684762<td>DPFACTOR*4</td>
    685763<td>V_MUL_LO_U32</td>
     
    687765</tr>
    688766<tr>
    689 <td>V_LSHLREV_B64</td>
    690 <td>DPFACTOR*4</td>
     767<td>V_MAD_F16</td>
     768<td>4</td>
     769<td>V_OR3_B32</td>
     770<td>4</td>
     771</tr>
     772<tr>
     773<td>V_MAD_F32</td>
     774<td>4</td>
    691775<td>V_QSAD_PK_U16_U8</td>
    692776<td>16</td>
    693777</tr>
    694778<tr>
    695 <td>V_LSHR_B64</td>
    696 <td>DPFACTOR*4</td>
     779<td>V_MAD_I16</td>
     780<td>4</td>
    697781<td>V_QSAD_U8</td>
    698782<td>16</td>
    699783</tr>
    700784<tr>
    701 <td>V_LSHRREV_B64</td>
    702 <td>DPFACTOR*4</td>
     785<td>V_MAD_I32_I16</td>
     786<td>4</td>
    703787<td>V_SAD_HI_U8</td>
    704788<td>4</td>
    705789</tr>
    706790<tr>
    707 <td>V_MAD_F16</td>
     791<td>V_MAD_I32_I24</td>
    708792<td>4</td>
    709793<td>V_SAD_U16</td>
     
    711795</tr>
    712796<tr>
    713 <td>V_MAD_F32</td>
    714 <td>4</td>
     797<td>V_MAD_I64_I32</td>
     798<td>16</td>
    715799<td>V_SAD_U32</td>
    716800<td>4</td>
    717801</tr>
    718802<tr>
    719 <td>V_MAD_I16</td>
     803<td>V_MAD_LEGACY_F16</td>
    720804<td>4</td>
    721805<td>V_SAD_U8</td>
     
    723807</tr>
    724808<tr>
    725 <td>V_MAD_I32_I24</td>
     809<td>V_MAD_LEGACY_F32</td>
    726810<td>4</td>
    727811<td>V_TRIG_PREOP_F64</td>
     
    729813</tr>
    730814<tr>
    731 <td>V_MAD_I64_I32</td>
    732 <td>16</td>
    733 <td></td>
    734 <td></td>
     815<td>V_MAD_LEGACY_I16</td>
     816<td>4</td>
     817<td>V_XAD_U32</td>
     818<td>4</td>
    735819</tr>
    736820</tbody>
    737821</table>
    738822<p>(1) - on devices with a DP speed of 1/2, 1/4 or 1/8 it takes 4 cycles; on other devices it takes 16 cycles</p>
     823<h3>VOP3P Instruction timings</h3>
     824<p>All VOP3P instructions take 4 cycles. All instructions can achieve a throughput of 1 instruction
     825per cycle.</p>
    739826<h3>DS Instruction timings</h3>
    740827<p>Timings of DS instructions includes only execution without waiting for completing