Changes between Version 23 and Version 24 of GcnTimings


Ignore:
Timestamp:
06/20/17 21:00:28 (7 years ago)
Author:
trac
Comment:

--

Legend:

Unmodified
Added
Removed
Modified
  • GcnTimings

    v23 v24  
    258258<td>V_BFREV_B32</td>
    259259<td>4</td>
     260<td>V_FREXP_EXP_I16_F16</td>
     261<td>4</td>
     262</tr>
     263<tr>
     264<td>V_CEIL_F16</td>
     265<td>4</td>
    260266<td>V_FREXP_EXP_I32_F32</td>
    261267<td>4</td>
     
    270276<td>V_CEIL_F64</td>
    271277<td>DPFACTOR*4</td>
     278<td>V_FREXP_MANT_F16</td>
     279<td>4</td>
     280</tr>
     281<tr>
     282<td>V_CLREXCP</td>
     283<td>4</td>
    272284<td>V_FREXP_MANT_F32</td>
    273285<td>4</td>
    274286</tr>
    275287<tr>
    276 <td>V_CLREXCP</td>
    277 <td>4</td>
     288<td>V_COS_F16</td>
     289<td>16</td>
    278290<td>V_FREXP_MANT_F64</td>
    279291<td>DPFACTOR*4</td>
     
    288300<td>V_CVT_F16_F32</td>
    289301<td>4</td>
     302<td>V_LOG_F16</td>
     303<td>16</td>
     304</tr>
     305<tr>
     306<td>V_CVT_F16_I16</td>
     307<td>4</td>
    290308<td>V_LOG_F32</td>
    291309<td>16</td>
    292310</tr>
    293311<tr>
     312<td>V_CVT_F16_U16</td>
     313<td>4</td>
     314<td>V_LOG_LEGACY_F32</td>
     315<td>16</td>
     316</tr>
     317<tr>
    294318<td>V_CVT_F32_F16</td>
    295319<td>4</td>
    296 <td>V_LOG_LEGACY_F32</td>
    297 <td>16</td>
     320<td>V_MOVRELD_B32</td>
     321<td>4</td>
    298322</tr>
    299323<tr>
    300324<td>V_CVT_F32_F64</td>
    301325<td>DPFACTOR*4</td>
    302 <td>V_MOVRELD_B32</td>
     326<td>V_MOVRELSD_B32</td>
    303327<td>4</td>
    304328</tr>
     
    306330<td>V_CVT_F32_I32</td>
    307331<td>4</td>
    308 <td>V_MOVRELSD_B32</td>
     332<td>V_MOVRELS_B32</td>
    309333<td>4</td>
    310334</tr>
     
    312336<td>V_CVT_F32_U32</td>
    313337<td>4</td>
    314 <td>V_MOVRELS_B32</td>
     338<td>V_MOV_B32</td>
    315339<td>4</td>
    316340</tr>
     
    318342<td>V_CVT_F32_UBYTE0</td>
    319343<td>4</td>
    320 <td>V_MOV_B32</td>
     344<td>V_MOV_FED_B32</td>
    321345<td>4</td>
    322346</tr>
     
    324348<td>V_CVT_F32_UBYTE1</td>
    325349<td>4</td>
    326 <td>V_MOV_FED_B32</td>
     350<td>V_NOP</td>
    327351<td>4</td>
    328352</tr>
     
    330354<td>V_CVT_F32_UBYTE2</td>
    331355<td>4</td>
    332 <td>V_NOP</td>
     356<td>V_NOT_B32</td>
    333357<td>4</td>
    334358</tr>
     
    336360<td>V_CVT_F32_UBYTE3</td>
    337361<td>4</td>
    338 <td>V_NOT_B32</td>
    339 <td>4</td>
     362<td>V_RCP_CLAMP_F32</td>
     363<td>16</td>
    340364</tr>
    341365<tr>
    342366<td>V_CVT_F64_F32</td>
    343 <td>DPFACTOR*4</td>
    344 <td>V_RCP_CLAMP_F32</td>
    345 <td>16</td>
    346 </tr>
    347 <tr>
    348 <td>V_CVT_F64_I32</td>
    349367<td>DPFACTOR*4</td>
    350368<td>V_RCP_CLAMP_F64</td>
     
    352370</tr>
    353371<tr>
     372<td>V_CVT_F64_I32</td>
     373<td>DPFACTOR*4</td>
     374<td>V_RCP_F16</td>
     375<td>16</td>
     376</tr>
     377<tr>
    354378<td>V_CVT_F64_U32</td>
    355379<td>DPFACTOR*4</td>
     
    364388</tr>
    365389<tr>
     390<td>V_CVT_I16_F16</td>
     391<td>4</td>
     392<td>V_RCP_IFLAG_F32</td>
     393<td>16</td>
     394</tr>
     395<tr>
    366396<td>V_CVT_I32_F32</td>
    367397<td>4</td>
    368 <td>V_RCP_IFLAG_F32</td>
     398<td>V_RCP_LEGACY_F32</td>
    369399<td>16</td>
    370400</tr>
     
    372402<td>V_CVT_I32_F64</td>
    373403<td>DPFACTOR*4</td>
    374 <td>V_RCP_LEGACY_F32</td>
    375 <td>16</td>
     404<td>V_READFIRSTLANE_B32</td>
     405<td>4</td>
    376406</tr>
    377407<tr>
    378408<td>V_CVT_OFF_F32_I4</td>
    379409<td>4</td>
    380 <td>V_READFIRSTLANE_B32</td>
     410<td>V_RNDNE_F16</td>
    381411<td>4</td>
    382412</tr>
     
    388418</tr>
    389419<tr>
     420<td>V_CVT_U16_F16</td>
     421<td>4</td>
     422<td>V_RNDNE_F64</td>
     423<td>DPFACTOR*4</td>
     424</tr>
     425<tr>
    390426<td>V_CVT_U32_F32</td>
    391427<td>4</td>
    392 <td>V_RNDNE_F64</td>
    393 <td>DPFACTOR*4</td>
     428<td>V_RSQ_CLAMP_F32</td>
     429<td>16</td>
    394430</tr>
    395431<tr>
    396432<td>V_CVT_U32_F64</td>
    397433<td>DPFACTOR*4</td>
    398 <td>V_RSQ_CLAMP_F32</td>
    399 <td>16</td>
    400 </tr>
    401 <tr>
    402 <td>V_EXP_F32</td>
    403 <td>16</td>
    404434<td>V_RSQ_CLAMP_F64</td>
    405435<td>DPFACTOR*8</td>
    406436</tr>
    407437<tr>
     438<td>V_EXP_F16</td>
     439<td>16</td>
     440<td>V_RSQ_F16</td>
     441<td>16</td>
     442</tr>
     443<tr>
     444<td>V_EXP_F32</td>
     445<td>16</td>
     446<td>V_RSQ_F32</td>
     447<td>16</td>
     448</tr>
     449<tr>
    408450<td>V_EXP_LEGACY_F32</td>
    409451<td>16</td>
    410 <td>V_RSQ_F32</td>
    411 <td>16</td>
    412 </tr>
    413 <tr>
    414 <td>V_FFBH_I32</td>
    415 <td>4</td>
    416452<td>V_RSQ_F64</td>
    417453<td>DPFACTOR*8</td>
    418454</tr>
    419455<tr>
     456<td>V_FFBH_I32</td>
     457<td>4</td>
     458<td>V_RSQ_LEGACY_F32</td>
     459<td>16</td>
     460</tr>
     461<tr>
    420462<td>V_FFBH_U32</td>
    421463<td>4</td>
    422 <td>V_RSQ_LEGACY_F32</td>
     464<td>V_SIN_F16</td>
    423465<td>16</td>
    424466</tr>
     
    427469<td>4</td>
    428470<td>V_SIN_F32</td>
     471<td>16</td>
     472</tr>
     473<tr>
     474<td>V_FLOOR_F16</td>
     475<td>4</td>
     476<td>V_SQRT_F16</td>
    429477<td>16</td>
    430478</tr>
     
    440488<td>V_SQRT_F64</td>
    441489<td>DPFACTOR*8</td>
     490</tr>
     491<tr>
     492<td>V_FRACT_F16</td>
     493<td>4</td>
     494<td>V_TRUNC_F16</td>
     495<td>4</td>
    442496</tr>
    443497<tr>
     
    457511<h3>VOPC Instruction timings</h3>
    458512<p>Maximum throughput of these instructions can be calculated by using the expression
    459 <code>(1/(CYCLES/4))</code> - for 4 cycles it is 1 instruction per cycle, for 8 cycles it is 1/2 instruction
    460 per cycle, etc.
    461 All 32-bit comparison instructions take 4 cycles. All 64-bit comparison instructions take
    462 DPFACTOR*4 cycles.</p>
     513<code>(1/(CYCLES/4))</code> - for 4 cycles it is 1 instruction per cycle, for 8 cycles it is 1/2
     514instruction per cycle, etc.
     515All 16-bit and 32-bit comparison instructions take 4 cycles.
     516All 64-bit comparison instructions take DPFACTOR*4 cycles.</p>
    463517<h3>VOP3 Instruction timings</h3>
    464518<p>Maximum throughput of these instructions can be calculated by using the expression
    465 <code>(1/(CYCLES/4))</code> - for 4 cycles it is 1 instruction per cycle, for 8 cycles it is 1/2 instruction
    466 per cycle and etc.
    467 Timings of VOP3 instructions are in this table:</p>
     519<code>(1/(CYCLES/4))</code> - for 4 cycles it is 1 instruction per cycle, for 8 cycles it is 1/2
     520instruction per cycle, etc.</p>
     521<p>Timings of VOP3 instructions are in this table:</p>
    468522<table>
    469523<thead>
     
    479533<td>V_ADD_F64</td>
    480534<td>DPFACTOR*4</td>
     535<td>V_MAD_LEGACY_F32</td>
     536<td>4</td>
     537</tr>
     538<tr>
     539<td>V_ALIGNBIT_B32</td>
     540<td>4</td>
     541<td>V_MAD_U16</td>
     542<td>4</td>
     543</tr>
     544<tr>
     545<td>V_ALIGNBYTE_B32</td>
     546<td>4</td>
     547<td>V_MAD_U32_U24</td>
     548<td>4</td>
     549</tr>
     550<tr>
     551<td>V_ASHR_I64</td>
     552<td>DPFACTOR*4</td>
    481553<td>V_MAD_U64_U32</td>
    482554<td>16</td>
    483555</tr>
    484556<tr>
    485 <td>V_ALIGNBIT_B32</td>
    486 <td>4</td>
     557<td>V_ASHRREV_I64</td>
     558<td>DPFACTOR*4</td>
    487559<td>V_MAX3_F32</td>
    488560<td>4</td>
    489561</tr>
    490562<tr>
    491 <td>V_ALIGNBYTE_B32</td>
     563<td>V_BFE_I32</td>
    492564<td>4</td>
    493565<td>V_MAX3_I32</td>
     
    495567</tr>
    496568<tr>
    497 <td>V_ASHR_I64</td>
    498 <td>DPFACTOR*4</td>
     569<td>V_BFE_U32</td>
     570<td>4</td>
    499571<td>V_MAX3_U32</td>
    500572<td>4</td>
    501573</tr>
    502574<tr>
    503 <td>V_BFE_I32</td>
     575<td>V_BFI_B32</td>
    504576<td>4</td>
    505577<td>V_MAX_F64</td>
     
    507579</tr>
    508580<tr>
    509 <td>V_BFE_U32</td>
     581<td>V_CUBEID_F32</td>
    510582<td>4</td>
    511583<td>V_MED3_F32</td>
     
    513585</tr>
    514586<tr>
    515 <td>V_BFI_B32</td>
     587<td>V_CUBEMA_F32</td>
    516588<td>4</td>
    517589<td>V_MED3_I32</td>
     
    519591</tr>
    520592<tr>
    521 <td>V_CUBEID_F32</td>
     593<td>V_CUBESC_F32</td>
    522594<td>4</td>
    523595<td>V_MED3_U32</td>
     
    525597</tr>
    526598<tr>
    527 <td>V_CUBEMA_F32</td>
     599<td>V_CUBETC_F32</td>
    528600<td>4</td>
    529601<td>V_MIN3_F32</td>
     
    531603</tr>
    532604<tr>
    533 <td>V_CUBESC_F32</td>
     605<td>V_CVT_PK_U8_F32</td>
    534606<td>4</td>
    535607<td>V_MIN3_I32</td>
     
    537609</tr>
    538610<tr>
    539 <td>V_CUBETC_F32</td>
    540 <td>4</td>
     611<td>V_DIV_FIXUP_F32</td>
     612<td>16</td>
    541613<td>V_MIN3_U32</td>
    542614<td>4</td>
    543615</tr>
    544616<tr>
    545 <td>V_CVT_PK_U8_F32</td>
    546 <td>4</td>
     617<td>V_DIV_FIXUP_F64</td>
     618<td>DPFACTOR*4</td>
    547619<td>V_MIN_F64</td>
    548620<td>DPFACTOR*4</td>
    549621</tr>
    550622<tr>
    551 <td>V_DIV_FIXUP_F32</td>
     623<td>V_DIV_FMAS_F32</td>
    552624<td>16</td>
    553625<td>V_MQSAD_PK_U16_U8</td>
    554 <td>16</td>
    555 </tr>
    556 <tr>
    557 <td>V_DIV_FIXUP_F64</td>
    558 <td>DPFACTOR*4</td>
    559 <td>V_MQSAD_U32_U8</td>
    560 <td>16</td>
    561 </tr>
    562 <tr>
    563 <td>V_DIV_FMAS_F32</td>
    564 <td>16</td>
    565 <td>V_MQSAD_U8</td>
    566626<td>16</td>
    567627</tr>
     
    569629<td>V_DIV_FMAS_F64</td>
    570630<td>DPFACTOR*8</td>
     631<td>V_MQSAD_U32_U8</td>
     632<td>16</td>
     633</tr>
     634<tr>
     635<td>V_DIV_SCALE_F32</td>
     636<td>16</td>
     637<td>V_MQSAD_U8</td>
     638<td>16</td>
     639</tr>
     640<tr>
     641<td>V_DIV_SCALE_F64</td>
     642<td>DPFACTOR*4</td>
    571643<td>V_MSAD_U8</td>
    572644<td>4</td>
    573645</tr>
    574646<tr>
    575 <td>V_DIV_SCALE_F32</td>
    576 <td>16</td>
     647<td>V_MAD_F16</td>
     648<td>4</td>
    577649<td>V_MULLIT_F32</td>
    578650<td>4</td>
    579651</tr>
    580652<tr>
    581 <td>V_DIV_SCALE_F64</td>
    582 <td>DPFACTOR*4</td>
     653<td>V_FMA_F32</td>
     654<td>4 or 16 (1)</td>
    583655<td>V_MUL_F64</td>
    584656<td>DPFACTOR*8</td>
    585657</tr>
    586658<tr>
    587 <td>V_FMA_F32</td>
    588 <td>4 or 16 (1)</td>
    589 <td>V_MUL_HI_I32</td>
    590 <td>16</td>
    591 </tr>
    592 <tr>
    593659<td>V_FMA_F64</td>
    594660<td>DPFACTOR*8</td>
     661<td>V_MUL_HI_I32</td>
     662<td>16</td>
     663</tr>
     664<tr>
     665<td>V_LDEXP_F64</td>
     666<td>DPFACTOR*4</td>
    595667<td>V_MUL_HI_U32</td>
    596668<td>16</td>
    597669</tr>
    598670<tr>
    599 <td>V_LDEXP_F64</td>
    600 <td>DPFACTOR*4</td>
     671<td>V_LERP_U8</td>
     672<td>4</td>
    601673<td>V_MUL_LO_I32</td>
    602674<td>16</td>
    603675</tr>
    604676<tr>
    605 <td>V_LERP_U8</td>
    606 <td>4</td>
     677<td>V_LSHL_B64</td>
     678<td>DPFACTOR*4</td>
    607679<td>V_MUL_LO_U32</td>
    608680<td>16</td>
    609681</tr>
    610682<tr>
    611 <td>V_LSHL_B64</td>
     683<td>V_LSHLREV_B64</td>
    612684<td>DPFACTOR*4</td>
    613685<td>V_QSAD_PK_U16_U8</td>
     
    621693</tr>
    622694<tr>
     695<td>V_LSHRREV_B64</td>
     696<td>DPFACTOR*4</td>
     697<td>V_SAD_HI_U8</td>
     698<td>4</td>
     699</tr>
     700<tr>
     701<td>V_MAD_F16</td>
     702<td>4</td>
     703<td>V_SAD_U16</td>
     704<td>4</td>
     705</tr>
     706<tr>
    623707<td>V_MAD_F32</td>
    624708<td>4</td>
    625 <td>V_SAD_HI_U8</td>
     709<td>V_SAD_U32</td>
     710<td>4</td>
     711</tr>
     712<tr>
     713<td>V_MAD_I16</td>
     714<td>4</td>
     715<td>V_SAD_U8</td>
    626716<td>4</td>
    627717</tr>
    628718<tr>
    629719<td>V_MAD_I32_I24</td>
    630 <td>4</td>
    631 <td>V_SAD_U16</td>
    632 <td>4</td>
    633 </tr>
    634 <tr>
    635 <td>V_MAD_I64_I32</td>
    636 <td>16</td>
    637 <td>V_SAD_U32</td>
    638 <td>4</td>
    639 </tr>
    640 <tr>
    641 <td>V_MAD_LEGACY_F32</td>
    642 <td>4</td>
    643 <td>V_SAD_U8</td>
    644 <td>4</td>
    645 </tr>
    646 <tr>
    647 <td>V_MAD_U32_U24</td>
    648720<td>4</td>
    649721<td>V_TRIG_PREOP_F64</td>
    650722<td>DPFACTOR*8</td>
     723</tr>
     724<tr>
     725<td>V_MAD_I64_I32</td>
     726<td>16</td>
     727<td></td>
     728<td></td>
    651729</tr>
    652730</tbody>