Changes between Version 25 and Version 26 of GcnTimings
- Timestamp:
- 12/06/17 20:00:32 (6 years ago)
Legend:
- Unmodified
- Added
- Removed
- Modified
-
GcnTimings
v25 v26 258 258 <td>V_BFREV_B32</td> 259 259 <td>4</td> 260 <td>V_FREXP_EXP_I32_F 32</td>261 <td> 4</td>260 <td>V_FREXP_EXP_I32_F64</td> 261 <td>DPFACTOR*4</td> 262 262 </tr> 263 263 <tr> 264 264 <td>V_CEIL_F16</td> 265 265 <td>4</td> 266 <td>V_FREXP_ EXP_I32_F64</td>267 <td> DPFACTOR*4</td>266 <td>V_FREXP_MANT_F16</td> 267 <td>4</td> 268 268 </tr> 269 269 <tr> 270 270 <td>V_CEIL_F32</td> 271 271 <td>4</td> 272 <td>V_FREXP_MANT_F 16</td>272 <td>V_FREXP_MANT_F32</td> 273 273 <td>4</td> 274 274 </tr> … … 276 276 <td>V_CEIL_F64</td> 277 277 <td>DPFACTOR*4</td> 278 <td>V_FREXP_MANT_F 32</td>279 <td> 4</td>278 <td>V_FREXP_MANT_F64</td> 279 <td>DPFACTOR*4</td> 280 280 </tr> 281 281 <tr> 282 282 <td>V_CLREXCP</td> 283 283 <td>4</td> 284 <td>V_ FREXP_MANT_F64</td>285 <td> DPFACTOR*4</td>284 <td>V_LOG_CLAMP_F32</td> 285 <td>16</td> 286 286 </tr> 287 287 <tr> 288 288 <td>V_COS_F16</td> 289 289 <td>16</td> 290 <td>V_LOG_ CLAMP_F32</td>290 <td>V_LOG_F16</td> 291 291 <td>16</td> 292 292 </tr> … … 294 294 <td>V_COS_F32</td> 295 295 <td>16</td> 296 <td>V_LOG_F 16</td>296 <td>V_LOG_F32</td> 297 297 <td>16</td> 298 298 </tr> … … 300 300 <td>V_CVT_F16_F32</td> 301 301 <td>4</td> 302 <td>V_LOG_ F32</td>302 <td>V_LOG_LEGACY_F32</td> 303 303 <td>16</td> 304 304 </tr> … … 306 306 <td>V_CVT_F16_I16</td> 307 307 <td>4</td> 308 <td>V_ LOG_LEGACY_F32</td>309 <td> 16</td>308 <td>V_MBCNT_LO_U32_B32</td> 309 <td>4</td> 310 310 </tr> 311 311 <tr> 312 312 <td>V_CVT_F16_U16</td> 313 313 <td>4</td> 314 <td>V_MBCNT_ LO_U32_B32</td>314 <td>V_MBCNT_HI_U32_B32</td> 315 315 <td>4</td> 316 316 </tr> … … 318 318 <td>V_CVT_F32_F16</td> 319 319 <td>4</td> 320 <td>V_M BCNT_HI_U32_B32</td>320 <td>V_MOVRELD_B32</td> 321 321 <td>4</td> 322 322 </tr> … … 324 324 <td>V_CVT_F32_F64</td> 325 325 <td>DPFACTOR*4</td> 326 <td>V_MOVREL D_B32</td>326 <td>V_MOVRELSD_B32</td> 327 327 <td>4</td> 328 328 </tr> … … 330 330 <td>V_CVT_F32_I32</td> 331 331 <td>4</td> 332 <td>V_MOVRELS D_B32</td>332 <td>V_MOVRELS_B32</td> 333 
333 <td>4</td> 334 334 </tr> … … 336 336 <td>V_CVT_F32_U32</td> 337 337 <td>4</td> 338 <td>V_MOV RELS_B32</td>338 <td>V_MOV_B32</td> 339 339 <td>4</td> 340 340 </tr> … … 342 342 <td>V_CVT_F32_UBYTE0</td> 343 343 <td>4</td> 344 <td>V_MOV_ B32</td>344 <td>V_MOV_FED_B32</td> 345 345 <td>4</td> 346 346 </tr> … … 348 348 <td>V_CVT_F32_UBYTE1</td> 349 349 <td>4</td> 350 <td>V_MOV_ FED_B32</td>350 <td>V_MOV_PRSV_B32</td> 351 351 <td>4</td> 352 352 </tr> … … 406 406 </tr> 407 407 <tr> 408 <td>V_CVT_NORM_I16_F16</td> 409 <td>4</td> 410 <td>V_READFIRSTLANE_B32</td> 411 <td>4</td> 412 </tr> 413 <tr> 414 <td>V_CVT_NORM_U16_F16</td> 415 <td>4</td> 416 <td>V_RNDNE_F16</td> 417 <td>4</td> 418 </tr> 419 <tr> 408 420 <td>V_CVT_OFF_F32_I4</td> 409 421 <td>4</td> 410 <td>V_R EADFIRSTLANE_B32</td>422 <td>V_RNDNE_F32</td> 411 423 <td>4</td> 412 424 </tr> … … 414 426 <td>V_CVT_RPI_I32_F32</td> 415 427 <td>4</td> 416 <td>V_RNDNE_F 16</td>417 <td> 4</td>428 <td>V_RNDNE_F64</td> 429 <td>DPFACTOR*4</td> 418 430 </tr> 419 431 <tr> 420 432 <td>V_CVT_U16_F16</td> 421 433 <td>4</td> 422 <td>V_R NDNE_F32</td>423 <td> 4</td>434 <td>V_RSQ_CLAMP_F32</td> 435 <td>16</td> 424 436 </tr> 425 437 <tr> 426 438 <td>V_CVT_U32_F32</td> 427 439 <td>4</td> 428 <td>V_RNDNE_F64</td>429 <td>DPFACTOR*4</td>430 </tr>431 <tr>432 <td>V_CVT_U32_F64</td>433 <td>DPFACTOR*4</td>434 <td>V_RSQ_CLAMP_F32</td>435 <td>16</td>436 </tr>437 <tr>438 <td>V_EXP_F16</td>439 <td>16</td>440 440 <td>V_RSQ_CLAMP_F64</td> 441 441 <td>DPFACTOR*8</td> 442 442 </tr> 443 443 <tr> 444 <td>V_CVT_U32_F64</td> 445 <td>DPFACTOR*4</td> 446 <td>V_RSQ_F16</td> 447 <td>16</td> 448 </tr> 449 <tr> 450 <td>V_EXP_F16</td> 451 <td>16</td> 452 <td>V_RSQ_F32</td> 453 <td>16</td> 454 </tr> 455 <tr> 444 456 <td>V_EXP_F32</td> 445 457 <td>16</td> 446 <td>V_RSQ_F16</td>447 <td>16</td>448 </tr>449 <tr>450 <td>V_EXP_LEGACY_F32</td>451 <td>16</td>452 <td>V_RSQ_F32</td>453 <td>16</td>454 </tr>455 <tr>456 <td>V_FFBH_I32</td>457 <td>4</td>458 458 <td>V_RSQ_F64</td> 
459 459 <td>DPFACTOR*8</td> 460 460 </tr> 461 461 <tr> 462 <td>V_EXP_LEGACY_F32</td> 463 <td>16</td> 464 <td>V_RSQ_LEGACY_F32</td> 465 <td>16</td> 466 </tr> 467 <tr> 468 <td>V_FFBH_I32</td> 469 <td>4</td> 470 <td>V_SAT_PK_U8_I16</td> 471 <td>4</td> 472 </tr> 473 <tr> 462 474 <td>V_FFBH_U32</td> 463 475 <td>4</td> 464 <td>V_ RSQ_LEGACY_F32</td>465 <td> 16</td>476 <td>V_SCREEN_PARTITION_4SE_B32</td> 477 <td>4</td> 466 478 </tr> 467 479 <tr> … … 498 510 <td>V_FRACT_F32</td> 499 511 <td>4</td> 512 <td>V_SWAP_B32</td> 513 <td>8</td> 514 </tr> 515 <tr> 516 <td>V_FRACT_F64</td> 517 <td>DPFACTOR*4</td> 500 518 <td>V_TRUNC_F16</td> 501 519 <td>4</td> 502 520 </tr> 503 521 <tr> 504 <td>V_FR ACT_F64</td>505 <td> DPFACTOR*4</td>522 <td>V_FREXP_EXP_I16_F16</td> 523 <td>4</td> 506 524 <td>V_TRUNC_F32</td> 507 525 <td>4</td> 508 526 </tr> 509 527 <tr> 510 <td>V_FREXP_EXP_I 16_F16</td>528 <td>V_FREXP_EXP_I32_F32</td> 511 529 <td>4</td> 512 530 <td>V_TRUNC_F64</td> … … 537 555 <tbody> 538 556 <tr> 557 <td>V_ADD3_U32</td> 558 <td>4</td> 559 <td>V_MAD_LEGACY_U16</td> 560 <td>4</td> 561 </tr> 562 <tr> 539 563 <td>V_ADD_F64</td> 540 564 <td>DPFACTOR*4</td> 541 <td>V_MAD_LEGACY_F32</td> 565 <td>V_MAD_U16</td> 566 <td>4</td> 567 </tr> 568 <tr> 569 <td>V_ADD_LSHL_U32</td> 570 <td>4</td> 571 <td>V_MAD_U32_U16</td> 542 572 <td>4</td> 543 573 </tr> … … 545 575 <td>V_ALIGNBIT_B32</td> 546 576 <td>4</td> 547 <td>V_MAD_U 16</td>577 <td>V_MAD_U32_U24</td> 548 578 <td>4</td> 549 579 </tr> … … 551 581 <td>V_ALIGNBYTE_B32</td> 552 582 <td>4</td> 553 <td>V_MAD_U32_U24</td> 583 <td>V_MAD_U64_U32</td> 584 <td>16</td> 585 </tr> 586 <tr> 587 <td>V_AND_OR_B32</td> 588 <td>4</td> 589 <td>V_MAX3_F16</td> 554 590 <td>4</td> 555 591 </tr> … … 557 593 <td>V_ASHR_I64</td> 558 594 <td>DPFACTOR*4</td> 559 <td>V_MA D_U64_U32</td>560 <td> 16</td>595 <td>V_MAX3_F32</td> 596 <td>4</td> 561 597 </tr> 562 598 <tr> 563 599 <td>V_ASHRREV_I64</td> 564 600 <td>DPFACTOR*4</td> 565 <td>V_MAX3_ F32</td>601 
<td>V_MAX3_I16</td> 566 602 <td>4</td> 567 603 </tr> … … 575 611 <td>V_BFE_U32</td> 576 612 <td>4</td> 613 <td>V_MAX3_U16</td> 614 <td>4</td> 615 </tr> 616 <tr> 617 <td>V_BFI_B32</td> 618 <td>4</td> 577 619 <td>V_MAX3_U32</td> 578 620 <td>4</td> 579 621 </tr> 580 622 <tr> 581 <td>V_ BFI_B32</td>623 <td>V_CUBEID_F32</td> 582 624 <td>4</td> 583 625 <td>V_MAX_F64</td> … … 585 627 </tr> 586 628 <tr> 587 <td>V_CUBEID_F32</td> 629 <td>V_CUBEMA_F32</td> 630 <td>4</td> 631 <td>V_MED3_F16</td> 632 <td>4</td> 633 </tr> 634 <tr> 635 <td>V_CUBESC_F32</td> 588 636 <td>4</td> 589 637 <td>V_MED3_F32</td> … … 591 639 </tr> 592 640 <tr> 593 <td>V_CUBEMA_F32</td> 641 <td>V_CUBETC_F32</td> 642 <td>4</td> 643 <td>V_MED3_I16</td> 644 <td>4</td> 645 </tr> 646 <tr> 647 <td>V_CVT_PK_U8_F32</td> 594 648 <td>4</td> 595 649 <td>V_MED3_I32</td> … … 597 651 </tr> 598 652 <tr> 599 <td>V_CUBESC_F32</td> 600 <td>4</td> 653 <td>V_DIV_FIXUP_F16</td> 654 <td>4</td> 655 <td>V_MED3_U16</td> 656 <td>4</td> 657 </tr> 658 <tr> 659 <td>V_DIV_FIXUP_F32</td> 660 <td>16</td> 601 661 <td>V_MED3_U32</td> 602 662 <td>4</td> 603 663 </tr> 604 664 <tr> 605 <td>V_CUBETC_F32</td> 606 <td>4</td> 665 <td>V_DIV_FIXUP_F64</td> 666 <td>DPFACTOR*4</td> 667 <td>V_MIN3_F16</td> 668 <td>4</td> 669 </tr> 670 <tr> 671 <td>V_DIV_FMAS_F32</td> 672 <td>16</td> 607 673 <td>V_MIN3_F32</td> 608 674 <td>4</td> 609 </tr>610 <tr>611 <td>V_CVT_PK_U8_F32</td>612 <td>4</td>613 <td>V_MIN3_I32</td>614 <td>4</td>615 </tr>616 <tr>617 <td>V_DIV_FIXUP_F32</td>618 <td>16</td>619 <td>V_MIN3_U32</td>620 <td>4</td>621 </tr>622 <tr>623 <td>V_DIV_FIXUP_F64</td>624 <td>DPFACTOR*4</td>625 <td>V_MIN_F64</td>626 <td>DPFACTOR*4</td>627 </tr>628 <tr>629 <td>V_DIV_FMAS_F32</td>630 <td>16</td>631 <td>V_MQSAD_PK_U16_U8</td>632 <td>16</td>633 675 </tr> 634 676 <tr> 635 677 <td>V_DIV_FMAS_F64</td> 636 678 <td>DPFACTOR*8</td> 637 <td>V_M QSAD_U32_U8</td>638 <td> 16</td>679 <td>V_MIN3_I16</td> 680 <td>4</td> 639 681 </tr> 640 682 <tr> 641 683 
<td>V_DIV_SCALE_F32</td> 642 684 <td>16</td> 643 <td>V_M QSAD_U8</td>644 <td> 16</td>685 <td>V_MIN3_I32</td> 686 <td>4</td> 645 687 </tr> 646 688 <tr> 647 689 <td>V_DIV_SCALE_F64</td> 648 690 <td>DPFACTOR*4</td> 649 <td>V_M SAD_U8</td>691 <td>V_MIN3_U16</td> 650 692 <td>4</td> 651 693 </tr> … … 653 695 <td>V_MAD_F16</td> 654 696 <td>4</td> 655 <td>V_M ULLIT_F32</td>697 <td>V_MIN3_U32</td> 656 698 <td>4</td> 657 699 </tr> … … 659 701 <td>V_FMA_F32</td> 660 702 <td>4 or 16 (1)</td> 703 <td>V_MIN_F64</td> 704 <td>DPFACTOR*4</td> 705 </tr> 706 <tr> 707 <td>V_FMA_F64</td> 708 <td>DPFACTOR*8</td> 709 <td>V_MQSAD_PK_U16_U8</td> 710 <td>16</td> 711 </tr> 712 <tr> 713 <td>V_FMA_LEGACY_F16</td> 714 <td>4</td> 715 <td>V_MQSAD_U32_U8</td> 716 <td>16</td> 717 </tr> 718 <tr> 719 <td>V_LDEXP_F64</td> 720 <td>DPFACTOR*4</td> 721 <td>V_MQSAD_U8</td> 722 <td>16</td> 723 </tr> 724 <tr> 725 <td>V_LERP_U8</td> 726 <td>4</td> 727 <td>V_MSAD_U8</td> 728 <td>4</td> 729 </tr> 730 <tr> 731 <td>V_LSHL_ADD_U32</td> 732 <td>4</td> 733 <td>V_MULLIT_F32</td> 734 <td>4</td> 735 </tr> 736 <tr> 737 <td>V_LSHL_B64</td> 738 <td>DPFACTOR*4</td> 661 739 <td>V_MUL_F64</td> 662 740 <td>DPFACTOR*8</td> 663 741 </tr> 664 742 <tr> 665 <td>V_ FMA_F64</td>666 <td> DPFACTOR*8</td>743 <td>V_LSHL_OR_B32</td> 744 <td>4</td> 667 745 <td>V_MUL_HI_I32</td> 668 746 <td>16</td> 669 747 </tr> 670 748 <tr> 671 <td>V_L DEXP_F64</td>749 <td>V_LSHLREV_B64</td> 672 750 <td>DPFACTOR*4</td> 673 751 <td>V_MUL_HI_U32</td> … … 675 753 </tr> 676 754 <tr> 677 <td>V_L ERP_U8</td>678 <td> 4</td>755 <td>V_LSHR_B64</td> 756 <td>DPFACTOR*4</td> 679 757 <td>V_MUL_LO_I32</td> 680 758 <td>16</td> 681 759 </tr> 682 760 <tr> 683 <td>V_LSH L_B64</td>761 <td>V_LSHRREV_B64</td> 684 762 <td>DPFACTOR*4</td> 685 763 <td>V_MUL_LO_U32</td> … … 687 765 </tr> 688 766 <tr> 689 <td>V_LSHLREV_B64</td> 690 <td>DPFACTOR*4</td> 767 <td>V_MAD_F16</td> 768 <td>4</td> 769 <td>V_OR3_B32</td> 770 <td>4</td> 771 </tr> 772 <tr> 773 <td>V_MAD_F32</td> 774 
<td>4</td> 691 775 <td>V_QSAD_PK_U16_U8</td> 692 776 <td>16</td> 693 777 </tr> 694 778 <tr> 695 <td>V_ LSHR_B64</td>696 <td> DPFACTOR*4</td>779 <td>V_MAD_I16</td> 780 <td>4</td> 697 781 <td>V_QSAD_U8</td> 698 782 <td>16</td> 699 783 </tr> 700 784 <tr> 701 <td>V_ LSHRREV_B64</td>702 <td> DPFACTOR*4</td>785 <td>V_MAD_I32_I16</td> 786 <td>4</td> 703 787 <td>V_SAD_HI_U8</td> 704 788 <td>4</td> 705 789 </tr> 706 790 <tr> 707 <td>V_MAD_ F16</td>791 <td>V_MAD_I32_I24</td> 708 792 <td>4</td> 709 793 <td>V_SAD_U16</td> … … 711 795 </tr> 712 796 <tr> 713 <td>V_MAD_ F32</td>714 <td> 4</td>797 <td>V_MAD_I64_I32</td> 798 <td>16</td> 715 799 <td>V_SAD_U32</td> 716 800 <td>4</td> 717 801 </tr> 718 802 <tr> 719 <td>V_MAD_ I16</td>803 <td>V_MAD_LEGACY_F16</td> 720 804 <td>4</td> 721 805 <td>V_SAD_U8</td> … … 723 807 </tr> 724 808 <tr> 725 <td>V_MAD_ I32_I24</td>809 <td>V_MAD_LEGACY_F32</td> 726 810 <td>4</td> 727 811 <td>V_TRIG_PREOP_F64</td> … … 729 813 </tr> 730 814 <tr> 731 <td>V_MAD_ I64_I32</td>732 <td> 16</td>733 <td> </td>734 <td> </td>815 <td>V_MAD_LEGACY_I16</td> 816 <td>4</td> 817 <td>V_XAD_U32</td> 818 <td>4</td> 735 819 </tr> 736 820 </tbody> 737 821 </table> 738 822 <p>(1) - 4 cycles on devices with DP speed 1/2, 1/4 or 1/8; 16 cycles on other devices</p> 823 <h3>VOP3P Instruction timings</h3> 824 <p>All VOP3P instructions take 4 cycles. All instructions can achieve a throughput of 1 instruction 825 per cycle.</p> 739 826 <h3>DS Instruction timings</h3> 740 827 <p>Timings of DS instructions include only execution without waiting for completing