Changes between Version 9 and Version 10 of GcnInstrsVop2


Ignore:
Timestamp:
Nov 22, 2015, 9:00:19 PM (5 years ago)
Author:
trac
Comment:

--

Legend:

Unmodified
Added
Removed
Modified
  • GcnInstrsVop2

    v9 v10  
    206206<tr>
    207207<th>Opcode</th>
     208<th>Opcode(VOP3)</th>
    208209<th>Mnemonic (GCN1.0/1.1)</th>
    209210<th>Mnemonic (GCN 1.2)</th>
     
    213214<tr>
    214215<td>0 (0x0)</td>
     216<td>256 (0x100)</td>
    215217<td>V_CNDMASK_B32</td>
    216218<td>V_CNDMASK_B32</td>
     
    218220<tr>
    219221<td>1 (0x1)</td>
     222<td>257 (0x101)</td>
    220223<td>V_READLANE_B32</td>
    221224<td>V_ADD_F32</td>
     
    223226<tr>
    224227<td>2 (0x2)</td>
     228<td>258 (0x102)</td>
    225229<td>V_WRITELANE_B32</td>
    226230<td>V_SUB_F32</td>
     
    228232<tr>
    229233<td>3 (0x3)</td>
     234<td>259 (0x103)</td>
    230235<td>V_ADD_F32</td>
    231236<td>V_SUBREV_F32</td>
     
    233238<tr>
    234239<td>4 (0x4)</td>
     240<td>260 (0x104)</td>
    235241<td>V_SUB_F32</td>
    236242<td>V_MUL_LEGACY_F32</td>
     
    238244<tr>
    239245<td>5 (0x5)</td>
     246<td>261 (0x105)</td>
    240247<td>V_SUBREV_F32</td>
    241248<td>V_MUL_F32</td>
     
    243250<tr>
    244251<td>6 (0x6)</td>
     252<td>262 (0x106)</td>
    245253<td>V_MAC_LEGACY_F32</td>
    246254<td>V_MUL_I32_I24</td>
     
    248256<tr>
    249257<td>7 (0x7)</td>
     258<td>263 (0x107)</td>
    250259<td>V_MUL_LEGACY_F32</td>
    251260<td>V_MUL_HI_I32_I24</td>
     
    253262<tr>
    254263<td>8 (0x8)</td>
     264<td>264 (0x108)</td>
    255265<td>V_MUL_F32</td>
    256266<td>V_MUL_U32_U24</td>
     
    258268<tr>
    259269<td>9 (0x9)</td>
     270<td>265 (0x109)</td>
    260271<td>V_MUL_I32_I24</td>
    261272<td>V_MUL_HI_U32_U24</td>
     
    263274<tr>
    264275<td>10 (0xa)</td>
     276<td>266 (0x10a)</td>
    265277<td>V_MUL_HI_I32_I24</td>
    266278<td>V_MIN_F32</td>
     
    268280<tr>
    269281<td>11 (0xb)</td>
     282<td>267 (0x10b)</td>
    270283<td>V_MUL_U32_U24</td>
    271284<td>V_MAX_F32</td>
     
    273286<tr>
    274287<td>12 (0xc)</td>
     288<td>268 (0x10c)</td>
    275289<td>V_MUL_HI_U32_U24</td>
    276290<td>V_MIN_I32</td>
     
    278292<tr>
    279293<td>13 (0xd)</td>
     294<td>269 (0x10d)</td>
    280295<td>V_MIN_LEGACY_F32</td>
    281296<td>V_MAX_I32</td>
     
    283298<tr>
    284299<td>14 (0xe)</td>
     300<td>270 (0x10e)</td>
    285301<td>V_MAX_LEGACY_F32</td>
    286302<td>V_MIN_U32</td>
     
    288304<tr>
    289305<td>15 (0xf)</td>
     306<td>271 (0x10f)</td>
    290307<td>V_MIN_F32</td>
    291308<td>V_MAX_U32</td>
     
    293310<tr>
    294311<td>16 (0x10)</td>
     312<td>272 (0x110)</td>
    295313<td>V_MAX_F32</td>
    296314<td>V_LSHRREV_B32</td>
     
    298316<tr>
    299317<td>17 (0x11)</td>
     318<td>273 (0x111)</td>
    300319<td>V_MIN_I32</td>
    301320<td>V_ASHRREV_I32</td>
     
    303322<tr>
    304323<td>18 (0x12)</td>
     324<td>274 (0x112)</td>
    305325<td>V_MAX_I32</td>
    306326<td>V_LSHLREV_B32</td>
     
    308328<tr>
    309329<td>19 (0x13)</td>
     330<td>275 (0x113)</td>
    310331<td>V_MIN_U32</td>
    311332<td>V_AND_B32</td>
     
    313334<tr>
    314335<td>20 (0x14)</td>
     336<td>276 (0x114)</td>
    315337<td>V_MAX_U32</td>
    316338<td>V_OR_B32</td>
     
    318340<tr>
    319341<td>21 (0x15)</td>
     342<td>277 (0x115)</td>
    320343<td>V_LSHR_B32</td>
    321344<td>V_XOR_B32</td>
     
    323346<tr>
    324347<td>22 (0x16)</td>
     348<td>278 (0x116)</td>
    325349<td>V_LSHRREV_B32</td>
    326350<td>V_MAC_F32</td>
     
    328352<tr>
    329353<td>23 (0x17)</td>
     354<td>279 (0x117)</td>
    330355<td>V_ASHR_I32</td>
    331356<td>V_MADMK_F32</td>
     
    333358<tr>
    334359<td>24 (0x18)</td>
     360<td>280 (0x118)</td>
    335361<td>V_ASHRREV_I32</td>
    336362<td>V_MADAK_F32</td>
     
    338364<tr>
    339365<td>25 (0x19)</td>
     366<td>281 (0x119)</td>
    340367<td>V_LSHL_B32</td>
    341368<td>V_ADD_U32</td>
     
    343370<tr>
    344371<td>26 (0x1a)</td>
     372<td>282 (0x11a)</td>
    345373<td>V_LSHLREV_B32</td>
    346374<td>V_SUB_U32</td>
     
    348376<tr>
    349377<td>27 (0x1b)</td>
     378<td>283 (0x11b)</td>
    350379<td>V_AND_B32</td>
    351380<td>V_SUBREV_U32</td>
     
    353382<tr>
    354383<td>28 (0x1c)</td>
     384<td>284 (0x11c)</td>
    355385<td>V_OR_B32</td>
    356386<td>V_ADDC_U32</td>
     
    358388<tr>
    359389<td>29 (0x1d)</td>
     390<td>285 (0x11d)</td>
    360391<td>V_XOR_B32</td>
    361392<td>V_SUBB_U32</td>
     
    363394<tr>
    364395<td>30 (0x1e)</td>
     396<td>286 (0x11e)</td>
    365397<td>V_BFM_B32</td>
    366398<td>V_SUBBREV_U32</td>
     
    368400<tr>
    369401<td>31 (0x1f)</td>
     402<td>287 (0x11f)</td>
    370403<td>V_MAC_F32</td>
    371404<td>V_ADD_F16</td>
     
    373406<tr>
    374407<td>32 (0x20)</td>
     408<td>288 (0x120)</td>
    375409<td>V_MADMK_F32</td>
    376410<td>V_SUB_F16</td>
     
    378412<tr>
    379413<td>33 (0x21)</td>
     414<td>289 (0x121)</td>
    380415<td>V_MADAK_F32</td>
    381416<td>V_SUBREV_F16</td>
     
    383418<tr>
    384419<td>34 (0x22)</td>
     420<td>290 (0x122)</td>
    385421<td>V_BCNT_U32_B32</td>
    386422<td>V_MUL_F16</td>
     
    388424<tr>
    389425<td>35 (0x23)</td>
     426<td>291 (0x123)</td>
    390427<td>V_MBCNT_LO_U32_B32</td>
    391428<td>V_MAC_F16</td>
     
    393430<tr>
    394431<td>36 (0x24)</td>
     432<td>292 (0x124)</td>
    395433<td>V_MBCNT_HI_U32_B32</td>
    396434<td>V_MADMK_F16</td>
     
    398436<tr>
    399437<td>37 (0x25)</td>
     438<td>293 (0x125)</td>
    400439<td>V_ADD_I32</td>
    401440<td>V_MADAK_F16</td>
     
    403442<tr>
    404443<td>38 (0x26)</td>
     444<td>294 (0x126)</td>
    405445<td>V_SUB_I32</td>
    406446<td>V_ADD_U16</td>
     
    408448<tr>
    409449<td>39 (0x27)</td>
     450<td>295 (0x127)</td>
    410451<td>V_SUBREV_I32</td>
    411452<td>V_SUB_U16</td>
     
    413454<tr>
    414455<td>40 (0x28)</td>
     456<td>296 (0x128)</td>
    415457<td>V_ADDC_U32</td>
    416458<td>V_SUBREV_U16</td>
     
    418460<tr>
    419461<td>41 (0x29)</td>
     462<td>297 (0x129)</td>
    420463<td>V_SUBB_U32</td>
    421464<td>V_MUL_LO_U16</td>
     
    423466<tr>
    424467<td>42 (0x2a)</td>
     468<td>298 (0x12a)</td>
    425469<td>V_SUBBREV_U32</td>
    426470<td>V_LSHLREV_B16</td>
     
    428472<tr>
    429473<td>43 (0x2b)</td>
     474<td>299 (0x12b)</td>
    430475<td>V_LDEXP_F32</td>
    431476<td>V_LSHRREV_B16</td>
     
    433478<tr>
    434479<td>44 (0x2c)</td>
     480<td>300 (0x12c)</td>
    435481<td>V_CVT_PKACCUM_U8_F32</td>
    436482<td>V_ASHRREV_I16</td>
     
    438484<tr>
    439485<td>45 (0x2d)</td>
     486<td>301 (0x12d)</td>
    440487<td>V_CVT_PKNORM_I16_F32</td>
    441488<td>V_MAX_F16</td>
     
    443490<tr>
    444491<td>46 (0x2e)</td>
     492<td>302 (0x12e)</td>
    445493<td>V_CVT_PKNORM_U16_F32</td>
    446494<td>V_MIN_F16</td>
     
    448496<tr>
    449497<td>47 (0x2f)</td>
     498<td>303 (0x12f)</td>
    450499<td>V_CVT_PKRTZ_F16_F32</td>
    451500<td>V_MAX_U16</td>
     
    453502<tr>
    454503<td>48 (0x30)</td>
     504<td>304 (0x130)</td>
    455505<td>V_CVT_PK_U16_U32</td>
    456506<td>V_MAX_I16</td>
     
    458508<tr>
    459509<td>49 (0x31)</td>
     510<td>305 (0x131)</td>
    460511<td>V_CVT_PK_I16_I32</td>
    461512<td>V_MIN_U16</td>
     
    463514<tr>
    464515<td>50 (0x32)</td>
     516<td>306 (0x132)</td>
    465517<td>--</td>
    466518<td>V_MIN_I16</td>
     
    468520<tr>
    469521<td>51 (0x33)</td>
     522<td>307 (0x133)</td>
    470523<td>--</td>
    471524<td>V_LDEXP_F16</td>
     
    481534Description: Add two FP values from SRC0 and SRC1 and store result to VDST.<br />
    482535Operation:<br />
    483 <code>VDST = (FLOAT)SRC0 + (FLOAT)SRC1</code></p>
     536<code>VDST = ASFLOAT(SRC0) + ASFLOAT(SRC1)</code></p>
    484537<h4>V_ADD_I32, V_ADD_U32</h4>
    485538<p>Opcode VOP2: 37 (0x25) for GCN 1.0/1.1; 25 (0x19) for GCN 1.2<br />
     
    538591Description: Count set bits in SRC0, add SRC1, and store result to VDST.<br />
    539592Operation:<br />
    540 <code>VDST = SRC1
    541 for (UINT8 i = 0; i &lt; 32; i++)
    542     VDST += ((1U&lt;&lt;i) &amp; SRC0) != 0</code></p>
     593<code>VDST = SRC1 + BITCOUNT(SRC0)</code></p>
    543594<h4>V_BFM_B32</h4>
    544595<p>Opcode VOP2: 30 (0x1e) for GCN 1.0/1.1<br />
     
    551602<h4>V_CNDMASK_B32</h4>
    552603<p>Opcode VOP2: 0 (0x0) for GCN 1.0/1.1; 0 (0x0) for GCN 1.2<br />
    553 Opcode VOP3a: 259 (0x100) for GCN 1.0/1.1; 256 (0x100) for GCN 1.2<br />
     604Opcode VOP3a: 256 (0x100) for GCN 1.0/1.1; 256 (0x100) for GCN 1.2<br />
    554605Syntax VOP2: V_CNDMASK_B32 VDST, SRC0, SRC1, VCC<br />
    555606Syntax VOP3a: V_CNDMASK_B32 VDST, SRC0, SRC1, SSRC2(2)<br />
     
    558609Operation:<br />
    559610<code>VDST = SSRC2&amp;(1ULL&lt;&lt;LANEID) ? SRC1 : SRC0</code></p>
     611<h4>V_CVT_PKACCUM_U8_F32</h4>
     612<p>Opcode VOP2: 44 (0x2c) for GCN 1.0/1.1<br />
     613Opcode VOP3a: 300 (0x12c) for GCN 1.0/1.1<br />
     614Syntax: V_CVT_PKACCUM_U8_F32 VDST, SRC0, SRC1<br />
     615Description: Convert floating point value from SRC0 to unsigned byte value with
     616rounding mode from MODE register, and store this byte to (SRC1&amp;3)'th byte of VDST.<br />
     617Operation:<br />
     618<code>UINT8 byte = ((SRC1&amp;3) * 8)
     619UINT32 mask = 0xff &lt;&lt; byte
     620UINT8 VAL8 = 0
     621FLOAT f = RNDINT(ASFLOAT(SRC0))
     622if (f &gt; 255.0)
     623    VAL8 = 255
     624else if (f &lt; 0.0 || f == NaN)
     625    VAL8 = 0
     626else
     627    VAL8 = f
     628VDST = (VDST&amp;~mask) | (((UINT32)VAL8) &lt;&lt; byte)</code></p>
     629<h4>V_CVT_PKNORM_I16_F32</h4>
     630<p>Opcode VOP2: 45 (0x2d) for GCN 1.0/1.1<br />
     631Opcode VOP3a: 301 (0x12d) for GCN 1.0/1.1<br />
     632Syntax: V_CVT_PKNORM_I16_F32 VDST, SRC0, SRC1<br />
     633Description: Convert normalized FP values from SRC0 and SRC1 to signed 16-bit integers with
     634rounding to nearest even (??), and store the first value in the low 16 bits and
     635the second in the high 16 bits of VDST.<br />
     636Operation:<br />
     637<code>INT16 roundNorm(FLOAT S)
     638{
     639    FLOAT f = RNDNEINT(S*32767)
     640    if (f &gt; 32767.0)
     641        return 0x7fff
     642    else if (f &lt; -32767.0)
     643        return -0x7fff
     644    else if (f == NaN)
     645        return 0
     646    return (INT16)f
     647}
     648VDST = (UINT16)roundNorm(ASFLOAT(SRC0)) | ((UINT32)roundNorm(ASFLOAT(SRC1)) &lt;&lt; 16)</code></p>
     649<h4>V_CVT_PKNORM_U16_F32</h4>
     650<p>Opcode VOP2: 46 (0x2e) for GCN 1.0/1.1<br />
     651Opcode VOP3a: 302 (0x12e) for GCN 1.0/1.1<br />
     652Syntax: V_CVT_PKNORM_U16_F32 VDST, SRC0, SRC1<br />
     653Description: Convert normalized FP values from SRC0 and SRC1 to unsigned 16-bit integers with
     654rounding to nearest even (??), and store the first value in the low 16 bits and
     655the second in the high 16 bits of VDST.<br />
     656Operation:<br />
     657<code>UINT16 roundNorm(FLOAT S)
     658{
     659    FLOAT f = RNDNEINT(S*65535.0)
     660    INT16 VAL16 = 0
     661    if (f &gt; 65535.0)
     662        return 0xffff
     663    else if (f &lt; 0.0 || f == NaN)
     664        return 0
     665    return (UINT16)f
     666}
     667VDST = roundNorm(ASFLOAT(SRC0)) | ((UINT32)roundNorm(ASFLOAT(SRC1)) &lt;&lt; 16)</code></p>
     668<h4>V_CVT_PKRTZ_F16_F32</h4>
     669<p>Opcode VOP2: 47 (0x2f) for GCN 1.0/1.1<br />
     670Opcode VOP3a: 303 (0x12f) for GCN 1.0/1.1<br />
     671Syntax: V_CVT_PKRTZ_F16_F32 VDST, SRC0, SRC1<br />
     672Description: Convert normalized FP value from SRC0 and SRC1 to half floating points with
     673rounding to zero, and store first value to low 16-bit and
     674second to high 16-bit of the VDST.<br />
     675Operation:<br />
     676<code>UINT16 D0 = ASINT16(CVT_HALF_RTZ(ASFLOAT(SRC0)))
     677UINT16 D1 = ASINT16(CVT_HALF_RTZ(ASFLOAT(SRC1)))
     678VDST = D0 | (((UINT32)D1) &lt;&lt; 16)</code></p>
     679<h4>V_CVT_PK_U16_U32</h4>
     680<p>Opcode VOP2: 48 (0x30) for GCN 1.0/1.1<br />
     681Opcode VOP3a: 304 (0x130) for GCN 1.0/1.1<br />
     682Syntax: V_CVT_PK_U16_U32 VDST, SRC0, SRC1<br />
     683Description: Convert unsigned value from SRC0 and SRC1 to unsigned 16-bit values with
     684clamping, and store first value to low 16-bit and second to high 16-bit of the VDST.<br />
     685Operation:<br />
     686<code>UINT16 D0 = MIN(SRC0, 0xffff)
     687UINT16 D1 = MIN(SRC1, 0xffff)
     688VDST = D0 | (((UINT32)D1) &lt;&lt; 16)</code></p>
     689<h4>V_CVT_PK_I16_I32</h4>
     690<p>Opcode VOP2: 49 (0x31) for GCN 1.0/1.1<br />
     691Opcode VOP3a: 305 (0x131) for GCN 1.0/1.1<br />
     692Syntax: V_CVT_PK_I16_I32 VDST, SRC0, SRC1<br />
     693Description: Convert signed value from SRC0 and SRC1 to signed 16-bit values with
     694clamping, and store first value to low 16-bit and second to high 16-bit of the VDST.<br />
     695Operation:<br />
     696<code>INT16 D0 = MAX(MIN((INT32)SRC0, 0x7fff), -0x8000)
     697INT16 D1 = MAX(MIN((INT32)SRC1, 0x7fff), -0x8000)
     698VDST = (UINT16)D0 | (((UINT32)D1) &lt;&lt; 16)</code></p>
     699<h4>V_LDEXP_F32</h4>
     700<p>Opcode VOP2: 43 (0x2b) for GCN 1.0/1.1<br />
     701Opcode VOP3a: 299 (0x12b) for GCN 1.0/1.1<br />
     702Syntax: V_LDEXP_F32 VDST, SRC0, SRC1<br />
     703Description: Do ldexp operation on SRC0 and SRC1 (multiply SRC0 by 2**(SRC1)).
     704SRC1 is signed integer, SRC0 is floating point value.<br />
     705Operation:<br />
     706<code>VDST = ASFLOAT(SRC0) * POW(2.0, (INT32)SRC1)</code></p>
    560707<h4>V_LSHL_B32</h4>
    561708<p>Opcode VOP2: 25 (0x19) for GCN 1.0/1.1<br />
     
    592739Description: Multiply FP value from SRC0 by FP value from SRC1 and add result to VDST.<br />
    593740Operation:<br />
    594 <code>VDST = (FLOAT)SRC0 * (FLOAT)SRC1 + (FLOAT)VDST</code></p>
     741<code>VDST = ASFLOAT(SRC0) * ASFLOAT(SRC1) + ASFLOAT(VDST)</code></p>
    595742<h4>V_MAC_LEGACY_F32</h4>
    596743<p>Opcode VOP2: 6 (0x6) for GCN 1.0/1.1<br />
     
    600747If either value is 0.0, VDST is left unchanged (IEEE rules for 0.0*x are not applied).<br />
    601748Operation:<br />
    602 <code>if ((FLOAT)SRC0!=0.0 &amp;&amp; (FLOAT)SRC1!=0.0)
    603     VDST = (FLOAT)SRC0 * (FLOAT)SRC1 + (FLOAT)VDST</code></p>
     749<code>if (ASFLOAT(SRC0)!=0.0 &amp;&amp; ASFLOAT(SRC1)!=0.0)
     750    VDST = ASFLOAT(SRC0) * ASFLOAT(SRC1) + ASFLOAT(VDST)</code></p>
    604751<h4>V_MADMK_F32</h4>
    605752<p>Opcode VOP2: 32 (0x20) for GCN 1.0/1.1; 23 (0x17) for GCN 1.2<br />
     
    610757after instruction word.<br />
    611758Operation:
    612 <code>VDST = (FLOAT)SRC0 * (FLOAT)FLOATLIT + (FLOAT)SRC1</code></p>
     759<code>VDST = ASFLOAT(SRC0) * ASFLOAT(FLOATLIT) + ASFLOAT(SRC1)</code></p>
    613760<h4>V_MADAK_F32</h4>
    614761<p>Opcode VOP2: 33 (0x21) for GCN 1.0/1.1; 24 (0x18) for GCN 1.2<br />
     
    619766after instruction word.<br />
    620767Operation:
    621 <code>VDST = (FLOAT)SRC0 * (FLOAT)SRC1 + (FLOAT)FLOATLIT</code></p>
     768<code>VDST = ASFLOAT(SRC0) * ASFLOAT(SRC1) + ASFLOAT(FLOATLIT)</code></p>
    622769<h4>V_MAX_F32</h4>
    623770<p>Opcode VOP2: 16 (0x10) for GCN 1.0/1.1; 11 (0xb) for GCN 1.2<br />
     
    627774and store result to VDST.<br />
    628775Operation:<br />
    629 <code>VDST = (FLOAT)SRC0&gt;(FLOAT)SRC1 ? (FLOAT)SRC0 : (FLOAT)SRC1</code></p>
     776<code>VDST = MAX(ASFLOAT(SRC0), ASFLOAT(SRC1))</code></p>
    630777<h4>V_MAX_I32</h4>
    631 <p>Opcode VOP2: 18 (0x12) for GCN 1.0/1.1; 11 (0xd) for GCN 1.2<br />
    632 Opcode VOP3a: 274 (0x112) for GCN 1.0/1.1; 267 (0x10d) for GCN 1.2<br />
     778<p>Opcode VOP2: 18 (0x12) for GCN 1.0/1.1; 13 (0xd) for GCN 1.2<br />
     779Opcode VOP3a: 274 (0x112) for GCN 1.0/1.1; 269 (0x10d) for GCN 1.2<br />
    633780Syntax: V_MAX_I32 VDST, SRC0, SRC1<br />
    634781Description: Choose largest signed value from SRC0 and SRC1, and store result to VDST.<br />
    635782Operation:<br />
    636 <code>VDST = (INT32)SRC0&gt;(INT32)SRC1 ? SRC0 : SRC1</code></p>
     783<code>VDST = MAX((INT32)SRC0, (INT32)SRC1)</code></p>
    637784<h4>V_MAX_LEGACY_F32</h4>
    638785<p>Opcode VOP2: 14 (0xe) for GCN 1.0/1.1<br />
     
    643790(legacy rules for handling NaNs).<br />
    644791Operation:<br />
    645 <code>if ((FLOAT)SRC1!=NaN)
    646     VDST = (FLOAT)SRC0&gt;(FLOAT)SRC1 ? (FLOAT)SRC0 : (FLOAT)SRC1
     792<code>if (ASFLOAT(SRC1)!=NaN)
     793    VDST = MAX(ASFLOAT(SRC0), ASFLOAT(SRC1))
    647794else
    648795    VDST = NaN</code></p>
    649796<h4>V_MAX_U32</h4>
    650 <p>Opcode VOP2: 20 (0x14) for GCN 1.0/1.1; 13 (0xf) for GCN 1.2<br />
    651 Opcode VOP3a: 276 (0x114) for GCN 1.0/1.1; 269 (0x10f) for GCN 1.2<br />
     797<p>Opcode VOP2: 20 (0x14) for GCN 1.0/1.1; 15 (0xf) for GCN 1.2<br />
     798Opcode VOP3a: 276 (0x114) for GCN 1.0/1.1; 271 (0x10f) for GCN 1.2<br />
    652799Syntax: V_MAX_U32 VDST, SRC0, SRC1<br />
    653800Description: Choose largest unsigned value from SRC0 and SRC1, and store result to VDST.<br />
    654801Operation:<br />
    655 <code>VDST = SRC0&gt;SRC1 ? SRC0 : SRC1</code></p>
     802<code>VDST = MAX(SRC0, SRC1)</code></p>
    656803<h4>V_MBCNT_HI_U32_B32</h4>
    657804<p>Opcode VOP2: 36 (0x24) for GCN 1.0/1.1<br />
     
    663810Operation:<br />
    664811<code>UINT32 MASK = ((1ULL &lt;&lt; (LANEID-32)) - 1ULL) &amp; SRC0
    665 VDST = SRC1
    666 for (UINT8 i = 0; i &lt; 32; i++)
    667     VDST += ((1U&lt;&lt;i) &amp; MASK) != 0</code></p>
     812VDST = SRC1 + BITCOUNT(MASK)</code></p>
    668813<h4>V_MBCNT_LO_U32_B32</h4>
    669814<p>Opcode VOP2: 35 (0x23) for GCN 1.0/1.1<br />
     
    675820Operation:<br />
    676821<code>UINT32 MASK = ((1ULL &lt;&lt; LANEID) - 1ULL) &amp; SRC0
    677 VDST = SRC1
    678 for (UINT8 i = 0; i &lt; 32; i++)
    679     VDST += ((1U&lt;&lt;i) &amp; MASK) != 0</code></p>
     822VDST = SRC1 + BITCOUNT(MASK)</code></p>
    680823<h4>V_MIN_F32</h4>
    681824<p>Opcode VOP2: 15 (0xf) for GCN 1.0/1.1; 10 (0xa) for GCN 1.2<br />
     
    685828and store result to VDST.<br />
    686829Operation:<br />
    687 <code>VDST = (FLOAT)SRC0&lt;(FLOAT)SRC1 ? (FLOAT)SRC0 : (FLOAT)SRC1</code></p>
     830<code>VDST = MIN(ASFLOAT(SRC0), ASFLOAT(SRC1))</code></p>
    688831<h4>V_MIN_I32</h4>
    689832<p>Opcode VOP2: 17 (0x11) for GCN 1.0/1.1; 12 (0xc) for GCN 1.2<br />
     
    692835Description: Choose smallest signed value from SRC0 and SRC1, and store result to VDST.<br />
    693836Operation:<br />
    694 <code>VDST = (INT32)SRC0&lt;(INT32)SRC1 ? SRC0 : SRC1</code></p>
     837<code>VDST = MIN((INT32)SRC0, (INT32)SRC1)</code></p>
    695838<h4>V_MIN_LEGACY_F32</h4>
    696839<p>Opcode VOP2: 13 (0xd) for GCN 1.0/1.1<br />
     
    701844(legacy rules for handling NaNs).<br />
    702845Operation:<br />
    703 <code>if ((FLOAT)SRC1!=NaN)
    704     VDST = (FLOAT)SRC0&lt;(FLOAT)SRC1 ? (FLOAT)SRC0 : (FLOAT)SRC1
     846<code>if (ASFLOAT(SRC1)!=NaN)
     847    VDST = MIN(ASFLOAT(SRC0), ASFLOAT(SRC1))
    705848else
    706849    VDST = NaN</code></p>
     
    711854Description: Choose smallest unsigned value from SRC0 and SRC1, and store result to VDST.<br />
    712855Operation:<br />
    713 <code>VDST = SRC0&lt;SRC1 ? SRC0 : SRC1</code></p>
     856<code>VDST = MIN(SRC0, SRC1)</code></p>
    714857<h4>V_MUL_LEGACY_F32</h4>
    715858<p>Opcode VOP2: 7 (0x7) for GCN 1.0/1.1; 4 (0x4) for GCN 1.2<br />
     
    719862If either value is 0.0, always store 0.0 to VDST (IEEE rules for 0.0*x are not applied).<br />
    720863Operation:<br />
    721 <code>if ((FLOAT)SRC0!=0.0 &amp;&amp; (FLOAT)SRC1!=0.0)
    722     VDST = (FLOAT)SRC0 * (FLOAT)SRC1
     864<code>if (ASFLOAT(SRC0)!=0.0 &amp;&amp; ASFLOAT(SRC1)!=0.0)
     865    VDST = ASFLOAT(SRC0) * ASFLOAT(SRC1)
    723866else
    724867    VDST = 0.0</code></p>
     
    729872Description: Multiply FP value from SRC0 by FP value from SRC1 and store result to VDST.<br />
    730873Operation:<br />
    731 <code>VDST = (FLOAT)SRC0 * (FLOAT)SRC1</code></p>
     874<code>VDST = ASFLOAT(SRC0) * ASFLOAT(SRC1)</code></p>
    732875<h4>V_MUL_HI_I32_I24</h4>
    733876<p>Opcode VOP2: 10 (0xa) for GCN 1.0/1.1; 7 (0x7) for GCN 1.2<br />
     
    790933Description: Subtract FP value of SRC1 from FP value of SRC0 and store result to VDST.<br />
    791934Operation:<br />
    792 <code>VDST = (FLOAT)SRC0 - (FLOAT)SRC1</code></p>
     935<code>VDST = ASFLOAT(SRC0) - ASFLOAT(SRC1)</code></p>
    793936<h4>V_SUB_I32, V_SUB_U32</h4>
    794937<p>Opcode VOP2: 38 (0x26) for GCN 1.0/1.1; 26 (0x1a) for GCN 1.2<br />
     
    826969Description: Subtract FP value of SRC0 from FP value of SRC1 and store result to VDST.<br />
    827970Operation:<br />
    828 <code>VDST = (FLOAT)SRC1 - (FLOAT)SRC0</code></p>
     971<code>VDST = ASFLOAT(SRC1) - ASFLOAT(SRC0)</code></p>
    829972<h4>V_SUBBREV_U32</h4>
    830973<p>Opcode VOP2: 42 (0x2a) for GCN 1.0/1.1; 30 (0x1e) for GCN 1.2<br />