@@ -1184,73 +1184,74 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
1184
1184
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
1185
1185
; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
1186
1186
; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
1187
- ; GISEL-NEXT: v_trunc_f32_e32 v7, v5
1188
- ; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7
1189
- ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v4
1190
- ; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
1191
- ; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], s6, v6, 0
1192
- ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[5:6]
1193
- ; GISEL-NEXT: v_mul_lo_u32 v5, v7, v4
1194
- ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
1195
- ; GISEL-NEXT: v_mul_hi_u32 v9, v6, v4
1196
- ; GISEL-NEXT: v_mul_hi_u32 v4, v7, v4
1197
- ; GISEL-NEXT: v_mul_lo_u32 v10, v6, v8
1198
- ; GISEL-NEXT: v_mul_lo_u32 v11, v7, v8
1199
- ; GISEL-NEXT: v_mul_hi_u32 v12, v6, v8
1187
+ ; GISEL-NEXT: v_trunc_f32_e32 v6, v5
1188
+ ; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6
1189
+ ; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v4
1190
+ ; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v6
1191
+ ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v5, 0
1192
+ ; GISEL-NEXT: v_mov_b32_e32 v4, v9
1193
+ ; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v7, v[4:5]
1194
+ ; GISEL-NEXT: v_mul_lo_u32 v4, v7, v8
1195
+ ; GISEL-NEXT: v_mul_hi_u32 v6, v5, v8
1196
+ ; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s7, v5, v[9:10]
1200
1197
; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8
1201
- ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10
1198
+ ; GISEL-NEXT: v_mul_lo_u32 v10, v5, v9
1199
+ ; GISEL-NEXT: v_mul_lo_u32 v11, v7, v9
1200
+ ; GISEL-NEXT: v_mul_hi_u32 v12, v5, v9
1201
+ ; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9
1202
+ ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10
1202
1203
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1203
- ; GISEL-NEXT: v_add_i32_e32 v5 , vcc, v5, v9
1204
- ; GISEL-NEXT: v_cndmask_b32_e64 v5 , 0, 1, vcc
1205
- ; GISEL-NEXT: v_add_i32_e32 v5 , vcc, v10, v5
1206
- ; GISEL-NEXT: v_add_i32_e32 v4 , vcc, v11, v4
1207
- ; GISEL-NEXT: v_cndmask_b32_e64 v9 , 0, 1, vcc
1208
- ; GISEL-NEXT: v_add_i32_e32 v4 , vcc, v4 , v12
1204
+ ; GISEL-NEXT: v_add_i32_e32 v4 , vcc, v4, v6
1205
+ ; GISEL-NEXT: v_cndmask_b32_e64 v4 , 0, 1, vcc
1206
+ ; GISEL-NEXT: v_add_i32_e32 v4 , vcc, v10, v4
1207
+ ; GISEL-NEXT: v_add_i32_e32 v6 , vcc, v11, v8
1208
+ ; GISEL-NEXT: v_cndmask_b32_e64 v8 , 0, 1, vcc
1209
+ ; GISEL-NEXT: v_add_i32_e32 v6 , vcc, v6 , v12
1209
1210
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1210
- ; GISEL-NEXT: v_add_i32_e32 v9 , vcc, v9 , v10
1211
- ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
1212
- ; GISEL-NEXT: v_cndmask_b32_e64 v5 , 0, 1, vcc
1213
- ; GISEL-NEXT: v_add_i32_e32 v5 , vcc, v9, v5
1214
- ; GISEL-NEXT: v_add_i32_e32 v5 , vcc, v8, v5
1215
- ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v6 , v4
1211
+ ; GISEL-NEXT: v_add_i32_e32 v8 , vcc, v8 , v10
1212
+ ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4
1213
+ ; GISEL-NEXT: v_cndmask_b32_e64 v6 , 0, 1, vcc
1214
+ ; GISEL-NEXT: v_add_i32_e32 v6 , vcc, v8, v6
1215
+ ; GISEL-NEXT: v_add_i32_e32 v6 , vcc, v9, v6
1216
+ ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v5 , v4
1216
1217
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0
1217
- ; GISEL-NEXT: v_addc_u32_e32 v5 , vcc, v7, v5 , vcc
1218
+ ; GISEL-NEXT: v_addc_u32_e32 v12 , vcc, v7, v6 , vcc
1218
1219
; GISEL-NEXT: v_mov_b32_e32 v4, v9
1219
- ; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v5 , v[4:5]
1220
- ; GISEL-NEXT: v_ashrrev_i32_e32 v4 , 31, v1
1221
- ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
1220
+ ; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v12 , v[4:5]
1221
+ ; GISEL-NEXT: v_ashrrev_i32_e32 v6 , 31, v1
1222
+ ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
1222
1223
; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s7, v11, v[9:10]
1223
- ; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4 , vcc
1224
- ; GISEL-NEXT: v_xor_b32_e32 v10, v0, v4
1225
- ; GISEL-NEXT: v_mul_lo_u32 v0, v5 , v8
1226
- ; GISEL-NEXT: v_mul_lo_u32 v12 , v11, v9
1227
- ; GISEL-NEXT: v_xor_b32_e32 v13, v1, v4
1224
+ ; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v6 , vcc
1225
+ ; GISEL-NEXT: v_xor_b32_e32 v10, v0, v6
1226
+ ; GISEL-NEXT: v_mul_lo_u32 v0, v12 , v8
1227
+ ; GISEL-NEXT: v_mul_lo_u32 v4 , v11, v9
1228
+ ; GISEL-NEXT: v_xor_b32_e32 v13, v1, v6
1228
1229
; GISEL-NEXT: v_mul_hi_u32 v1, v11, v8
1229
- ; GISEL-NEXT: v_mul_hi_u32 v8, v5 , v8
1230
- ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12
1231
- ; GISEL-NEXT: v_cndmask_b32_e64 v12 , 0, 1, vcc
1230
+ ; GISEL-NEXT: v_mul_hi_u32 v8, v12 , v8
1231
+ ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
1232
+ ; GISEL-NEXT: v_cndmask_b32_e64 v4 , 0, 1, vcc
1232
1233
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
1233
1234
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1234
- ; GISEL-NEXT: v_mul_lo_u32 v1, v5 , v9
1235
- ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12 , v0
1236
- ; GISEL-NEXT: v_mul_hi_u32 v12 , v11, v9
1235
+ ; GISEL-NEXT: v_mul_lo_u32 v1, v12 , v9
1236
+ ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4 , v0
1237
+ ; GISEL-NEXT: v_mul_hi_u32 v4 , v11, v9
1237
1238
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v8
1238
1239
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1239
- ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v12
1240
- ; GISEL-NEXT: v_cndmask_b32_e64 v12 , 0, 1, vcc
1241
- ; GISEL-NEXT: v_add_i32_e32 v8 , vcc, v8, v12
1242
- ; GISEL-NEXT: v_mul_hi_u32 v9, v5 , v9
1240
+ ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v4
1241
+ ; GISEL-NEXT: v_cndmask_b32_e64 v4 , 0, 1, vcc
1242
+ ; GISEL-NEXT: v_add_i32_e32 v4 , vcc, v8, v4
1243
+ ; GISEL-NEXT: v_mul_hi_u32 v8, v12 , v9
1243
1244
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0
1244
1245
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
1246
+ ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1
1245
1247
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
1246
- ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1
1247
1248
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0
1248
- ; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v5 , v1, vcc
1249
+ ; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v12 , v1, vcc
1249
1250
; GISEL-NEXT: v_mul_lo_u32 v8, v13, v0
1250
1251
; GISEL-NEXT: v_mul_lo_u32 v9, v10, v1
1251
1252
; GISEL-NEXT: v_mul_hi_u32 v11, v10, v0
1252
1253
; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
1253
- ; GISEL-NEXT: v_mov_b32_e32 v5 , 0x12d8fb
1254
+ ; GISEL-NEXT: v_mov_b32_e32 v4 , 0x12d8fb
1254
1255
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9
1255
1256
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1256
1257
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
@@ -1265,40 +1266,39 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
1265
1266
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9
1266
1267
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v8
1267
1268
; GISEL-NEXT: v_mul_hi_u32 v12, v13, v1
1268
- ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5 , v11, 0
1269
+ ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4 , v11, 0
1269
1270
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1270
1271
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
1271
1272
; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v8
1272
- ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v5 , v12, v[1:2]
1273
+ ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v4 , v12, v[1:2]
1273
1274
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v0
1274
- ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9]
1275
- ; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb
1276
- ; GISEL-NEXT: s_subb_u32 s7, 0, 0
1277
1275
; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v13, v8, vcc
1278
1276
; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], v13, v8
1279
- ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
1277
+ ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4
1280
1278
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
1281
1279
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1
1282
1280
; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v8, vcc
1283
- ; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v0, v5
1281
+ ; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v0, v4
1284
1282
; GISEL-NEXT: v_cndmask_b32_e64 v10, -1, v9, s[4:5]
1285
1283
; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v1, vcc
1284
+ ; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb
1286
1285
; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v11
1287
- ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v6 , 0
1286
+ ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v5 , 0
1288
1287
; GISEL-NEXT: v_addc_u32_e32 v14, vcc, 0, v12, vcc
1289
- ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5
1288
+ ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4
1290
1289
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc
1291
1290
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9
1292
1291
; GISEL-NEXT: v_cndmask_b32_e32 v15, -1, v8, vcc
1293
1292
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2]
1293
+ ; GISEL-NEXT: s_subb_u32 s7, 0, 0
1294
1294
; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v13
1295
- ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v6 , v[8:9]
1295
+ ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v5 , v[8:9]
1296
1296
; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v14, vcc
1297
1297
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15
1298
1298
; GISEL-NEXT: v_cndmask_b32_e32 v9, v13, v1, vcc
1299
1299
; GISEL-NEXT: v_mul_lo_u32 v1, v7, v0
1300
- ; GISEL-NEXT: v_mul_lo_u32 v13, v6 , v8
1301
- ; GISEL-NEXT: v_mul_hi_u32 v15, v6 , v0
1300
+ ; GISEL-NEXT: v_mul_lo_u32 v13, v5 , v8
1301
+ ; GISEL-NEXT: v_mul_hi_u32 v15, v5 , v0
1302
1302
; GISEL-NEXT: v_cndmask_b32_e32 v14, v14, v16, vcc
1303
1303
; GISEL-NEXT: v_mul_hi_u32 v0, v7, v0
1304
1304
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13
@@ -1307,7 +1307,7 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
1307
1307
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
1308
1308
; GISEL-NEXT: v_mul_lo_u32 v15, v7, v8
1309
1309
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
1310
- ; GISEL-NEXT: v_mul_hi_u32 v13, v6 , v8
1310
+ ; GISEL-NEXT: v_mul_hi_u32 v13, v5 , v8
1311
1311
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0
1312
1312
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
1313
1313
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13
@@ -1318,95 +1318,93 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
1318
1318
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
1319
1319
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
1320
1320
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
1321
- ; GISEL-NEXT: v_add_i32_e32 v8 , vcc, v6 , v0
1321
+ ; GISEL-NEXT: v_add_i32_e32 v5 , vcc, v5 , v0
1322
1322
; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v7, v1, vcc
1323
- ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8 , 0
1323
+ ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v5 , 0
1324
1324
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
1325
- ; GISEL-NEXT: v_cndmask_b32_e32 v9, v11, v9, vcc
1326
- ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
1327
- ; GISEL-NEXT: v_xor_b32_e32 v1, v9, v4
1325
+ ; GISEL-NEXT: v_cndmask_b32_e32 v7, v11, v9, vcc
1326
+ ; GISEL-NEXT: v_xor_b32_e32 v10, v7, v6
1327
+ ; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s6, v13, v[1:2]
1328
+ ; GISEL-NEXT: v_cndmask_b32_e32 v9, v12, v14, vcc
1329
+ ; GISEL-NEXT: v_xor_b32_e32 v1, v9, v6
1330
+ ; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s7, v5, v[7:8]
1328
1331
; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v3
1329
- ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7]
1330
- ; GISEL-NEXT: v_cndmask_b32_e32 v10, v12, v14, vcc
1331
1332
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v9
1332
1333
; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc
1333
- ; GISEL-NEXT: v_xor_b32_e32 v11 , v2, v9
1334
+ ; GISEL-NEXT: v_xor_b32_e32 v8 , v2, v9
1334
1335
; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0
1335
- ; GISEL-NEXT: v_mul_lo_u32 v7, v8, v6
1336
+ ; GISEL-NEXT: v_mul_lo_u32 v11, v5, v7
1336
1337
; GISEL-NEXT: v_xor_b32_e32 v12, v3, v9
1337
- ; GISEL-NEXT: v_mul_hi_u32 v3, v8 , v0
1338
+ ; GISEL-NEXT: v_mul_hi_u32 v3, v5 , v0
1338
1339
; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
1339
- ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7
1340
- ; GISEL-NEXT: v_cndmask_b32_e64 v7 , 0, 1, vcc
1340
+ ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11
1341
+ ; GISEL-NEXT: v_cndmask_b32_e64 v11 , 0, 1, vcc
1341
1342
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
1342
1343
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
1343
- ; GISEL-NEXT: v_mul_lo_u32 v3, v13, v6
1344
- ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7 , v2
1345
- ; GISEL-NEXT: v_mul_hi_u32 v7, v8, v6
1344
+ ; GISEL-NEXT: v_mul_lo_u32 v3, v13, v7
1345
+ ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v11 , v2
1346
+ ; GISEL-NEXT: v_mul_hi_u32 v11, v5, v7
1346
1347
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0
1347
1348
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
1348
- ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7
1349
- ; GISEL-NEXT: v_cndmask_b32_e64 v7 , 0, 1, vcc
1350
- ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
1351
- ; GISEL-NEXT: v_mul_hi_u32 v6 , v13, v6
1349
+ ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11
1350
+ ; GISEL-NEXT: v_cndmask_b32_e64 v11 , 0, 1, vcc
1351
+ ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v11
1352
+ ; GISEL-NEXT: v_mul_hi_u32 v7 , v13, v7
1352
1353
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
1353
1354
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
1354
1355
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2
1355
- ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6 , v2
1356
- ; GISEL-NEXT: v_add_i32_e32 v0 , vcc, v8 , v0
1356
+ ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7 , v2
1357
+ ; GISEL-NEXT: v_add_i32_e32 v3 , vcc, v5 , v0
1357
1358
; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc
1358
- ; GISEL-NEXT: v_mul_lo_u32 v3, v12, v0
1359
- ; GISEL-NEXT: v_mul_lo_u32 v6, v11, v2
1360
- ; GISEL-NEXT: v_mul_hi_u32 v7, v11, v0
1361
- ; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0
1362
- ; GISEL-NEXT: v_xor_b32_e32 v8, v10, v4
1363
- ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6
1359
+ ; GISEL-NEXT: v_mul_lo_u32 v5, v12, v3
1360
+ ; GISEL-NEXT: v_mul_lo_u32 v7, v8, v2
1361
+ ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v6
1362
+ ; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc
1363
+ ; GISEL-NEXT: v_mul_hi_u32 v6, v8, v3
1364
+ ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
1365
+ ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1366
+ ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
1367
+ ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
1368
+ ; GISEL-NEXT: v_mul_lo_u32 v6, v12, v2
1369
+ ; GISEL-NEXT: v_mul_hi_u32 v3, v12, v3
1370
+ ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5
1371
+ ; GISEL-NEXT: v_mul_hi_u32 v7, v8, v2
1372
+ ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v6, v3
1364
1373
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
1365
1374
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
1366
- ; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
1367
- ; GISEL-NEXT: v_mul_lo_u32 v7, v12, v2
1368
- ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v6, v3
1369
- ; GISEL-NEXT: v_mul_hi_u32 v6, v11, v2
1370
- ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0
1371
1375
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1372
- ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
1373
- ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
1374
- ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
1375
- ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v3
1376
- ; GISEL-NEXT: v_mul_hi_u32 v7, v12, v2
1377
- ; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0
1378
- ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1379
- ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0
1380
- ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v7, v0
1381
- ; GISEL-NEXT: v_mov_b32_e32 v0, v3
1382
- ; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v13, v[0:1]
1383
- ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4
1384
- ; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc
1385
- ; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7]
1386
- ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2
1387
- ; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
1388
- ; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3
1389
- ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
1390
- ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5
1391
- ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v5
1376
+ ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7
1377
+ ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v3, v5
1378
+ ; GISEL-NEXT: v_mul_hi_u32 v10, v12, v2
1379
+ ; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v7, 0
1380
+ ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
1381
+ ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5
1382
+ ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v5
1383
+ ; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v10, v[3:4]
1384
+ ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v8, v2
1385
+ ; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v12, v5, vcc
1386
+ ; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v12, v5
1387
+ ; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
1388
+ ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4
1389
+ ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v4
1392
1390
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
1393
- ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4
1394
- ; GISEL-NEXT: v_subbrev_u32_e32 v3 , vcc, 0, v3 , vcc
1395
- ; GISEL-NEXT: v_cndmask_b32_e64 v4 , -1, v6, s[4:5]
1396
- ; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v10
1397
- ; GISEL-NEXT: v_addc_u32_e32 v7 , vcc, 0, v13 , vcc
1398
- ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5
1391
+ ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
1392
+ ; GISEL-NEXT: v_subbrev_u32_e32 v5 , vcc, 0, v5 , vcc
1393
+ ; GISEL-NEXT: v_cndmask_b32_e64 v3 , -1, v6, s[4:5]
1394
+ ; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v7
1395
+ ; GISEL-NEXT: v_addc_u32_e32 v8 , vcc, 0, v10 , vcc
1396
+ ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
1399
1397
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
1400
- ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
1398
+ ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
1401
1399
; GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc
1402
- ; GISEL-NEXT: v_add_i32_e32 v3 , vcc, 1, v6
1403
- ; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v7 , vcc
1400
+ ; GISEL-NEXT: v_add_i32_e32 v4 , vcc, 1, v6
1401
+ ; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v8 , vcc
1404
1402
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
1405
- ; GISEL-NEXT: v_cndmask_b32_e32 v2, v6, v3 , vcc
1406
- ; GISEL-NEXT: v_cndmask_b32_e32 v3, v7 , v5, vcc
1407
- ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
1408
- ; GISEL-NEXT: v_cndmask_b32_e32 v2, v10 , v2, vcc
1409
- ; GISEL-NEXT: v_cndmask_b32_e32 v3, v13, v3 , vcc
1403
+ ; GISEL-NEXT: v_cndmask_b32_e32 v2, v6, v4 , vcc
1404
+ ; GISEL-NEXT: v_cndmask_b32_e32 v4, v8 , v5, vcc
1405
+ ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
1406
+ ; GISEL-NEXT: v_cndmask_b32_e32 v2, v7 , v2, vcc
1407
+ ; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v4 , vcc
1410
1408
; GISEL-NEXT: v_xor_b32_e32 v2, v2, v9
1411
1409
; GISEL-NEXT: v_xor_b32_e32 v3, v3, v9
1412
1410
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v9
0 commit comments