Skip to content

Commit 04546a0

Browse files
authored
[GlobalISel] Support vector G_UNMERGE_VALUES in computeKnownBits. (#112172)
This adds computeKnownBits support for vector->vector G_UNMERGE_VALUES, grabbing the known bits with an adjusted DemandedElts mask.
1 parent 9aef0fd commit 04546a0

File tree

4 files changed

+435
-386
lines changed

4 files changed

+435
-386
lines changed

llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp

+17-7
Original file line number | Diff line number | Diff line change
@@ -514,23 +514,33 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
514514
break;
515515
}
516516
case TargetOpcode::G_UNMERGE_VALUES: {
517-
if (DstTy.isVector())
518-
break;
519517
unsigned NumOps = MI.getNumOperands();
520518
Register SrcReg = MI.getOperand(NumOps - 1).getReg();
521-
if (MRI.getType(SrcReg).isVector())
522-
return; // TODO: Handle vectors.
519+
LLT SrcTy = MRI.getType(SrcReg);
523520

524-
KnownBits SrcOpKnown;
525-
computeKnownBitsImpl(SrcReg, SrcOpKnown, DemandedElts, Depth + 1);
521+
if (SrcTy.isVector() && SrcTy.getScalarType() != DstTy.getScalarType())
522+
return; // TODO: Handle vector->subelement unmerges
526523

527524
// Figure out the result operand index
528525
unsigned DstIdx = 0;
529526
for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R;
530527
++DstIdx)
531528
;
532529

533-
Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
530+
APInt SubDemandedElts = DemandedElts;
531+
if (SrcTy.isVector()) {
532+
unsigned DstLanes = DstTy.isVector() ? DstTy.getNumElements() : 1;
533+
SubDemandedElts =
534+
DemandedElts.zext(SrcTy.getNumElements()).shl(DstIdx * DstLanes);
535+
}
536+
537+
KnownBits SrcOpKnown;
538+
computeKnownBitsImpl(SrcReg, SrcOpKnown, SubDemandedElts, Depth + 1);
539+
540+
if (SrcTy.isVector())
541+
Known = SrcOpKnown;
542+
else
543+
Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
534544
break;
535545
}
536546
case TargetOpcode::G_BSWAP: {

llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll

+125-127
Original file line number | Diff line number | Diff line change
@@ -1184,73 +1184,74 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
11841184
; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
11851185
; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
11861186
; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
1187-
; GISEL-NEXT: v_trunc_f32_e32 v7, v5
1188-
; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v7
1189-
; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v4
1190-
; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
1191-
; GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], s6, v6, 0
1192-
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[5:6]
1193-
; GISEL-NEXT: v_mul_lo_u32 v5, v7, v4
1194-
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
1195-
; GISEL-NEXT: v_mul_hi_u32 v9, v6, v4
1196-
; GISEL-NEXT: v_mul_hi_u32 v4, v7, v4
1197-
; GISEL-NEXT: v_mul_lo_u32 v10, v6, v8
1198-
; GISEL-NEXT: v_mul_lo_u32 v11, v7, v8
1199-
; GISEL-NEXT: v_mul_hi_u32 v12, v6, v8
1187+
; GISEL-NEXT: v_trunc_f32_e32 v6, v5
1188+
; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v6
1189+
; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v4
1190+
; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v6
1191+
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v5, 0
1192+
; GISEL-NEXT: v_mov_b32_e32 v4, v9
1193+
; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v7, v[4:5]
1194+
; GISEL-NEXT: v_mul_lo_u32 v4, v7, v8
1195+
; GISEL-NEXT: v_mul_hi_u32 v6, v5, v8
1196+
; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s7, v5, v[9:10]
12001197
; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8
1201-
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10
1198+
; GISEL-NEXT: v_mul_lo_u32 v10, v5, v9
1199+
; GISEL-NEXT: v_mul_lo_u32 v11, v7, v9
1200+
; GISEL-NEXT: v_mul_hi_u32 v12, v5, v9
1201+
; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9
1202+
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10
12021203
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1203-
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v9
1204-
; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
1205-
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v5
1206-
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v11, v4
1207-
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1208-
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v12
1204+
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6
1205+
; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
1206+
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v10, v4
1207+
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v11, v8
1208+
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1209+
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12
12091210
; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1210-
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10
1211-
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
1212-
; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
1213-
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5
1214-
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5
1215-
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v6, v4
1211+
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10
1212+
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v6, v4
1213+
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
1214+
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v8, v6
1215+
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6
1216+
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v5, v4
12161217
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v11, 0
1217-
; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v7, v5, vcc
1218+
; GISEL-NEXT: v_addc_u32_e32 v12, vcc, v7, v6, vcc
12181219
; GISEL-NEXT: v_mov_b32_e32 v4, v9
1219-
; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v5, v[4:5]
1220-
; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1
1221-
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
1220+
; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s6, v12, v[4:5]
1221+
; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v1
1222+
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
12221223
; GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], s7, v11, v[9:10]
1223-
; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc
1224-
; GISEL-NEXT: v_xor_b32_e32 v10, v0, v4
1225-
; GISEL-NEXT: v_mul_lo_u32 v0, v5, v8
1226-
; GISEL-NEXT: v_mul_lo_u32 v12, v11, v9
1227-
; GISEL-NEXT: v_xor_b32_e32 v13, v1, v4
1224+
; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v6, vcc
1225+
; GISEL-NEXT: v_xor_b32_e32 v10, v0, v6
1226+
; GISEL-NEXT: v_mul_lo_u32 v0, v12, v8
1227+
; GISEL-NEXT: v_mul_lo_u32 v4, v11, v9
1228+
; GISEL-NEXT: v_xor_b32_e32 v13, v1, v6
12281229
; GISEL-NEXT: v_mul_hi_u32 v1, v11, v8
1229-
; GISEL-NEXT: v_mul_hi_u32 v8, v5, v8
1230-
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v12
1231-
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1230+
; GISEL-NEXT: v_mul_hi_u32 v8, v12, v8
1231+
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
1232+
; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
12321233
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
12331234
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1234-
; GISEL-NEXT: v_mul_lo_u32 v1, v5, v9
1235-
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v12, v0
1236-
; GISEL-NEXT: v_mul_hi_u32 v12, v11, v9
1235+
; GISEL-NEXT: v_mul_lo_u32 v1, v12, v9
1236+
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
1237+
; GISEL-NEXT: v_mul_hi_u32 v4, v11, v9
12371238
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v8
12381239
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1239-
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v12
1240-
; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1241-
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12
1242-
; GISEL-NEXT: v_mul_hi_u32 v9, v5, v9
1240+
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v4
1241+
; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
1242+
; GISEL-NEXT: v_add_i32_e32 v4, vcc, v8, v4
1243+
; GISEL-NEXT: v_mul_hi_u32 v8, v12, v9
12431244
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0
12441245
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
1246+
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v4, v1
12451247
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
1246-
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1
12471248
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v11, v0
1248-
; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v5, v1, vcc
1249+
; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v12, v1, vcc
12491250
; GISEL-NEXT: v_mul_lo_u32 v8, v13, v0
12501251
; GISEL-NEXT: v_mul_lo_u32 v9, v10, v1
12511252
; GISEL-NEXT: v_mul_hi_u32 v11, v10, v0
12521253
; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
1253-
; GISEL-NEXT: v_mov_b32_e32 v5, 0x12d8fb
1254+
; GISEL-NEXT: v_mov_b32_e32 v4, 0x12d8fb
12541255
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9
12551256
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
12561257
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
@@ -1265,40 +1266,39 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
12651266
; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9
12661267
; GISEL-NEXT: v_add_i32_e32 v11, vcc, v0, v8
12671268
; GISEL-NEXT: v_mul_hi_u32 v12, v13, v1
1268-
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v11, 0
1269+
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v11, 0
12691270
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
12701271
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
12711272
; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v8
1272-
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v5, v12, v[1:2]
1273+
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v4, v12, v[1:2]
12731274
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v0
1274-
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], 0, v11, v[8:9]
1275-
; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb
1276-
; GISEL-NEXT: s_subb_u32 s7, 0, 0
12771275
; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v13, v8, vcc
12781276
; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], v13, v8
1279-
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5
1277+
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4
12801278
; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
12811279
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1
12821280
; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v8, vcc
1283-
; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v0, v5
1281+
; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v0, v4
12841282
; GISEL-NEXT: v_cndmask_b32_e64 v10, -1, v9, s[4:5]
12851283
; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v1, vcc
1284+
; GISEL-NEXT: s_sub_u32 s6, 0, 0x12d8fb
12861285
; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v11
1287-
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v6, 0
1286+
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v5, 0
12881287
; GISEL-NEXT: v_addc_u32_e32 v14, vcc, 0, v12, vcc
1289-
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v5
1288+
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4
12901289
; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc
12911290
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9
12921291
; GISEL-NEXT: v_cndmask_b32_e32 v15, -1, v8, vcc
12931292
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s6, v7, v[1:2]
1293+
; GISEL-NEXT: s_subb_u32 s7, 0, 0
12941294
; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v13
1295-
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v6, v[8:9]
1295+
; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], s7, v5, v[8:9]
12961296
; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v14, vcc
12971297
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v15
12981298
; GISEL-NEXT: v_cndmask_b32_e32 v9, v13, v1, vcc
12991299
; GISEL-NEXT: v_mul_lo_u32 v1, v7, v0
1300-
; GISEL-NEXT: v_mul_lo_u32 v13, v6, v8
1301-
; GISEL-NEXT: v_mul_hi_u32 v15, v6, v0
1300+
; GISEL-NEXT: v_mul_lo_u32 v13, v5, v8
1301+
; GISEL-NEXT: v_mul_hi_u32 v15, v5, v0
13021302
; GISEL-NEXT: v_cndmask_b32_e32 v14, v14, v16, vcc
13031303
; GISEL-NEXT: v_mul_hi_u32 v0, v7, v0
13041304
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v13
@@ -1307,7 +1307,7 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
13071307
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
13081308
; GISEL-NEXT: v_mul_lo_u32 v15, v7, v8
13091309
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
1310-
; GISEL-NEXT: v_mul_hi_u32 v13, v6, v8
1310+
; GISEL-NEXT: v_mul_hi_u32 v13, v5, v8
13111311
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v15, v0
13121312
; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
13131313
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v13
@@ -1318,95 +1318,93 @@ define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) {
13181318
; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
13191319
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v13, v1
13201320
; GISEL-NEXT: v_add_i32_e32 v1, vcc, v8, v1
1321-
; GISEL-NEXT: v_add_i32_e32 v8, vcc, v6, v0
1321+
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v0
13221322
; GISEL-NEXT: v_addc_u32_e32 v13, vcc, v7, v1, vcc
1323-
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v8, 0
1323+
; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s6, v5, 0
13241324
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
1325-
; GISEL-NEXT: v_cndmask_b32_e32 v9, v11, v9, vcc
1326-
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s6, v13, v[1:2]
1327-
; GISEL-NEXT: v_xor_b32_e32 v1, v9, v4
1325+
; GISEL-NEXT: v_cndmask_b32_e32 v7, v11, v9, vcc
1326+
; GISEL-NEXT: v_xor_b32_e32 v10, v7, v6
1327+
; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s6, v13, v[1:2]
1328+
; GISEL-NEXT: v_cndmask_b32_e32 v9, v12, v14, vcc
1329+
; GISEL-NEXT: v_xor_b32_e32 v1, v9, v6
1330+
; GISEL-NEXT: v_mad_u64_u32 v[7:8], s[4:5], s7, v5, v[7:8]
13281331
; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v3
1329-
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], s7, v8, v[6:7]
1330-
; GISEL-NEXT: v_cndmask_b32_e32 v10, v12, v14, vcc
13311332
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v9
13321333
; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v9, vcc
1333-
; GISEL-NEXT: v_xor_b32_e32 v11, v2, v9
1334+
; GISEL-NEXT: v_xor_b32_e32 v8, v2, v9
13341335
; GISEL-NEXT: v_mul_lo_u32 v2, v13, v0
1335-
; GISEL-NEXT: v_mul_lo_u32 v7, v8, v6
1336+
; GISEL-NEXT: v_mul_lo_u32 v11, v5, v7
13361337
; GISEL-NEXT: v_xor_b32_e32 v12, v3, v9
1337-
; GISEL-NEXT: v_mul_hi_u32 v3, v8, v0
1338+
; GISEL-NEXT: v_mul_hi_u32 v3, v5, v0
13381339
; GISEL-NEXT: v_mul_hi_u32 v0, v13, v0
1339-
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v7
1340-
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1340+
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11
1341+
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
13411342
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
13421343
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
1343-
; GISEL-NEXT: v_mul_lo_u32 v3, v13, v6
1344-
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2
1345-
; GISEL-NEXT: v_mul_hi_u32 v7, v8, v6
1344+
; GISEL-NEXT: v_mul_lo_u32 v3, v13, v7
1345+
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v11, v2
1346+
; GISEL-NEXT: v_mul_hi_u32 v11, v5, v7
13461347
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v3, v0
13471348
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
1348-
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v7
1349-
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1350-
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
1351-
; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6
1349+
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v11
1350+
; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1351+
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v11
1352+
; GISEL-NEXT: v_mul_hi_u32 v7, v13, v7
13521353
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
13531354
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
13541355
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2
1355-
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v6, v2
1356-
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0
1356+
; GISEL-NEXT: v_add_i32_e32 v2, vcc, v7, v2
1357+
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v0
13571358
; GISEL-NEXT: v_addc_u32_e32 v2, vcc, v13, v2, vcc
1358-
; GISEL-NEXT: v_mul_lo_u32 v3, v12, v0
1359-
; GISEL-NEXT: v_mul_lo_u32 v6, v11, v2
1360-
; GISEL-NEXT: v_mul_hi_u32 v7, v11, v0
1361-
; GISEL-NEXT: v_mul_hi_u32 v0, v12, v0
1362-
; GISEL-NEXT: v_xor_b32_e32 v8, v10, v4
1363-
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v6
1359+
; GISEL-NEXT: v_mul_lo_u32 v5, v12, v3
1360+
; GISEL-NEXT: v_mul_lo_u32 v7, v8, v2
1361+
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v10, v6
1362+
; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v6, vcc
1363+
; GISEL-NEXT: v_mul_hi_u32 v6, v8, v3
1364+
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
1365+
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1366+
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
1367+
; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
1368+
; GISEL-NEXT: v_mul_lo_u32 v6, v12, v2
1369+
; GISEL-NEXT: v_mul_hi_u32 v3, v12, v3
1370+
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v7, v5
1371+
; GISEL-NEXT: v_mul_hi_u32 v7, v8, v2
1372+
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v6, v3
13641373
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
13651374
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
1366-
; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
1367-
; GISEL-NEXT: v_mul_lo_u32 v7, v12, v2
1368-
; GISEL-NEXT: v_add_i32_e32 v3, vcc, v6, v3
1369-
; GISEL-NEXT: v_mul_hi_u32 v6, v11, v2
1370-
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v7, v0
13711375
; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1372-
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v6
1373-
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
1374-
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
1375-
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v0, v3
1376-
; GISEL-NEXT: v_mul_hi_u32 v7, v12, v2
1377-
; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v10, 0
1378-
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1379-
; GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0
1380-
; GISEL-NEXT: v_add_i32_e32 v13, vcc, v7, v0
1381-
; GISEL-NEXT: v_mov_b32_e32 v0, v3
1382-
; GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v5, v13, v[0:1]
1383-
; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v1, v4
1384-
; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v8, v4, vcc
1385-
; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], 0, v10, v[6:7]
1386-
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v11, v2
1387-
; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v12, v3, vcc
1388-
; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v12, v3
1389-
; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
1390-
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5
1391-
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v5
1376+
; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7
1377+
; GISEL-NEXT: v_add_i32_e32 v7, vcc, v3, v5
1378+
; GISEL-NEXT: v_mul_hi_u32 v10, v12, v2
1379+
; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v7, 0
1380+
; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
1381+
; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5
1382+
; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v5
1383+
; GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v10, v[3:4]
1384+
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v8, v2
1385+
; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v12, v5, vcc
1386+
; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v12, v5
1387+
; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
1388+
; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4
1389+
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v4
13921390
; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
1393-
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4
1394-
; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
1395-
; GISEL-NEXT: v_cndmask_b32_e64 v4, -1, v6, s[4:5]
1396-
; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v10
1397-
; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v13, vcc
1398-
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5
1391+
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
1392+
; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
1393+
; GISEL-NEXT: v_cndmask_b32_e64 v3, -1, v6, s[4:5]
1394+
; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v7
1395+
; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v10, vcc
1396+
; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
13991397
; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
1400-
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
1398+
; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
14011399
; GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc
1402-
; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v6
1403-
; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v7, vcc
1400+
; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v6
1401+
; GISEL-NEXT: v_addc_u32_e32 v5, vcc, 0, v8, vcc
14041402
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
1405-
; GISEL-NEXT: v_cndmask_b32_e32 v2, v6, v3, vcc
1406-
; GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc
1407-
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
1408-
; GISEL-NEXT: v_cndmask_b32_e32 v2, v10, v2, vcc
1409-
; GISEL-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc
1403+
; GISEL-NEXT: v_cndmask_b32_e32 v2, v6, v4, vcc
1404+
; GISEL-NEXT: v_cndmask_b32_e32 v4, v8, v5, vcc
1405+
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
1406+
; GISEL-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc
1407+
; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v4, vcc
14101408
; GISEL-NEXT: v_xor_b32_e32 v2, v2, v9
14111409
; GISEL-NEXT: v_xor_b32_e32 v3, v3, v9
14121410
; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v9

0 commit comments

Comments
 (0)