Skip to content

Commit

Permalink
[RISCV][ISel] Remove redundant vmerge for vwsub(u).wv.
Browse files Browse the repository at this point in the history
  • Loading branch information
sun-jacobi committed Feb 5, 2024
1 parent 4926f12 commit a53a3de
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 10 deletions.
23 changes: 13 additions & 10 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13848,11 +13848,13 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
return InputRootReplacement;
}

// Fold (vwadd.wv y, (vmerge cond, x, 0)) -> vwadd.wv y, x, y, cond
// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
// (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
// y will be the Passthru and cond will be the Mask.
static SDValue combineVWADDWSelect(SDNode *N, SelectionDAG &DAG) {
static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
unsigned Opc = N->getOpcode();
assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL);
assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);

SDValue Y = N->getOperand(0);
SDValue MergeOp = N->getOperand(1);
Expand Down Expand Up @@ -13891,16 +13893,17 @@ static SDValue combineVWADDWSelect(SDNode *N, SelectionDAG &DAG) {
N->getFlags());
}

static SDValue performVWADDW_VLCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
static SDValue performVWADDSUBW_VLCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
[[maybe_unused]] unsigned Opc = N->getOpcode();
assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL);
assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);

if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
return V;

return combineVWADDWSelect(N, DCI.DAG);
return combineVWADDSUBWSelect(N, DCI.DAG);
}

// Helper function for performMemPairCombine.
Expand Down Expand Up @@ -15973,10 +15976,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return combineToVWMACC(N, DAG, Subtarget);
case RISCVISD::VWADD_W_VL:
case RISCVISD::VWADDU_W_VL:
return performVWADDW_VLCombine(N, DCI, Subtarget);
case RISCVISD::SUB_VL:
case RISCVISD::VWSUB_W_VL:
case RISCVISD::VWSUBU_W_VL:
return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
case RISCVISD::SUB_VL:
case RISCVISD::MUL_VL:
return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
case RISCVISD::VFMADD_VL:
Expand Down
73 changes: 73 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK

define <8 x i64> @vwsub_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
; CHECK-LABEL: vwsub_wv_mask_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 42
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vmslt.vx v0, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, mu
; CHECK-NEXT: vwsub.wv v12, v12, v8, v0.t
; CHECK-NEXT: vmv4r.v v8, v12
; CHECK-NEXT: ret
%mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
%a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
%sa = sext <8 x i32> %a to <8 x i64>
%ret = sub <8 x i64> %y, %sa
ret <8 x i64> %ret
}

define <8 x i64> @vwsubu_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
; CHECK-LABEL: vwsubu_wv_mask_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 42
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vmslt.vx v0, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, tu, mu
; CHECK-NEXT: vwsubu.wv v12, v12, v8, v0.t
; CHECK-NEXT: vmv4r.v v8, v12
; CHECK-NEXT: ret
%mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
%a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
%sa = zext <8 x i32> %a to <8 x i64>
%ret = sub <8 x i64> %y, %sa
ret <8 x i64> %ret
}

define <8 x i64> @vwsubu_vv_mask_v8i32(<8 x i32> %x, <8 x i32> %y) {
; CHECK-LABEL: vwsubu_vv_mask_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 42
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vmslt.vx v0, v8, a0
; CHECK-NEXT: vmv.v.i v12, 0
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
; CHECK-NEXT: vwsubu.vv v12, v10, v8
; CHECK-NEXT: vmv4r.v v8, v12
; CHECK-NEXT: ret
%mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
%a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
%sa = zext <8 x i32> %a to <8 x i64>
%sy = zext <8 x i32> %y to <8 x i64>
%ret = sub <8 x i64> %sy, %sa
ret <8 x i64> %ret
}

define <8 x i64> @vwsub_wv_mask_v8i32_nonzero(<8 x i32> %x, <8 x i64> %y) {
; CHECK-LABEL: vwsub_wv_mask_v8i32_nonzero:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 42
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vmslt.vx v0, v8, a0
; CHECK-NEXT: vmv.v.i v10, 1
; CHECK-NEXT: vmerge.vvm v16, v10, v8, v0
; CHECK-NEXT: vwsub.wv v8, v12, v16
; CHECK-NEXT: ret
%mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
%a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%sa = sext <8 x i32> %a to <8 x i64>
%ret = sub <8 x i64> %y, %sa
ret <8 x i64> %ret
}
73 changes: 73 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK

define <vscale x 8 x i64> @vwsub_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
; CHECK-LABEL: vwsub_wv_mask_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 42
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT: vmslt.vx v0, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, mu
; CHECK-NEXT: vwsub.wv v16, v16, v8, v0.t
; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: ret
%mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
%a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
%sa = sext <vscale x 8 x i32> %a to <vscale x 8 x i64>
%ret = sub <vscale x 8 x i64> %y, %sa
ret <vscale x 8 x i64> %ret
}

define <vscale x 8 x i64> @vwsubu_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
; CHECK-LABEL: vwsubu_wv_mask_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 42
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT: vmslt.vx v0, v8, a0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, mu
; CHECK-NEXT: vwsubu.wv v16, v16, v8, v0.t
; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: ret
%mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
%a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
%sa = zext <vscale x 8 x i32> %a to <vscale x 8 x i64>
%ret = sub <vscale x 8 x i64> %y, %sa
ret <vscale x 8 x i64> %ret
}

define <vscale x 8 x i64> @vwsubu_vv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y) {
; CHECK-LABEL: vwsubu_vv_mask_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 42
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT: vmslt.vx v0, v8, a0
; CHECK-NEXT: vmv.v.i v16, 0
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: vwsubu.vv v16, v12, v8
; CHECK-NEXT: vmv8r.v v8, v16
; CHECK-NEXT: ret
%mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
%a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
%sa = zext <vscale x 8 x i32> %a to <vscale x 8 x i64>
%sy = zext <vscale x 8 x i32> %y to <vscale x 8 x i64>
%ret = sub <vscale x 8 x i64> %sy, %sa
ret <vscale x 8 x i64> %ret
}

define <vscale x 8 x i64> @vwsub_wv_mask_v8i32_nonzero(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
; CHECK-LABEL: vwsub_wv_mask_v8i32_nonzero:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 42
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT: vmslt.vx v0, v8, a0
; CHECK-NEXT: vmv.v.i v12, 1
; CHECK-NEXT: vmerge.vvm v24, v12, v8, v0
; CHECK-NEXT: vwsub.wv v8, v16, v24
; CHECK-NEXT: ret
%mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
%a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
%sa = sext <vscale x 8 x i32> %a to <vscale x 8 x i64>
%ret = sub <vscale x 8 x i64> %y, %sa
ret <vscale x 8 x i64> %ret
}

0 comments on commit a53a3de

Please sign in to comment.