From a53a3de04e52051b918d41acff37e93ff6a379c2 Mon Sep 17 00:00:00 2001
From: sun-jacobi
Date: Sat, 3 Feb 2024 10:35:41 +0900
Subject: [PATCH] [RISCV][ISel] Remove redundant vmerge for vwsub(u).wv.

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   | 23 +++---
 .../RISCV/rvv/fixed-vectors-vwsub-mask.ll     | 73 +++++++++++++++++++
 .../CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll    | 73 +++++++++++++++++++
 3 files changed, 159 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5786ad9ad6482..73f3d4a8641d5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13848,11 +13848,13 @@ static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
   return InputRootReplacement;
 }
 
-// Fold (vwadd.wv y, (vmerge cond, x, 0)) -> vwadd.wv y, x, y, cond
+// Fold (vwadd(u).wv y, (vmerge cond, x, 0)) -> vwadd(u).wv y, x, y, cond
+//      (vwsub(u).wv y, (vmerge cond, x, 0)) -> vwsub(u).wv y, x, y, cond
 // y will be the Passthru and cond will be the Mask.
-static SDValue combineVWADDWSelect(SDNode *N, SelectionDAG &DAG) {
+static SDValue combineVWADDSUBWSelect(SDNode *N, SelectionDAG &DAG) {
   unsigned Opc = N->getOpcode();
-  assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL);
+  assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
+         Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
 
   SDValue Y = N->getOperand(0);
   SDValue MergeOp = N->getOperand(1);
@@ -13891,16 +13893,17 @@ static SDValue combineVWADDWSelect(SDNode *N, SelectionDAG &DAG) {
                      N->getFlags());
 }
 
-static SDValue performVWADDW_VLCombine(SDNode *N,
-                                       TargetLowering::DAGCombinerInfo &DCI,
-                                       const RISCVSubtarget &Subtarget) {
+static SDValue performVWADDSUBW_VLCombine(SDNode *N,
+                                          TargetLowering::DAGCombinerInfo &DCI,
+                                          const RISCVSubtarget &Subtarget) {
   [[maybe_unused]] unsigned Opc = N->getOpcode();
-  assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL);
+  assert(Opc == RISCVISD::VWADD_W_VL || Opc == RISCVISD::VWADDU_W_VL ||
+         Opc == RISCVISD::VWSUB_W_VL || Opc == RISCVISD::VWSUBU_W_VL);
 
   if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
     return V;
 
-  return combineVWADDWSelect(N, DCI.DAG);
+  return combineVWADDSUBWSelect(N, DCI.DAG);
 }
 
 // Helper function for performMemPairCombine.
@@ -15973,10 +15976,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
     return combineToVWMACC(N, DAG, Subtarget);
   case RISCVISD::VWADD_W_VL:
   case RISCVISD::VWADDU_W_VL:
-    return performVWADDW_VLCombine(N, DCI, Subtarget);
-  case RISCVISD::SUB_VL:
   case RISCVISD::VWSUB_W_VL:
   case RISCVISD::VWSUBU_W_VL:
+    return performVWADDSUBW_VLCombine(N, DCI, Subtarget);
+  case RISCVISD::SUB_VL:
   case RISCVISD::MUL_VL:
     return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
   case RISCVISD::VFMADD_VL:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll
new file mode 100644
index 0000000000000..382f00913cb41
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+
+define <8 x i64> @vwsub_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
+; CHECK-LABEL: vwsub_wv_mask_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, tu, mu
+; CHECK-NEXT:    vwsub.wv v12, v12, v8, v0.t
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+    %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
+    %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+    %sa = sext <8 x i32> %a to <8 x i64>
+    %ret = sub <8 x i64> %y, %sa
+    ret <8 x i64> %ret
+}
+
+define <8 x i64> @vwsubu_wv_mask_v8i32(<8 x i32> %x, <8 x i64> %y) {
+; CHECK-LABEL: vwsubu_wv_mask_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, tu, mu
+; CHECK-NEXT:    vwsubu.wv v12, v12, v8, v0.t
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+    %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
+    %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+    %sa = zext <8 x i32> %a to <8 x i64>
+    %ret = sub <8 x i64> %y, %sa
+    ret <8 x i64> %ret
+}
+
+define <8 x i64> @vwsubu_vv_mask_v8i32(<8 x i32> %x, <8 x i32> %y) {
+; CHECK-LABEL: vwsubu_vv_mask_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vmv.v.i v12, 0
+; CHECK-NEXT:    vmerge.vvm v8, v12, v8, v0
+; CHECK-NEXT:    vwsubu.vv v12, v10, v8
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+    %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
+    %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+    %sa = zext <8 x i32> %a to <8 x i64>
+    %sy = zext <8 x i32> %y to <8 x i64>
+    %ret = sub <8 x i64> %sy, %sa
+    ret <8 x i64> %ret
+}
+
+define <8 x i64> @vwsub_wv_mask_v8i32_nonzero(<8 x i32> %x, <8 x i64> %y) {
+; CHECK-LABEL: vwsub_wv_mask_v8i32_nonzero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vmv.v.i v10, 1
+; CHECK-NEXT:    vmerge.vvm v16, v10, v8, v0
+; CHECK-NEXT:    vwsub.wv v8, v12, v16
+; CHECK-NEXT:    ret
+    %mask = icmp slt <8 x i32> %x, <i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42, i32 42>
+    %a = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+    %sa = sext <8 x i32> %a to <8 x i64>
+    %ret = sub <8 x i64> %y, %sa
+    ret <8 x i64> %ret
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll
new file mode 100644
index 0000000000000..0cc0063c1d41c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+
+define <vscale x 8 x i64> @vwsub_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
+; CHECK-LABEL: vwsub_wv_mask_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, tu, mu
+; CHECK-NEXT:    vwsub.wv v16, v16, v8, v0.t
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+    %mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+    %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
+    %sa = sext <vscale x 8 x i32> %a to <vscale x 8 x i64>
+    %ret = sub <vscale x 8 x i64> %y, %sa
+    ret <vscale x 8 x i64> %ret
+}
+
+define <vscale x 8 x i64> @vwsubu_wv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
+; CHECK-LABEL: vwsubu_wv_mask_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, tu, mu
+; CHECK-NEXT:    vwsubu.wv v16, v16, v8, v0.t
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+    %mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+    %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
+    %sa = zext <vscale x 8 x i32> %a to <vscale x 8 x i64>
+    %ret = sub <vscale x 8 x i64> %y, %sa
+    ret <vscale x 8 x i64> %ret
+}
+
+define <vscale x 8 x i64> @vwsubu_vv_mask_v8i32(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y) {
+; CHECK-LABEL: vwsubu_vv_mask_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vmv.v.i v16, 0
+; CHECK-NEXT:    vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT:    vwsubu.vv v16, v12, v8
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+    %mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+    %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> zeroinitializer
+    %sa = zext <vscale x 8 x i32> %a to <vscale x 8 x i64>
+    %sy = zext <vscale x 8 x i32> %y to <vscale x 8 x i64>
+    %ret = sub <vscale x 8 x i64> %sy, %sa
+    ret <vscale x 8 x i64> %ret
+}
+
+define <vscale x 8 x i64> @vwsub_wv_mask_v8i32_nonzero(<vscale x 8 x i32> %x, <vscale x 8 x i64> %y) {
+; CHECK-LABEL: vwsub_wv_mask_v8i32_nonzero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 42
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT:    vmslt.vx v0, v8, a0
+; CHECK-NEXT:    vmv.v.i v12, 1
+; CHECK-NEXT:    vmerge.vvm v24, v12, v8, v0
+; CHECK-NEXT:    vwsub.wv v8, v16, v24
+; CHECK-NEXT:    ret
+    %mask = icmp slt <vscale x 8 x i32> %x, shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 42, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+    %a = select <vscale x 8 x i1> %mask, <vscale x 8 x i32> %x, <vscale x 8 x i32> shufflevector (<vscale x 8 x i32> insertelement (<vscale x 8 x i32> poison, i32 1, i64 0), <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer)
+    %sa = sext <vscale x 8 x i32> %a to <vscale x 8 x i64>
+    %ret = sub <vscale x 8 x i64> %y, %sa
+    ret <vscale x 8 x i64> %ret
+}