diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 73ee8cf81adcd..fd024ffdeefde 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -111,13 +111,6 @@ class ARMDAGToDAGISel : public SelectionDAGISel { bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); - bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) { - const ConstantSDNode *CN = cast<ConstantSDNode>(N); - Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32); - Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32); - return true; - } - bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc); bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, @@ -4123,17 +4116,15 @@ void ARMDAGToDAGISel::Select(SDNode *N) { SDValue Chain = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); - SDValue N3 = N->getOperand(3); - SDValue InGlue = N->getOperand(4); + SDValue Flags = N->getOperand(3); assert(N1.getOpcode() == ISD::BasicBlock); assert(N2.getOpcode() == ISD::Constant); - assert(N3.getOpcode() == ISD::Register); unsigned CC = (unsigned)N2->getAsZExtVal(); - if (InGlue.getOpcode() == ARMISD::CMPZ) { - if (InGlue.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) { - SDValue Int = InGlue.getOperand(0); + if (Flags.getOpcode() == ARMISD::CMPZ) { + if (Flags.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) { + SDValue Int = Flags.getOperand(0); uint64_t ID = Int->getConstantOperandVal(1); // Handle low-overhead loops. 
@@ -4155,15 +4146,15 @@ void ARMDAGToDAGISel::Select(SDNode *N) { ReplaceUses(N, LoopEnd); CurDAG->RemoveDeadNode(N); - CurDAG->RemoveDeadNode(InGlue.getNode()); + CurDAG->RemoveDeadNode(Flags.getNode()); CurDAG->RemoveDeadNode(Int.getNode()); return; } } bool SwitchEQNEToPLMI; - SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI); - InGlue = N->getOperand(4); + SelectCMPZ(Flags.getNode(), SwitchEQNEToPLMI); + Flags = N->getOperand(3); if (SwitchEQNEToPLMI) { switch ((ARMCC::CondCodes)CC) { @@ -4179,25 +4170,18 @@ void ARMDAGToDAGISel::Select(SDNode *N) { } SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32); - SDValue Ops[] = { N1, Tmp2, N3, Chain, InGlue }; - SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, - MVT::Glue, Ops); - Chain = SDValue(ResNode, 0); - if (N->getNumValues() == 2) { - InGlue = SDValue(ResNode, 1); - ReplaceUses(SDValue(N, 1), InGlue); - } - ReplaceUses(SDValue(N, 0), - SDValue(Chain.getNode(), Chain.getResNo())); - CurDAG->RemoveDeadNode(N); + Chain = CurDAG->getCopyToReg(Chain, dl, ARM::CPSR, Flags, SDValue()); + SDValue Ops[] = {N1, Tmp2, CurDAG->getRegister(ARM::CPSR, MVT::i32), Chain, + Chain.getValue(1)}; + CurDAG->SelectNodeTo(N, Opc, MVT::Other, Ops); return; } case ARMISD::CMPZ: { // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0) // This allows us to avoid materializing the expensive negative constant. - // The CMPZ #0 is useless and will be peepholed away but we need to keep it - // for its glue output. + // The CMPZ #0 is useless and will be peepholed away but we need to keep + // it for its flags output. 
SDValue X = N->getOperand(0); auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode()); if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) { @@ -4224,7 +4208,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) { } if (Add) { SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)}; - CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2); + CurDAG->MorphNodeTo(N, ARMISD::CMPZ, N->getVTList(), Ops2); } } // Other cases are autogenerated. @@ -4232,11 +4216,11 @@ void ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::CMOV: { - SDValue InGlue = N->getOperand(4); + SDValue Flags = N->getOperand(3); - if (InGlue.getOpcode() == ARMISD::CMPZ) { + if (Flags.getOpcode() == ARMISD::CMPZ) { bool SwitchEQNEToPLMI; - SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI); + SelectCMPZ(Flags.getNode(), SwitchEQNEToPLMI); if (SwitchEQNEToPLMI) { SDValue ARMcc = N->getOperand(2); @@ -4253,10 +4237,9 @@ void ARMDAGToDAGISel::Select(SDNode *N) { } SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32); SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc, - N->getOperand(3), N->getOperand(4)}; + N->getOperand(3)}; CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops); } - } // Other cases are autogenerated. 
break; diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 6b290135c5bcb..c9250e4ed3422 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -4924,14 +4924,11 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, CC == ISD::SETUGT && isa(LHS.getOperand(1)) && LHS.getConstantOperandVal(1) < 31) { unsigned ShiftAmt = LHS.getConstantOperandVal(1) + 1; - SDValue Shift = DAG.getNode(ARMISD::LSLS, dl, - DAG.getVTList(MVT::i32, MVT::i32), - LHS.getOperand(0), - DAG.getConstant(ShiftAmt, dl, MVT::i32)); - SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR, - Shift.getValue(1), SDValue()); + SDValue Shift = + DAG.getNode(ARMISD::LSLS, dl, DAG.getVTList(MVT::i32, FlagsVT), + LHS.getOperand(0), DAG.getConstant(ShiftAmt, dl, MVT::i32)); ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32); - return Chain.getValue(1); + return Shift.getValue(1); } ARMCC::CondCodes CondCode = IntCCToARMCC(CC); @@ -4963,7 +4960,7 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, break; } ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); - return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS); + return DAG.getNode(CompareType, dl, FlagsVT, LHS, RHS); } /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. @@ -4978,24 +4975,7 @@ SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, else Flags = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0, dl, FlagsVT, LHS); - return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Flags); -} - -/// duplicateCmp - Glue values can have only one use, so this function -/// duplicates a comparison node. 
-SDValue -ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { - unsigned Opc = Cmp.getOpcode(); - SDLoc DL(Cmp); - if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) - return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); - - assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation"); - SDValue Flags = Cmp.getOperand(0); - assert((Flags.getOpcode() == ARMISD::CMPFP || - Flags.getOpcode() == ARMISD::CMPFPw0) && - "unexpected operand of FMSTAT"); - return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Flags); + return DAG.getNode(ARMISD::FMSTAT, dl, FlagsVT, Flags); } // This function returns three things: the arithmetic computation itself @@ -5023,7 +5003,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, case ISD::SADDO: ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32); Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS); - OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS); break; case ISD::UADDO: ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32); @@ -5032,17 +5012,17 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, Value = DAG.getNode(ARMISD::ADDC, dl, DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS) .getValue(0); - OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS); break; case ISD::SSUBO: ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32); Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS); - OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS); break; case ISD::USUBO: ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32); Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS); - OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS); break; case 
ISD::UMULO: // We generate a UMUL_LOHI and then check if the high word is 0. @@ -5050,7 +5030,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, Value = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(Op.getValueType(), Op.getValueType()), LHS, RHS); - OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1), + OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1), DAG.getConstant(0, dl, MVT::i32)); Value = Value.getValue(0); // We only want the low 32 bits for the result. break; @@ -5061,7 +5041,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, Value = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(Op.getValueType(), Op.getValueType()), LHS, RHS); - OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1), + OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1), DAG.getNode(ISD::SRA, dl, Op.getValueType(), Value.getValue(0), DAG.getConstant(31, dl, MVT::i32))); @@ -5081,15 +5061,14 @@ ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const { SDValue Value, OverflowCmp; SDValue ARMcc; std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc); - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDLoc dl(Op); // We use 0 and 1 as false and true values. 
SDValue TVal = DAG.getConstant(1, dl, MVT::i32); SDValue FVal = DAG.getConstant(0, dl, MVT::i32); EVT VT = Op.getValueType(); - SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, - ARMcc, CCR, OverflowCmp); + SDValue Overflow = + DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, ARMcc, OverflowCmp); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); @@ -5226,11 +5205,9 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Value, OverflowCmp; SDValue ARMcc; std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc); - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); EVT VT = Op.getValueType(); - return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR, - OverflowCmp, DAG); + return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, OverflowCmp, DAG); } // Convert: @@ -5258,14 +5235,9 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { False = SelectTrue; } - if (True.getNode() && False.getNode()) { - EVT VT = Op.getValueType(); - SDValue ARMcc = Cond.getOperand(2); - SDValue CCR = Cond.getOperand(3); - SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); - assert(True.getValueType() == VT); - return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG); - } + if (True.getNode() && False.getNode()) + return getCMOV(dl, Op.getValueType(), True, False, Cond.getOperand(2), + Cond.getOperand(3), DAG); } } @@ -5330,8 +5302,8 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, } SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, - SDValue TrueVal, SDValue ARMcc, SDValue CCR, - SDValue Cmp, SelectionDAG &DAG) const { + SDValue TrueVal, SDValue ARMcc, + SDValue Flags, SelectionDAG &DAG) const { if (!Subtarget->hasFP64() && VT == MVT::f64) { FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), FalseVal); @@ -5344,15 +5316,13 @@ SDValue 
ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue FalseHigh = FalseVal.getValue(1); SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow, - ARMcc, CCR, Cmp); + ARMcc, Flags); SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh, - ARMcc, CCR, duplicateCmp(Cmp, DAG)); + ARMcc, Flags); return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High); - } else { - return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, - Cmp); } + return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, Flags); } static bool isGTorGE(ISD::CondCode CC) { @@ -5625,12 +5595,11 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { } SDValue ARMcc; - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); // Choose GE over PL, which vsel does now support if (ARMcc->getAsZExtVal() == ARMCC::PL) ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32); - return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); + return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG); } ARMCC::CondCodes CondCode, CondCode2; @@ -5660,13 +5629,10 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); + SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG); if (CondCode2 != ARMCC::AL) { SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32); - // FIXME: Needs another CMP because flag can have but one use. 
- SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); - Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG); + Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, Cmp, DAG); } return Result; } @@ -5767,9 +5733,8 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { RHS = DAG.getNode(ISD::AND, dl, MVT::i32, bitcastf32Toi32(RHS, DAG), Mask); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, - Chain, Dest, ARMcc, CCR, Cmp); + return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, + Cmp); } SDValue LHS1, LHS2; @@ -5780,9 +5745,8 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask); ARMCC::CondCodes CondCode = IntCCToARMCC(CC); ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); - SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; - return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops); + return DAG.getNode(ARMISD::BCC_i64, dl, MVT::Other, Ops); } return SDValue(); @@ -5816,9 +5780,8 @@ SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { (ARMCC::CondCodes)cast(ARMcc)->getZExtValue(); CondCode = ARMCC::getOppositeCondition(CondCode); ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32); - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR, + return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, OverflowCmp); } @@ -5870,18 +5833,15 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { CondCode = ARMCC::getOppositeCondition(CondCode); ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32); } - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR, + 
return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, OverflowCmp); } if (LHS.getValueType() == MVT::i32) { SDValue ARMcc; SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, - Chain, Dest, ARMcc, CCR, Cmp); + return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, Cmp); } if (getTargetMachine().Options.UnsafeFPMath && @@ -5896,14 +5856,12 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; - SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); + SDValue Ops[] = {Chain, Dest, ARMcc, Cmp}; + SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Ops); if (CondCode2 != ARMCC::AL) { ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32); - SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; - Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); + SDValue Ops[] = {Res, Dest, ARMcc, Cmp}; + Res = DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Ops); } return Res; } @@ -6408,7 +6366,6 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); SDValue ARMcc; - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? 
ISD::SRA : ISD::SRL; assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); @@ -6423,8 +6380,8 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), ISD::SETGE, ARMcc, DAG, dl); - SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift, - ARMcc, CCR, CmpLo); + SDValue Lo = + DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift, ARMcc, CmpLo); SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); SDValue HiBigShift = Opc == ISD::SRA @@ -6433,8 +6390,8 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, : DAG.getConstant(0, dl, VT); SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), ISD::SETGE, ARMcc, DAG, dl); - SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, - ARMcc, CCR, CmpHi); + SDValue Hi = + DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi); SDValue Ops[2] = { Lo, Hi }; return DAG.getMergeValues(Ops, dl); @@ -6452,7 +6409,6 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); SDValue ARMcc; - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); assert(Op.getOpcode() == ISD::SHL_PARTS); SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, @@ -6466,14 +6422,14 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), ISD::SETGE, ARMcc, DAG, dl); - SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, - ARMcc, CCR, CmpHi); + SDValue Hi = + DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi); SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), ISD::SETGE, ARMcc, DAG, dl); SDValue LoSmallShift = 
DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, - DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo); + DAG.getConstant(0, dl, VT), ARMcc, CmpLo); SDValue Ops[2] = { Lo, Hi }; return DAG.getMergeValues(Ops, dl); @@ -7060,11 +7016,8 @@ static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) { SDValue TVal = DAG.getConstant(1, DL, MVT::i32); SDValue ARMcc = DAG.getConstant( IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32); - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR, - Cmp.getValue(1), SDValue()); return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc, - CCR, Chain.getValue(1)); + Cmp.getValue(1)); } /// isVMOVModifiedImm - Check if the specified splat value corresponds to a @@ -10613,21 +10566,14 @@ SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const { ARMCC::CondCodes CondCode, CondCode2; FPCCToARMCC(CC, CondCode, CondCode2); - // FIXME: Chain is not handled correctly here. Currently the FPSCR is implicit - // in CMPFP and CMPFPE, but instead it should be made explicit by these - // instructions using a chain instead of glue. This would also fix the problem - // here (and also in LowerSELECT_CC) where we generate two comparisons when - // CondCode2 != AL. 
SDValue True = DAG.getConstant(1, dl, VT); SDValue False = DAG.getConstant(0, dl, VT); SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling); - SDValue Result = getCMOV(dl, VT, False, True, ARMcc, CCR, Cmp, DAG); + SDValue Result = getCMOV(dl, VT, False, True, ARMcc, Cmp, DAG); if (CondCode2 != ARMCC::AL) { ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32); - Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling); - Result = getCMOV(dl, VT, Result, True, ARMcc, CCR, Cmp, DAG); + Result = getCMOV(dl, VT, Result, True, ARMcc, Cmp, DAG); } return DAG.getMergeValues({Result, Chain}, dl); } @@ -15057,7 +15003,7 @@ static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) { } // Check that N is CMPZ(CSINC(0, 0, CC, X)), -// or CMPZ(CMOV(1, 0, CC, $cpsr, X)) +// or CMPZ(CMOV(1, 0, CC, X)) // return X if valid. static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) { if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1))) @@ -15081,22 +15027,22 @@ static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) { if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(0)) && isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) { CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2); - return CSInc.getOperand(4); + return CSInc.getOperand(3); } if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(1)) && isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) { CC = ARMCC::getOppositeCondition( (ARMCC::CondCodes)CSInc.getConstantOperandVal(2)); - return CSInc.getOperand(4); + return CSInc.getOperand(3); } return SDValue(); } static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG) { // Given CMPZ(CSINC(C, 0, 0, EQ), 0), we can just use C directly. 
As in - // t92: glue = ARMISD::CMPZ t74, 0 + // t92: flags = ARMISD::CMPZ t74, 0 // t93: i32 = ARMISD::CSINC 0, 0, 1, t92 - // t96: glue = ARMISD::CMPZ t93, 0 + // t96: flags = ARMISD::CMPZ t93, 0 // t114: i32 = ARMISD::CSINV 0, 0, 0, t96 ARMCC::CondCodes Cond; if (SDValue C = IsCMPZCSINC(N, Cond)) @@ -18187,7 +18133,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D SDValue Op0 = CMOV->getOperand(0); SDValue Op1 = CMOV->getOperand(1); auto CC = CMOV->getConstantOperandAPInt(2).getLimitedValue(); - SDValue CmpZ = CMOV->getOperand(4); + SDValue CmpZ = CMOV->getOperand(3); // The compare must be against zero. if (!isNullConstant(CmpZ->getOperand(1))) @@ -18431,12 +18377,11 @@ static SDValue PerformHWLoopCombine(SDNode *N, /// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND. SDValue ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const { - SDValue Cmp = N->getOperand(4); + SDValue Cmp = N->getOperand(3); if (Cmp.getOpcode() != ARMISD::CMPZ) // Only looking at NE cases. 
return SDValue(); - EVT VT = N->getValueType(0); SDLoc dl(N); SDValue LHS = Cmp.getOperand(0); SDValue RHS = Cmp.getOperand(1); @@ -18445,17 +18390,17 @@ ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const { SDValue ARMcc = N->getOperand(2); ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal(); - // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0)) - // -> (brcond Chain BB CC CPSR Cmp) + // (brcond Chain BB ne (cmpz (and (cmov 0 1 CC Flags) 1) 0)) + // -> (brcond Chain BB CC Flags) if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() && LHS->getOperand(0)->getOpcode() == ARMISD::CMOV && LHS->getOperand(0)->hasOneUse() && isNullConstant(LHS->getOperand(0)->getOperand(0)) && isOneConstant(LHS->getOperand(0)->getOperand(1)) && isOneConstant(LHS->getOperand(1)) && isNullConstant(RHS)) { - return DAG.getNode( - ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2), - LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4)); + return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, BB, + LHS->getOperand(0)->getOperand(2), + LHS->getOperand(0)->getOperand(3)); } return SDValue(); @@ -18464,7 +18409,7 @@ ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const { /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV. SDValue ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { - SDValue Cmp = N->getOperand(4); + SDValue Cmp = N->getOperand(3); if (Cmp.getOpcode() != ARMISD::CMPZ) // Only looking at EQ and NE cases. return SDValue(); @@ -18504,42 +18449,38 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { /// FIXME: Turn this into a target neutral optimization? 
SDValue Res; if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) { - Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, - N->getOperand(3), Cmp); + Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, Cmp); } else if (CC == ARMCC::EQ && TrueVal == RHS) { SDValue ARMcc; SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl); - Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, - N->getOperand(3), NewCmp); + Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, NewCmp); } - // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0)) - // -> (cmov F T CC CPSR Cmp) + // (cmov F T ne (cmpz (cmov 0 1 CC Flags) 0)) + // -> (cmov F T CC Flags) if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse() && isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) && isNullConstant(RHS)) { return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, - LHS->getOperand(2), LHS->getOperand(3), - LHS->getOperand(4)); + LHS->getOperand(2), LHS->getOperand(3)); } if (!VT.isInteger()) return SDValue(); // Fold away an unneccessary CMPZ/CMOV - // CMOV A, B, C1, $cpsr, (CMPZ (CMOV 1, 0, C2, D), 0) -> - // if C1==EQ -> CMOV A, B, C2, $cpsr, D - // if C1==NE -> CMOV A, B, NOT(C2), $cpsr, D + // CMOV A, B, C1, (CMPZ (CMOV 1, 0, C2, D), 0) -> + // if C1==EQ -> CMOV A, B, C2, D + // if C1==NE -> CMOV A, B, NOT(C2), D if (N->getConstantOperandVal(2) == ARMCC::EQ || N->getConstantOperandVal(2) == ARMCC::NE) { ARMCC::CondCodes Cond; - if (SDValue C = IsCMPZCSINC(N->getOperand(4).getNode(), Cond)) { + if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) { if (N->getConstantOperandVal(2) == ARMCC::NE) Cond = ARMCC::getOppositeCondition(Cond); return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0), N->getOperand(1), - DAG.getTargetConstant(Cond, SDLoc(N), MVT::i32), - N->getOperand(3), C); + DAG.getConstant(Cond, SDLoc(N), MVT::i32), C); } } @@ -18579,10 +18520,8 @@ 
ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1 SDValue Sub = DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS); - SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR, - Sub.getValue(1), SDValue()); Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc, - N->getOperand(3), CPSRGlue.getValue(1)); + Sub.getValue(1)); FalseVal = Sub; } } else if (isNullConstant(TrueVal)) { @@ -18593,11 +18532,9 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1 SDValue Sub = DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS); - SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR, - Sub.getValue(1), SDValue()); Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal, DAG.getConstant(ARMCC::NE, dl, MVT::i32), - N->getOperand(3), CPSRGlue.getValue(1)); + Sub.getValue(1)); FalseVal = Sub; } } diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 344a0ad91e517..4fa600e0cfcc4 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -978,13 +978,11 @@ class VectorType; bool isUnsupportedFloatingType(EVT VT) const; SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal, - SDValue ARMcc, SDValue CCR, SDValue Cmp, - SelectionDAG &DAG) const; + SDValue ARMcc, SDValue Flags, SelectionDAG &DAG) const; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMcc, SelectionDAG &DAG, const SDLoc &dl) const; SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl, bool Signaling = false) const; - SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const; SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td 
b/llvm/lib/Target/ARM/ARMInstrFormats.td index d0678f378da1e..041601748b1f7 100644 --- a/llvm/lib/Target/ARM/ARMInstrFormats.td +++ b/llvm/lib/Target/ARM/ARMInstrFormats.td @@ -167,16 +167,6 @@ def pred : PredicateOperand, PredicateOp, - ComplexPattern { - let MIOperandInfo = (ops i32imm, i32imm); - let PrintMethod = "printPredicateOperand"; -} - // Conditional code result for instructions whose 's' bit is set, e.g. subs. def CCOutOperand : AsmOperandClass { let Name = "CCOut"; @@ -1134,6 +1124,9 @@ class ARMV5MOPat : Pat { class ARMV6Pat : Pat { list Predicates = [IsARM, HasV6]; } +class ARMV6T2Pat : Pat { + list Predicates = [IsARM, HasV6T2]; +} class VFPPat : Pat { list Predicates = [HasVFP2]; } diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index db38b43279b86..718cb964ab7c3 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -14,6 +14,9 @@ // ARM specific DAG Nodes. // +/// Value type used for "condition code" operands. +defvar CondCodeVT = i32; + /// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV). 
defvar FlagsVT = i32; @@ -29,12 +32,19 @@ def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>; def SDT_ARMcall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; -def SDT_ARMCMov : SDTypeProfile<1, 3, - [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, - SDTCisVT<3, i32>]>; +def SDT_ARMCMov : SDTypeProfile<1, 4, [ + /* any */ // result + SDTCisSameAs<1, 0>, // value on false + SDTCisSameAs<2, 0>, // value on true + SDTCisVT<3, CondCodeVT>, // condition code + SDTCisVT<4, FlagsVT>, // in flags +]>; -def SDT_ARMBrcond : SDTypeProfile<0, 2, - [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>; +def SDT_ARMBrcond : SDTypeProfile<0, 3, [ + SDTCisVT<0, OtherVT>, // target basic block + SDTCisVT<1, CondCodeVT>, // condition code + SDTCisVT<2, FlagsVT>, // in flags +]>; def SDT_ARMBrJT : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>; @@ -53,7 +63,11 @@ def SDT_ARMAnd : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; -def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; +def SDT_ARMCmp : SDTypeProfile<1, 2, [ + SDTCisVT<0, FlagsVT>, // out flags + SDTCisInt<1>, // lhs + SDTCisSameAs<2, 1> // rhs +]>; def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisPtrTy<1>, SDTCisVT<2, i32>]>; @@ -124,15 +138,17 @@ def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>; def ARMSmlsld : SDNode<"ARMISD::SMLSLD", SDT_LongMac>; def ARMSmlsldx : SDNode<"ARMISD::SMLSLDX", SDT_LongMac>; -def SDT_ARMCSel : SDTypeProfile<1, 3, - [SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, - SDTCisInt<3>, - SDTCisVT<3, i32>]>; +def SDT_ARMCSel : SDTypeProfile<1, 4, [ + /* any */ // result + SDTCisSameAs<1, 0>, // lhs + SDTCisSameAs<2, 0>, // rhs + SDTCisVT<3, CondCodeVT>, // condition code + SDTCisVT<4, FlagsVT> // in flags +]>; -def ARMcsinv : SDNode<"ARMISD::CSINV", SDT_ARMCSel, [SDNPOptInGlue]>; -def ARMcsneg : SDNode<"ARMISD::CSNEG", SDT_ARMCSel, [SDNPOptInGlue]>; -def ARMcsinc : SDNode<"ARMISD::CSINC", SDT_ARMCSel, [SDNPOptInGlue]>; +def ARMcsinv : SDNode<"ARMISD::CSINV", 
SDT_ARMCSel>; +def ARMcsneg : SDNode<"ARMISD::CSNEG", SDT_ARMCSel>; +def ARMcsinc : SDNode<"ARMISD::CSINC", SDT_ARMCSel>; def SDT_MulHSR : SDTypeProfile<1, 3, [SDTCisVT<0,i32>, SDTCisSameAs<0, 1>, @@ -173,15 +189,13 @@ def ARMseretglue : SDNode<"ARMISD::SERET_GLUE", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def ARMintretglue : SDNode<"ARMISD::INTRET_GLUE", SDT_ARMcall, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; -def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov, - [SDNPInGlue]>; +def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov>; def ARMssat : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>; def ARMusat : SDNode<"ARMISD::USAT", SDTIntSatNoShOp, []>; -def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, - [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; +def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, [SDNPHasChain]>; def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT, [SDNPHasChain]>; @@ -191,14 +205,11 @@ def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT, def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64, [SDNPHasChain]>; -def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp, - [SDNPOutGlue]>; +def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp>; -def ARMcmn : SDNode<"ARMISD::CMN", SDT_ARMCmp, - [SDNPOutGlue]>; +def ARMcmn : SDNode<"ARMISD::CMN", SDT_ARMCmp>; -def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp, - [SDNPOutGlue, SDNPCommutative]>; +def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp, [SDNPCommutative]>; def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>; @@ -1776,7 +1787,7 @@ multiclass AI1_cmp_irs opcod, string opc, string rrDecoderMethod = ""> { def ri : AI1, + [(set CPSR, (opnode GPR:$Rn, mod_imm:$imm))]>, Sched<[WriteCMP, ReadALU]> { bits<4> Rn; bits<12> imm; @@ -1790,7 +1801,7 @@ multiclass AI1_cmp_irs opcod, string opc, } def rr : AI1, + [(set CPSR, (opnode GPR:$Rn, GPR:$Rm))]>, Sched<[WriteCMP, ReadALU, ReadALU]> { bits<4> Rn; bits<4> Rm; @@ -1808,7 +1819,7 @@ multiclass AI1_cmp_irs opcod, string opc, def rsi : AI1, + [(set CPSR, 
(opnode GPR:$Rn, so_reg_imm:$shift))]>, Sched<[WriteCMPsi, ReadALU]> { bits<4> Rn; bits<12> shift; @@ -1825,7 +1836,7 @@ multiclass AI1_cmp_irs opcod, string opc, def rsr : AI1, + [(set CPSR, (opnode GPRnopc:$Rn, so_reg_reg:$shift))]>, Sched<[WriteCMPsr, ReadALU]> { bits<4> Rn; bits<12> shift; @@ -4943,7 +4954,7 @@ def : ARMPat<(ARMcmpZ so_reg_reg:$rhs, 0), let isCompare = 1, Defs = [CPSR] in { def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, IIC_iCMPi, "cmn", "\t$Rn, $imm", - [(ARMcmn GPR:$Rn, mod_imm:$imm)]>, + [(set CPSR, (ARMcmn GPR:$Rn, mod_imm:$imm))]>, Sched<[WriteCMP, ReadALU]> { bits<4> Rn; bits<12> imm; @@ -4959,8 +4970,8 @@ def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, IIC_iCMPi, // CMN register-register/shift def CMNzrr : AI1<0b1011, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, IIC_iCMPr, "cmn", "\t$Rn, $Rm", - [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> - GPR:$Rn, GPR:$Rm)]>, Sched<[WriteCMP, ReadALU, ReadALU]> { + [(set CPSR, (BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> + GPR:$Rn, GPR:$Rm))]>, Sched<[WriteCMP, ReadALU, ReadALU]> { bits<4> Rn; bits<4> Rm; let isCommutable = 1; @@ -4977,8 +4988,8 @@ def CMNzrr : AI1<0b1011, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, IIC_iCMPr, def CMNzrsi : AI1<0b1011, (outs), (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, IIC_iCMPsr, "cmn", "\t$Rn, $shift", - [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> - GPR:$Rn, so_reg_imm:$shift)]>, + [(set CPSR, (BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> + GPR:$Rn, so_reg_imm:$shift))]>, Sched<[WriteCMPsi, ReadALU]> { bits<4> Rn; bits<12> shift; @@ -4996,8 +5007,8 @@ def CMNzrsi : AI1<0b1011, (outs), def CMNzrsr : AI1<0b1011, (outs), (ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, IIC_iCMPsr, "cmn", "\t$Rn, $shift", - [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> - GPRnopc:$Rn, so_reg_reg:$shift)]>, + [(set CPSR, (BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> + GPRnopc:$Rn, so_reg_reg:$shift))]>, Sched<[WriteCMPsr, 
ReadALU]> { bits<4> Rn; bits<12> shift; @@ -5052,65 +5063,74 @@ let hasSideEffects = 0 in { let isCommutable = 1, isSelect = 1 in def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, GPR:$Rm, cmovpred:$p), - 4, IIC_iCMOVr, - [(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, - cmovpred:$p))]>, + (ins GPR:$false, GPR:$Rm, pred:$p), + 4, IIC_iCMOVr, []>, RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, so_reg_imm:$shift, cmovpred:$p), - 4, IIC_iCMOVsr, - [(set GPR:$Rd, - (ARMcmov GPR:$false, so_reg_imm:$shift, - cmovpred:$p))]>, + (ins GPR:$false, so_reg_imm:$shift, pred:$p), + 4, IIC_iCMOVsr, []>, RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; def MOVCCsr : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, so_reg_reg:$shift, cmovpred:$p), - 4, IIC_iCMOVsr, - [(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift, - cmovpred:$p))]>, + (ins GPR:$false, so_reg_reg:$shift, pred:$p), + 4, IIC_iCMOVsr, []>, RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; let isMoveImm = 1 in def MOVCCi16 : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, imm0_65535_expr:$imm, cmovpred:$p), - 4, IIC_iMOVi, - [(set GPR:$Rd, (ARMcmov GPR:$false, imm0_65535:$imm, - cmovpred:$p))]>, + (ins GPR:$false, imm0_65535_expr:$imm, pred:$p), + 4, IIC_iMOVi, []>, RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>, Sched<[WriteALU]>; let isMoveImm = 1 in def MOVCCi : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, mod_imm:$imm, cmovpred:$p), - 4, IIC_iCMOVi, - [(set GPR:$Rd, (ARMcmov GPR:$false, mod_imm:$imm, - cmovpred:$p))]>, + (ins GPR:$false, mod_imm:$imm, pred:$p), + 4, IIC_iCMOVi, []>, RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; // Two instruction predicate mov immediate. 
let isMoveImm = 1 in def MOVCCi32imm : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, i32imm:$src, cmovpred:$p), - 8, IIC_iCMOVix2, - [(set GPR:$Rd, (ARMcmov GPR:$false, imm:$src, - cmovpred:$p))]>, + (ins GPR:$false, i32imm:$src, pred:$p), + 8, IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>; let isMoveImm = 1 in def MVNCCi : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, mod_imm:$imm, cmovpred:$p), - 4, IIC_iCMOVi, - [(set GPR:$Rd, (ARMcmov GPR:$false, mod_imm_not:$imm, - cmovpred:$p))]>, + (ins GPR:$false, mod_imm:$imm, pred:$p), + 4, IIC_iCMOVi, []>, RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; } // hasSideEffects +// The following patterns have to be defined out-of-line because the number +// of instruction operands does not match the number of SDNode operands +// (`pred` counts as one operand). + +def : ARMPat<(ARMcmov i32:$false, i32:$Rm, imm:$cc, CPSR), + (MOVCCr $false, $Rm, imm:$cc, CPSR)>; + +def : ARMPat<(ARMcmov i32:$false, so_reg_imm:$shift, imm:$cc, CPSR), + (MOVCCsi $false, so_reg_imm:$shift, imm:$cc, CPSR)>; + +def : ARMPat<(ARMcmov i32:$false, so_reg_reg:$shift, imm:$cc, CPSR), + (MOVCCsr $false, so_reg_reg:$shift, imm:$cc, CPSR)>; + +def : ARMV6T2Pat<(ARMcmov i32:$false, imm0_65535:$imm, imm:$cc, CPSR), + (MOVCCi16 $false, imm0_65535:$imm, imm:$cc, CPSR)>; + +def : ARMPat<(ARMcmov i32:$false, mod_imm:$imm, imm:$cc, CPSR), + (MOVCCi $false, mod_imm:$imm, imm:$cc, CPSR)>; + +def : ARMPat<(ARMcmov i32:$false, mod_imm_not:$imm, imm:$cc, CPSR), + (MVNCCi $false, mod_imm_not:$imm, imm:$cc, CPSR)>; + +def : ARMV6T2Pat<(ARMcmov i32:$false, imm:$src, imm:$cc, CPSR), + (MOVCCi32imm $false, imm:$src, imm:$cc, CPSR)>; //===----------------------------------------------------------------------===// // Atomic operations intrinsics diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td index b92f42874bbdd..cc7fc743fe4f9 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb.td +++ 
b/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -23,8 +23,7 @@ def imm_sr_XFORM: SDNodeXFormgetTargetConstant((Imm == 32 ? 0 : Imm), SDLoc(N), MVT::i32); }]>; def ThumbSRImmAsmOperand: ImmAsmOperand<1,32> { let Name = "ImmThumbSR"; } -def imm_sr : Operand, PatLeaf<(imm), [{ - uint64_t Imm = N->getZExtValue(); +def imm_sr : Operand, ImmLeaf 0 && Imm <= 32; }], imm_sr_XFORM> { let PrintMethod = "printThumbSRImm"; @@ -1108,13 +1107,14 @@ let isCompare = 1, Defs = [CPSR] in { // T1pIDPEncode<0b1011, (outs), (ins tGPR:$lhs, tGPR:$rhs), // IIC_iCMPr, // "cmn", "\t$lhs, $rhs", -// [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>; +// [(set CPSR, (ARMcmp tGPR:$lhs, (ineg tGPR:$rhs)))]>; def tCMNz : // A8.6.33 T1pIDPEncode<0b1011, (outs), (ins tGPR:$Rn, tGPR:$Rm), IIC_iCMPr, "cmn", "\t$Rn, $Rm", - [(ARMcmpZ tGPR:$Rn, (ineg tGPR:$Rm))]>, Sched<[WriteCMP]>; + [(set CPSR, (ARMcmpZ tGPR:$Rn, (ineg tGPR:$Rm)))]>, + Sched<[WriteCMP]>; } // isCompare = 1, Defs = [CPSR] @@ -1122,7 +1122,7 @@ def tCMNz : // A8.6.33 let isCompare = 1, Defs = [CPSR] in { def tCMPi8 : T1pI<(outs), (ins tGPR:$Rn, imm0_255:$imm8), IIC_iCMPi, "cmp", "\t$Rn, $imm8", - [(ARMcmp tGPR:$Rn, imm0_255:$imm8)]>, + [(set CPSR, (ARMcmp tGPR:$Rn, imm0_255:$imm8))]>, T1General<{1,0,1,?,?}>, Sched<[WriteCMP]> { // A8.6.35 bits<3> Rn; @@ -1136,7 +1136,7 @@ def tCMPr : // A8.6.36 T1 T1pIDPEncode<0b1010, (outs), (ins tGPR:$Rn, tGPR:$Rm), IIC_iCMPr, "cmp", "\t$Rn, $Rm", - [(ARMcmp tGPR:$Rn, tGPR:$Rm)]>, Sched<[WriteCMP]>; + [(set CPSR, (ARMcmp tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteCMP]>; def tCMPhir : T1pI<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_iCMPr, "cmp", "\t$Rn, $Rm", []>, @@ -1423,7 +1423,7 @@ let isCompare = 1, isCommutable = 1, Defs = [CPSR] in def tTST : // A8.6.230 T1pIDPEncode<0b1000, (outs), (ins tGPR:$Rn, tGPR:$Rm), IIC_iTSTr, "tst", "\t$Rn, $Rm", - [(ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0)]>, + [(set CPSR, (ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0))]>, Sched<[WriteALU]>; // A8.8.247 UDF - Undefined (Encoding T1) @@ -1466,9 
+1466,11 @@ def tUXTH : // A8.6.264 // Expanded after instruction selection into a branch sequence. let usesCustomInserter = 1 in // Expanded after instruction selection. def tMOVCCr_pseudo : - PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, cmovpred:$p), - NoItinerary, - [(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, cmovpred:$p))]>; + PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$p), + NoItinerary, []>; + +def : Pat<(ARMcmov tGPR:$false, tGPR:$true, imm:$cc, CPSR), + (tMOVCCr_pseudo $false, $true, imm:$cc, CPSR)>; // tLEApcrel - Load a pc-relative address into a register without offending the // assembler. diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index aa5c0a5889768..99617e53d657a 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -1144,7 +1144,8 @@ let isCompare = 1, Defs = [CPSR] in { def ri : T2OneRegCmpImm< (outs), (ins LHSGPR:$Rn, t2_so_imm:$imm), iii, opc, ".w\t$Rn, $imm", - [(opnode LHSGPR:$Rn, t2_so_imm:$imm)]>, Sched<[WriteCMP]> { + [(set CPSR, (opnode LHSGPR:$Rn, t2_so_imm:$imm))]>, + Sched<[WriteCMP]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; let Inst{24-21} = opcod; @@ -1156,7 +1157,8 @@ let isCompare = 1, Defs = [CPSR] in { def rr : T2TwoRegCmp< (outs), (ins LHSGPR:$Rn, rGPR:$Rm), iir, opc, ".w\t$Rn, $Rm", - [(opnode LHSGPR:$Rn, rGPR:$Rm)]>, Sched<[WriteCMP]> { + [(set CPSR, (opnode LHSGPR:$Rn, rGPR:$Rm))]>, + Sched<[WriteCMP]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = opcod; @@ -1170,7 +1172,7 @@ let isCompare = 1, Defs = [CPSR] in { def rs : T2OneRegCmpShiftedReg< (outs), (ins LHSGPR:$Rn, t2_so_reg:$ShiftedRm), iis, opc, ".w\t$Rn, $ShiftedRm", - [(opnode LHSGPR:$Rn, t2_so_reg:$ShiftedRm)]>, + [(set CPSR, (opnode LHSGPR:$Rn, t2_so_reg:$ShiftedRm))]>, Sched<[WriteCMPsi]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -3477,7 +3479,7 @@ let isCompare = 1, Defs = [CPSR] in { def 
t2CMNri : T2OneRegCmpImm< (outs), (ins GPRnopc:$Rn, t2_so_imm:$imm), IIC_iCMPi, "cmn", ".w\t$Rn, $imm", - [(ARMcmn GPRnopc:$Rn, (ineg t2_so_imm:$imm))]>, + [(set CPSR, (ARMcmn GPRnopc:$Rn, (ineg t2_so_imm:$imm)))]>, Sched<[WriteCMP, ReadALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 0; @@ -3490,8 +3492,9 @@ let isCompare = 1, Defs = [CPSR] in { def t2CMNzrr : T2TwoRegCmp< (outs), (ins GPRnopc:$Rn, rGPR:$Rm), IIC_iCMPr, "cmn", ".w\t$Rn, $Rm", - [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> - GPRnopc:$Rn, rGPR:$Rm)]>, Sched<[WriteCMP, ReadALU, ReadALU]> { + [(set CPSR, (BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> + GPRnopc:$Rn, rGPR:$Rm))]>, + Sched<[WriteCMP, ReadALU, ReadALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; let Inst{24-21} = 0b1000; @@ -3505,8 +3508,8 @@ let isCompare = 1, Defs = [CPSR] in { def t2CMNzrs : T2OneRegCmpShiftedReg< (outs), (ins GPRnopc:$Rn, t2_so_reg:$ShiftedRm), IIC_iCMPsi, "cmn", ".w\t$Rn, $ShiftedRm", - [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> - GPRnopc:$Rn, t2_so_reg:$ShiftedRm)]>, + [(set CPSR, (BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> + GPRnopc:$Rn, t2_so_reg:$ShiftedRm))]>, Sched<[WriteCMPsi, ReadALU, ReadALU]> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b01; @@ -3542,67 +3545,84 @@ let hasSideEffects = 0 in { let isCommutable = 1, isSelect = 1 in def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd), - (ins rGPR:$false, rGPR:$Rm, cmovpred:$p), - 4, IIC_iCMOVr, - [(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm, - cmovpred:$p))]>, + (ins rGPR:$false, rGPR:$Rm, pred:$p), + 4, IIC_iCMOVr, []>, RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; let isMoveImm = 1 in def t2MOVCCi : t2PseudoInst<(outs rGPR:$Rd), - (ins rGPR:$false, t2_so_imm:$imm, cmovpred:$p), - 4, IIC_iCMOVi, - [(set rGPR:$Rd, (ARMcmov rGPR:$false,t2_so_imm:$imm, - cmovpred:$p))]>, + (ins rGPR:$false, t2_so_imm:$imm, pred:$p), + 4, IIC_iCMOVi, []>, RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; let isCodeGenOnly = 1 in { let isMoveImm = 1 
in def t2MOVCCi16 : t2PseudoInst<(outs rGPR:$Rd), - (ins rGPR:$false, imm0_65535_expr:$imm, cmovpred:$p), - 4, IIC_iCMOVi, - [(set rGPR:$Rd, (ARMcmov rGPR:$false, imm0_65535:$imm, - cmovpred:$p))]>, + (ins rGPR:$false, imm0_65535_expr:$imm, pred:$p), + 4, IIC_iCMOVi, []>, RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; let isMoveImm = 1 in def t2MVNCCi : t2PseudoInst<(outs rGPR:$Rd), - (ins rGPR:$false, t2_so_imm:$imm, cmovpred:$p), - 4, IIC_iCMOVi, - [(set rGPR:$Rd, - (ARMcmov rGPR:$false, t2_so_imm_not:$imm, - cmovpred:$p))]>, + (ins rGPR:$false, t2_so_imm:$imm, pred:$p), + 4, IIC_iCMOVi, []>, RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; -class MOVCCShPseudo +class MOVCCShPseudo : t2PseudoInst<(outs rGPR:$Rd), - (ins rGPR:$false, rGPR:$Rm, i32imm:$imm, cmovpred:$p), - 4, IIC_iCMOVsi, - [(set rGPR:$Rd, (ARMcmov rGPR:$false, - (opnode rGPR:$Rm, (i32 ty:$imm)), - cmovpred:$p))]>, + (ins rGPR:$false, rGPR:$Rm, i32imm:$imm, pred:$p), + 4, IIC_iCMOVsi, []>, RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; -def t2MOVCClsl : MOVCCShPseudo; -def t2MOVCClsr : MOVCCShPseudo; -def t2MOVCCasr : MOVCCShPseudo; -def t2MOVCCror : MOVCCShPseudo; +def t2MOVCClsl : MOVCCShPseudo; +def t2MOVCClsr : MOVCCShPseudo; +def t2MOVCCasr : MOVCCShPseudo; +def t2MOVCCror : MOVCCShPseudo; let isMoveImm = 1 in def t2MOVCCi32imm : t2PseudoInst<(outs rGPR:$dst), - (ins rGPR:$false, i32imm:$src, cmovpred:$p), - 8, IIC_iCMOVix2, - [(set rGPR:$dst, (ARMcmov rGPR:$false, imm:$src, - cmovpred:$p))]>, + (ins rGPR:$false, i32imm:$src, pred:$p), + 8, IIC_iCMOVix2, []>, RegConstraint<"$false = $dst">; } // isCodeGenOnly = 1 } // hasSideEffects +// The following patterns have to be defined out-of-line because the number +// of instruction operands does not match the number of SDNode operands +// (`pred` counts as one operand). 
+ +def : T2Pat<(ARMcmov i32:$false, i32:$Rm, imm:$cc, CPSR), + (t2MOVCCr $false, $Rm, imm:$cc, CPSR)>; + +def : T2Pat<(ARMcmov i32:$false, t2_so_imm:$imm, imm:$cc, CPSR), + (t2MOVCCi $false, t2_so_imm:$imm, imm:$cc, CPSR)>; + +def : T2Pat<(ARMcmov i32:$false, imm0_65535:$imm, imm:$cc, CPSR), + (t2MOVCCi16 $false, imm0_65535:$imm, imm:$cc, CPSR)>; + +def : T2Pat<(ARMcmov i32:$false, t2_so_imm_not:$imm, imm:$cc, CPSR), + (t2MVNCCi $false, t2_so_imm_not:$imm, imm:$cc, CPSR)>; + +def : T2Pat<(ARMcmov i32:$false, (shl i32:$Rm, imm0_31:$imm), imm:$cc, CPSR), + (t2MOVCClsl $false, $Rm, imm0_31:$imm, imm:$cc, CPSR)>; + +def : T2Pat<(ARMcmov i32:$false, (srl i32:$Rm, imm_sr:$imm), imm:$cc, CPSR), + (t2MOVCClsr $false, $Rm, imm_sr:$imm, imm:$cc, CPSR)>; + +def : T2Pat<(ARMcmov i32:$false, (sra i32:$Rm, imm_sr:$imm), imm:$cc, CPSR), + (t2MOVCCasr $false, $Rm, imm_sr:$imm, imm:$cc, CPSR)>; + +def : T2Pat<(ARMcmov i32:$false, (rotr i32:$Rm, imm0_31:$imm), imm:$cc, CPSR), + (t2MOVCCror $false, $Rm, imm0_31:$imm, imm:$cc, CPSR)>; + +def : T2Pat<(ARMcmov i32:$false, imm:$src, imm:$cc, CPSR), + (t2MOVCCi32imm $false, imm:$src, imm:$cc, CPSR)>; + //===----------------------------------------------------------------------===// // Atomic operations intrinsics // @@ -5706,51 +5726,53 @@ def t2CSINC : CS<"csinc", 0b1001>; def t2CSINV : CS<"csinv", 0b1010>; def t2CSNEG : CS<"csneg", 0b1011>; -def ARMcsinc_su : PatFrag<(ops node:$lhs, node:$rhs, node:$cond), - (ARMcsinc node:$lhs, node:$rhs, node:$cond), [{ +def ARMcsinc_su + : PatFrag<(ops node:$lhs, node:$rhs, node:$cc, node:$flags), + (ARMcsinc node:$lhs, node:$rhs, node:$cc, node:$flags), [{ return N->hasOneUse(); }]>; let Predicates = [HasV8_1MMainline] in { multiclass CSPats { - def : T2Pat<(Node GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm), - (Insn GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm)>; - def : T2Pat<(Node (i32 0), GPRwithZR:$fval, imm0_31:$imm), - (Insn ZR, GPRwithZR:$fval, imm0_31:$imm)>; - def : T2Pat<(Node 
GPRwithZR:$tval, (i32 0), imm0_31:$imm), - (Insn GPRwithZR:$tval, ZR, imm0_31:$imm)>; - def : T2Pat<(Node (i32 0), (i32 0), imm0_31:$imm), - (Insn ZR, ZR, imm0_31:$imm)>; + def : T2Pat<(Node GPRwithZR:$tval, GPRwithZR:$fval, imm:$cc, CPSR), + (Insn GPRwithZR:$tval, GPRwithZR:$fval, imm:$cc)>; + def : T2Pat<(Node (i32 0), GPRwithZR:$fval, imm:$cc, CPSR), + (Insn ZR, GPRwithZR:$fval, imm:$cc)>; + def : T2Pat<(Node GPRwithZR:$tval, (i32 0), imm:$cc, CPSR), + (Insn GPRwithZR:$tval, ZR, imm:$cc)>; + def : T2Pat<(Node (i32 0), (i32 0), imm:$cc, CPSR), + (Insn ZR, ZR, imm:$cc)>; } defm : CSPats; defm : CSPats; defm : CSPats; - def : T2Pat<(ARMcmov (i32 1), (i32 0), cmovpred:$imm), - (t2CSINC ZR, ZR, imm0_31:$imm)>; - def : T2Pat<(ARMcmov (i32 -1), (i32 0), cmovpred:$imm), - (t2CSINV ZR, ZR, imm0_31:$imm)>; - def : T2Pat<(ARMcmov (i32 0), (i32 1), cmovpred:$imm), - (t2CSINC ZR, ZR, (inv_cond_XFORM imm:$imm))>; - def : T2Pat<(ARMcmov (i32 0), (i32 -1), cmovpred:$imm), - (t2CSINV ZR, ZR, (inv_cond_XFORM imm:$imm))>; + def : T2Pat<(ARMcmov (i32 1), (i32 0), imm:$cc, CPSR), + (t2CSINC ZR, ZR, imm:$cc)>; + def : T2Pat<(ARMcmov (i32 -1), (i32 0), imm:$cc, CPSR), + (t2CSINV ZR, ZR, imm:$cc)>; + def : T2Pat<(ARMcmov (i32 0), (i32 1), imm:$cc, CPSR), + (t2CSINC ZR, ZR, (inv_cond_XFORM imm:$cc))>; + def : T2Pat<(ARMcmov (i32 0), (i32 -1), imm:$cc, CPSR), + (t2CSINV ZR, ZR, (inv_cond_XFORM imm:$cc))>; multiclass ModifiedV8_1CSEL { - def : T2Pat<(ARMcmov modvalue, GPRwithZR:$tval, cmovpred:$imm), - (Insn GPRwithZR:$tval, GPRwithZR:$fval, imm0_31:$imm)>; - def : T2Pat<(ARMcmov GPRwithZR:$tval, modvalue, cmovpred:$imm), + def : T2Pat<(ARMcmov modvalue, GPRwithZR:$tval, imm:$cc, CPSR), + (Insn GPRwithZR:$tval, GPRwithZR:$fval, imm:$cc)>; + def : T2Pat<(ARMcmov GPRwithZR:$tval, modvalue, imm:$cc, CPSR), (Insn GPRwithZR:$tval, GPRwithZR:$fval, - (i32 (inv_cond_XFORM imm:$imm)))>; + (i32 (inv_cond_XFORM imm:$cc)))>; } defm : ModifiedV8_1CSEL; defm : ModifiedV8_1CSEL; defm : 
ModifiedV8_1CSEL; - def : T2Pat<(ARMcmov (topbitsallzero32:$Rn), (i32 1), cmovpred:$imm), - (t2CSINC $Rn, ZR, (inv_cond_XFORM imm:$imm))>; - def : T2Pat<(and (topbitsallzero32:$Rn), (ARMcsinc_su (i32 0), (i32 0), cmovpred:$imm)), - (t2CSEL ZR, $Rn, $imm)>; + def : T2Pat<(ARMcmov (topbitsallzero32:$Rn), (i32 1), imm:$cc, CPSR), + (t2CSINC $Rn, ZR, (inv_cond_XFORM imm:$cc))>; + def : T2Pat<(and (topbitsallzero32:$Rn), + (ARMcsinc_su (i32 0), (i32 0), imm:$cc, CPSR)), + (t2CSEL ZR, $Rn, imm:$cc)>; } // CS aliases. diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td index a29753909ea99..754517f3bc4d5 100644 --- a/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -34,10 +34,10 @@ def arm_cmpfpe : SDNode<"ARMISD::CMPFPE", SDT_CMPFP>; def arm_cmpfpe0 : SDNode<"ARMISD::CMPFPEw0", SDT_CMPFP0>; def arm_fmstat : SDNode<"ARMISD::FMSTAT", - SDTypeProfile<0, 1, [ - SDTCisVT<0, FlagsVT> // in flags - ]>, - [SDNPOutGlue] // TODO: Change Glue to a normal result. 
+ SDTypeProfile<1, 1, [ + SDTCisVT<0, FlagsVT>, // out flags + SDTCisVT<1, FlagsVT> // in flags + ]> >; def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>; @@ -562,19 +562,21 @@ multiclass vsel_inst opc, int CC> { def H : AHbInp<0b11100, opc, 0, (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm), NoItinerary, !strconcat("vsel", op, ".f16\t$Sd, $Sn, $Sm"), - [(set (f16 HPR:$Sd), (ARMcmov (f16 HPR:$Sm), (f16 HPR:$Sn), CC))]>, + [(set (f16 HPR:$Sd), + (ARMcmov (f16 HPR:$Sm), (f16 HPR:$Sn), CC, CPSR))]>, Requires<[HasFullFP16]>; def S : ASbInp<0b11100, opc, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"), - [(set SPR:$Sd, (ARMcmov SPR:$Sm, SPR:$Sn, CC))]>, + [(set SPR:$Sd, (ARMcmov SPR:$Sm, SPR:$Sn, CC, CPSR))]>, Requires<[HasFPARMv8]>; def D : ADbInp<0b11100, opc, 0, (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), NoItinerary, !strconcat("vsel", op, ".f64\t$Dd, $Dn, $Dm"), - [(set DPR:$Dd, (ARMcmov (f64 DPR:$Dm), (f64 DPR:$Dn), CC))]>, + [(set DPR:$Dd, + (ARMcmov (f64 DPR:$Dm), (f64 DPR:$Dn), CC, CPSR))]>, Requires<[HasFPARMv8, HasDPVFP]>; } } @@ -2461,25 +2463,35 @@ def : Pat<(fneg (f16 (fma (fneg (f16 HPR:$Sn)), (f16 HPR:$Sm), (f16 HPR:$Sdin))) // let hasSideEffects = 0 in { -def VMOVDcc : PseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, cmovpred:$p), - IIC_fpUNA64, - [(set (f64 DPR:$Dd), - (ARMcmov DPR:$Dn, DPR:$Dm, cmovpred:$p))]>, +def VMOVDcc : PseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, pred:$p), + IIC_fpUNA64, []>, RegConstraint<"$Dn = $Dd">, Requires<[HasFPRegs64]>; -def VMOVScc : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p), - IIC_fpUNA32, - [(set (f32 SPR:$Sd), - (ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>, +def VMOVScc : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, pred:$p), + IIC_fpUNA32, []>, RegConstraint<"$Sn = $Sd">, Requires<[HasFPRegs]>; -def VMOVHcc : PseudoInst<(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm, cmovpred:$p), - IIC_fpUNA16, - [(set (f16 HPR:$Sd), - (ARMcmov (f16 HPR:$Sn), (f16 
HPR:$Sm), cmovpred:$p))]>, +def VMOVHcc : PseudoInst<(outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm, pred:$p), + IIC_fpUNA16, []>, RegConstraint<"$Sd = $Sn">, Requires<[HasFPRegs]>; } // hasSideEffects +// The following patterns have to be defined out-of-line because the number +// of instruction operands does not match the number of SDNode operands +// (`pred` counts as one operand). + +def : Pat<(ARMcmov f64:$Dn, f64:$Dm, imm:$cc, CPSR), + (VMOVDcc $Dn, $Dm, imm:$cc, CPSR)>, + Requires<[HasFPRegs64]>; + +def : Pat<(ARMcmov f32:$Sn, f32:$Sm, imm:$cc, CPSR), + (VMOVScc $Sn, $Sm, imm:$cc, CPSR)>, + Requires<[HasFPRegs]>; + +def : Pat<(ARMcmov f16:$Sn, f16:$Sm, imm:$cc, CPSR), + (VMOVHcc $Sn, $Sm, imm:$cc, CPSR)>, + Requires<[HasFPRegs]>; // FIXME: Shouldn't this be HasFPRegs16? + //===----------------------------------------------------------------------===// // Move from VFP System Register to ARM core register. // @@ -2510,7 +2522,7 @@ let DecoderMethod = "DecodeForVMRSandVMSR" in { Rt = 0b1111 /* apsr_nzcv */ in def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins), "vmrs", "\tAPSR_nzcv, fpscr", - [(arm_fmstat FPSCR_NZCV)]>; + [(set CPSR, (arm_fmstat FPSCR_NZCV))]>; // Application level FPSCR -> GPR let hasSideEffects = 1, Uses = [FPSCR], Predicates = [HasFPRegs] in diff --git a/llvm/test/CodeGen/ARM/add-like-or.ll b/llvm/test/CodeGen/ARM/add-like-or.ll index 5de03a92afeb4..c0ddee8388041 100644 --- a/llvm/test/CodeGen/ARM/add-like-or.ll +++ b/llvm/test/CodeGen/ARM/add-like-or.ll @@ -29,8 +29,8 @@ define i32 @test_add_i3(i1 %tst, i32 %a, i32 %b) { ; CHECK-T2: @ %bb.0: ; CHECK-T2-NEXT: .save {r4, lr} ; CHECK-T2-NEXT: push {r4, lr} -; CHECK-T2-NEXT: lsls r0, r0, #31 ; CHECK-T2-NEXT: bic r4, r2, #3 +; CHECK-T2-NEXT: lsls r0, r0, #31 ; CHECK-T2-NEXT: it ne ; CHECK-T2-NEXT: bicne r4, r1, #6 ; CHECK-T2-NEXT: mov r0, r4 @@ -144,12 +144,12 @@ define i32 @test_add_i12(i32 %a, i32 %b, i1 %tst) { ; ; CHECK-A-LABEL: test_add_i12: ; CHECK-A: @ %bb.0: -; CHECK-A-NEXT: bfc r1, #0, 
#13 ; CHECK-A-NEXT: bfc r0, #0, #12 +; CHECK-A-NEXT: bfc r1, #0, #13 ; CHECK-A-NEXT: tst r2, #1 -; CHECK-A-NEXT: moveq r0, r1 -; CHECK-A-NEXT: movw r1, #854 -; CHECK-A-NEXT: orr r0, r0, r1 +; CHECK-A-NEXT: movne r1, r0 +; CHECK-A-NEXT: movw r0, #854 +; CHECK-A-NEXT: orr r0, r1, r0 ; CHECK-A-NEXT: bx lr %tmp = and i32 %a, -4096 %tmp1 = and i32 %b, -8192 diff --git a/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll b/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll index b6adc995091ce..75416475289f3 100644 --- a/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll +++ b/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll @@ -1965,32 +1965,34 @@ define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %t ; ARM-ENABLE-NEXT: @ %bb.1: @ %bb3 ; ARM-ENABLE-NEXT: push {r4, r7, lr} ; ARM-ENABLE-NEXT: add r7, sp, #4 -; ARM-ENABLE-NEXT: sub r4, sp, #16 +; ARM-ENABLE-NEXT: sub r4, sp, #24 ; ARM-ENABLE-NEXT: bfc r4, #0, #4 ; ARM-ENABLE-NEXT: mov sp, r4 ; ARM-ENABLE-NEXT: ldr r1, [r7, #8] +; ARM-ENABLE-NEXT: vmov.f64 d16, #1.000000e+00 ; ARM-ENABLE-NEXT: mov r2, r3 ; ARM-ENABLE-NEXT: vst1.64 {d8, d9}, [r4:128] +; ARM-ENABLE-NEXT: vmov d9, r3, r1 ; ARM-ENABLE-NEXT: vmov s16, r0 ; ARM-ENABLE-NEXT: mov r0, r3 -; ARM-ENABLE-NEXT: vmov d9, r3, r1 ; ARM-ENABLE-NEXT: mov r3, r1 +; ARM-ENABLE-NEXT: vstr d10, [r4, #16] +; ARM-ENABLE-NEXT: vadd.f64 d10, d9, d16 ; ARM-ENABLE-NEXT: bl _pow ; ARM-ENABLE-NEXT: vmov.f32 s0, #1.000000e+00 ; ARM-ENABLE-NEXT: mov r4, sp -; ARM-ENABLE-NEXT: vmov.f64 d16, #1.000000e+00 -; ARM-ENABLE-NEXT: vadd.f64 d16, d9, d16 +; ARM-ENABLE-NEXT: vmov.f64 d17, d9 +; ARM-ENABLE-NEXT: vmov d16, r0, r1 ; ARM-ENABLE-NEXT: vcmp.f32 s16, s0 ; ARM-ENABLE-NEXT: vmrs APSR_nzcv, fpscr -; ARM-ENABLE-NEXT: vmov d17, r0, r1 -; ARM-ENABLE-NEXT: vmov.f64 d18, d9 -; ARM-ENABLE-NEXT: vadd.f64 d17, d17, d17 -; ARM-ENABLE-NEXT: vmovgt.f64 d18, d16 -; ARM-ENABLE-NEXT: vcmp.f64 d18, d9 +; ARM-ENABLE-NEXT: vadd.f64 d16, d16, d16 +; ARM-ENABLE-NEXT: vmovgt.f64 d17, d10 +; ARM-ENABLE-NEXT: 
vcmp.f64 d17, d9 ; ARM-ENABLE-NEXT: vmrs APSR_nzcv, fpscr -; ARM-ENABLE-NEXT: vmovne.f64 d9, d17 +; ARM-ENABLE-NEXT: vmovne.f64 d9, d16 ; ARM-ENABLE-NEXT: vcvt.f32.f64 s0, d9 ; ARM-ENABLE-NEXT: vld1.64 {d8, d9}, [r4:128] +; ARM-ENABLE-NEXT: vldr d10, [r4, #16] ; ARM-ENABLE-NEXT: sub sp, r7, #4 ; ARM-ENABLE-NEXT: pop {r4, r7, lr} ; ARM-ENABLE-NEXT: vmov r0, s0 @@ -2012,32 +2014,33 @@ define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %t ; ARM-DISABLE-NEXT: @ %bb.0: @ %bb ; ARM-DISABLE-NEXT: push {r4, r7, lr} ; ARM-DISABLE-NEXT: add r7, sp, #4 -; ARM-DISABLE-NEXT: sub r4, sp, #16 +; ARM-DISABLE-NEXT: sub r4, sp, #24 ; ARM-DISABLE-NEXT: bfc r4, #0, #4 ; ARM-DISABLE-NEXT: mov sp, r4 ; ARM-DISABLE-NEXT: tst r2, #1 ; ARM-DISABLE-NEXT: vst1.64 {d8, d9}, [r4:128] +; ARM-DISABLE-NEXT: vstr d10, [r4, #16] ; ARM-DISABLE-NEXT: beq LBB12_2 ; ARM-DISABLE-NEXT: @ %bb.1: @ %bb3 ; ARM-DISABLE-NEXT: ldr r1, [r7, #8] -; ARM-DISABLE-NEXT: vmov s16, r0 -; ARM-DISABLE-NEXT: mov r0, r3 +; ARM-DISABLE-NEXT: vmov.f64 d16, #1.000000e+00 ; ARM-DISABLE-NEXT: mov r2, r3 ; ARM-DISABLE-NEXT: vmov d9, r3, r1 +; ARM-DISABLE-NEXT: vmov s16, r0 +; ARM-DISABLE-NEXT: mov r0, r3 ; ARM-DISABLE-NEXT: mov r3, r1 +; ARM-DISABLE-NEXT: vadd.f64 d10, d9, d16 ; ARM-DISABLE-NEXT: bl _pow ; ARM-DISABLE-NEXT: vmov.f32 s0, #1.000000e+00 -; ARM-DISABLE-NEXT: vmov.f64 d16, #1.000000e+00 -; ARM-DISABLE-NEXT: vadd.f64 d16, d9, d16 +; ARM-DISABLE-NEXT: vmov.f64 d17, d9 +; ARM-DISABLE-NEXT: vmov d16, r0, r1 ; ARM-DISABLE-NEXT: vcmp.f32 s16, s0 ; ARM-DISABLE-NEXT: vmrs APSR_nzcv, fpscr -; ARM-DISABLE-NEXT: vmov d17, r0, r1 -; ARM-DISABLE-NEXT: vmov.f64 d18, d9 -; ARM-DISABLE-NEXT: vadd.f64 d17, d17, d17 -; ARM-DISABLE-NEXT: vmovgt.f64 d18, d16 -; ARM-DISABLE-NEXT: vcmp.f64 d18, d9 +; ARM-DISABLE-NEXT: vadd.f64 d16, d16, d16 +; ARM-DISABLE-NEXT: vmovgt.f64 d17, d10 +; ARM-DISABLE-NEXT: vcmp.f64 d17, d9 ; ARM-DISABLE-NEXT: vmrs APSR_nzcv, fpscr -; ARM-DISABLE-NEXT: vmovne.f64 d9, d17 +; 
ARM-DISABLE-NEXT: vmovne.f64 d9, d16 ; ARM-DISABLE-NEXT: vcvt.f32.f64 s0, d9 ; ARM-DISABLE-NEXT: b LBB12_3 ; ARM-DISABLE-NEXT: LBB12_2: @@ -2046,6 +2049,7 @@ define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %t ; ARM-DISABLE-NEXT: mov r4, sp ; ARM-DISABLE-NEXT: vld1.64 {d8, d9}, [r4:128] ; ARM-DISABLE-NEXT: vmov r0, s0 +; ARM-DISABLE-NEXT: vldr d10, [r4, #16] ; ARM-DISABLE-NEXT: sub sp, r7, #4 ; ARM-DISABLE-NEXT: pop {r4, r7, pc} ; ARM-DISABLE-NEXT: .p2align 2 @@ -2064,34 +2068,36 @@ define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %t ; THUMB-ENABLE-NEXT: @ %bb.1: @ %bb3 ; THUMB-ENABLE-NEXT: push {r4, r7, lr} ; THUMB-ENABLE-NEXT: add r7, sp, #4 -; THUMB-ENABLE-NEXT: sub.w r4, sp, #16 +; THUMB-ENABLE-NEXT: sub.w r4, sp, #24 ; THUMB-ENABLE-NEXT: bfc r4, #0, #4 ; THUMB-ENABLE-NEXT: mov sp, r4 ; THUMB-ENABLE-NEXT: ldr r1, [r7, #8] +; THUMB-ENABLE-NEXT: vmov.f64 d16, #1.000000e+00 ; THUMB-ENABLE-NEXT: mov r2, r3 ; THUMB-ENABLE-NEXT: vst1.64 {d8, d9}, [r4:128] +; THUMB-ENABLE-NEXT: vmov d9, r3, r1 ; THUMB-ENABLE-NEXT: vmov s16, r0 ; THUMB-ENABLE-NEXT: mov r0, r3 -; THUMB-ENABLE-NEXT: vmov d9, r3, r1 ; THUMB-ENABLE-NEXT: mov r3, r1 +; THUMB-ENABLE-NEXT: vstr d10, [r4, #16] +; THUMB-ENABLE-NEXT: vadd.f64 d10, d9, d16 ; THUMB-ENABLE-NEXT: bl _pow ; THUMB-ENABLE-NEXT: vmov.f32 s0, #1.000000e+00 ; THUMB-ENABLE-NEXT: mov r4, sp -; THUMB-ENABLE-NEXT: vmov.f64 d16, #1.000000e+00 -; THUMB-ENABLE-NEXT: vmov.f64 d18, d9 +; THUMB-ENABLE-NEXT: vmov.f64 d17, d9 +; THUMB-ENABLE-NEXT: vmov d16, r0, r1 ; THUMB-ENABLE-NEXT: vcmp.f32 s16, s0 -; THUMB-ENABLE-NEXT: vadd.f64 d16, d9, d16 ; THUMB-ENABLE-NEXT: vmrs APSR_nzcv, fpscr ; THUMB-ENABLE-NEXT: it gt -; THUMB-ENABLE-NEXT: vmovgt.f64 d18, d16 -; THUMB-ENABLE-NEXT: vcmp.f64 d18, d9 -; THUMB-ENABLE-NEXT: vmov d17, r0, r1 +; THUMB-ENABLE-NEXT: vmovgt.f64 d17, d10 +; THUMB-ENABLE-NEXT: vcmp.f64 d17, d9 +; THUMB-ENABLE-NEXT: vadd.f64 d16, d16, d16 ; THUMB-ENABLE-NEXT: vmrs APSR_nzcv, 
fpscr -; THUMB-ENABLE-NEXT: vadd.f64 d17, d17, d17 ; THUMB-ENABLE-NEXT: it ne -; THUMB-ENABLE-NEXT: vmovne.f64 d9, d17 +; THUMB-ENABLE-NEXT: vmovne.f64 d9, d16 ; THUMB-ENABLE-NEXT: vcvt.f32.f64 s0, d9 ; THUMB-ENABLE-NEXT: vld1.64 {d8, d9}, [r4:128] +; THUMB-ENABLE-NEXT: vldr d10, [r4, #16] ; THUMB-ENABLE-NEXT: subs r4, r7, #4 ; THUMB-ENABLE-NEXT: mov sp, r4 ; THUMB-ENABLE-NEXT: pop.w {r4, r7, lr} @@ -2114,34 +2120,35 @@ define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %t ; THUMB-DISABLE-NEXT: @ %bb.0: @ %bb ; THUMB-DISABLE-NEXT: push {r4, r7, lr} ; THUMB-DISABLE-NEXT: add r7, sp, #4 -; THUMB-DISABLE-NEXT: sub.w r4, sp, #16 +; THUMB-DISABLE-NEXT: sub.w r4, sp, #24 ; THUMB-DISABLE-NEXT: bfc r4, #0, #4 ; THUMB-DISABLE-NEXT: mov sp, r4 ; THUMB-DISABLE-NEXT: lsls r1, r2, #31 ; THUMB-DISABLE-NEXT: vst1.64 {d8, d9}, [r4:128] +; THUMB-DISABLE-NEXT: vstr d10, [r4, #16] ; THUMB-DISABLE-NEXT: beq LBB12_2 ; THUMB-DISABLE-NEXT: @ %bb.1: @ %bb3 ; THUMB-DISABLE-NEXT: ldr r1, [r7, #8] -; THUMB-DISABLE-NEXT: vmov s16, r0 -; THUMB-DISABLE-NEXT: mov r0, r3 +; THUMB-DISABLE-NEXT: vmov.f64 d16, #1.000000e+00 ; THUMB-DISABLE-NEXT: mov r2, r3 ; THUMB-DISABLE-NEXT: vmov d9, r3, r1 +; THUMB-DISABLE-NEXT: vmov s16, r0 +; THUMB-DISABLE-NEXT: mov r0, r3 ; THUMB-DISABLE-NEXT: mov r3, r1 +; THUMB-DISABLE-NEXT: vadd.f64 d10, d9, d16 ; THUMB-DISABLE-NEXT: bl _pow ; THUMB-DISABLE-NEXT: vmov.f32 s0, #1.000000e+00 -; THUMB-DISABLE-NEXT: vmov.f64 d16, #1.000000e+00 -; THUMB-DISABLE-NEXT: vmov.f64 d18, d9 +; THUMB-DISABLE-NEXT: vmov.f64 d17, d9 +; THUMB-DISABLE-NEXT: vmov d16, r0, r1 ; THUMB-DISABLE-NEXT: vcmp.f32 s16, s0 -; THUMB-DISABLE-NEXT: vadd.f64 d16, d9, d16 ; THUMB-DISABLE-NEXT: vmrs APSR_nzcv, fpscr ; THUMB-DISABLE-NEXT: it gt -; THUMB-DISABLE-NEXT: vmovgt.f64 d18, d16 -; THUMB-DISABLE-NEXT: vcmp.f64 d18, d9 -; THUMB-DISABLE-NEXT: vmov d17, r0, r1 +; THUMB-DISABLE-NEXT: vmovgt.f64 d17, d10 +; THUMB-DISABLE-NEXT: vcmp.f64 d17, d9 +; THUMB-DISABLE-NEXT: vadd.f64 
d16, d16, d16 ; THUMB-DISABLE-NEXT: vmrs APSR_nzcv, fpscr -; THUMB-DISABLE-NEXT: vadd.f64 d17, d17, d17 ; THUMB-DISABLE-NEXT: it ne -; THUMB-DISABLE-NEXT: vmovne.f64 d9, d17 +; THUMB-DISABLE-NEXT: vmovne.f64 d9, d16 ; THUMB-DISABLE-NEXT: vcvt.f32.f64 s0, d9 ; THUMB-DISABLE-NEXT: b LBB12_3 ; THUMB-DISABLE-NEXT: LBB12_2: @@ -2149,8 +2156,9 @@ define float @debug_info(float %gamma, float %slopeLimit, i1 %or.cond, double %t ; THUMB-DISABLE-NEXT: LBB12_3: @ %bb13 ; THUMB-DISABLE-NEXT: mov r4, sp ; THUMB-DISABLE-NEXT: vld1.64 {d8, d9}, [r4:128] -; THUMB-DISABLE-NEXT: subs r4, r7, #4 ; THUMB-DISABLE-NEXT: vmov r0, s0 +; THUMB-DISABLE-NEXT: vldr d10, [r4, #16] +; THUMB-DISABLE-NEXT: subs r4, r7, #4 ; THUMB-DISABLE-NEXT: mov sp, r4 ; THUMB-DISABLE-NEXT: pop {r4, r7, pc} ; THUMB-DISABLE-NEXT: .p2align 2 diff --git a/llvm/test/CodeGen/ARM/atomic-64bit.ll b/llvm/test/CodeGen/ARM/atomic-64bit.ll index ab9e1dfd1cfb1..ca9939c0f8c55 100644 --- a/llvm/test/CodeGen/ARM/atomic-64bit.ll +++ b/llvm/test/CodeGen/ARM/atomic-64bit.ll @@ -278,12 +278,9 @@ define i64 @test10(ptr %ptr, i64 %val) { ; CHECK-BE: subs {{[^,]+}}, r2, [[REG2]] ; CHECK-LE: sbcs {{[^,]+}}, r2, [[REG2]] ; CHECK-BE: sbcs {{[^,]+}}, r1, [[REG1]] -; CHECK: mov [[CMP:[a-z0-9]+]], #0 -; CHECK: movwge [[CMP]], #1 -; CHECK: cmp [[CMP]], #0 -; CHECK: movne [[OUT_HI]], [[REG2]] +; CHECK: movge [[OUT_HI]], [[REG2]] ; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1 -; CHECK: movne [[OUT_LO]], [[REG1]] +; CHECK: movge [[OUT_LO]], [[REG1]] ; CHECK: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]] ; CHECK: cmp ; CHECK: bne @@ -297,12 +294,10 @@ define i64 @test10(ptr %ptr, i64 %val) { ; CHECK-THUMB-BE: subs.w {{[^,]+}}, r3, [[REG2]] ; CHECK-THUMB-LE: sbcs.w {{[^,]+}}, r3, [[REG2]] ; CHECK-THUMB-BE: sbcs.w {{[^,]+}}, r2, [[REG1]] -; CHECK-THUMB: mov.w [[CMP:[a-z0-9]+]], #0 -; CHECK-THUMB: movge.w [[CMP]], #1 -; CHECK-THUMB: cmp.w [[CMP]], #0 ; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3 -; CHECK-THUMB: movne [[OUT_HI]], [[REG2]] -; CHECK-THUMB: 
movne [[OUT_LO]], [[REG1]] +; CHECK-THUMB: itt ge +; CHECK-THUMB: movge [[OUT_HI]], [[REG2]] +; CHECK-THUMB: movge [[OUT_LO]], [[REG1]] ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]] ; CHECK-THUMB: cmp ; CHECK-THUMB: bne @@ -323,12 +318,9 @@ define i64 @test11(ptr %ptr, i64 %val) { ; CHECK-BE: subs {{[^,]+}}, r2, [[REG2]] ; CHECK-LE: sbcs {{[^,]+}}, r2, [[REG2]] ; CHECK-BE: sbcs {{[^,]+}}, r1, [[REG1]] -; CHECK: mov [[CMP:[a-z0-9]+]], #0 -; CHECK: movwhs [[CMP]], #1 -; CHECK: cmp [[CMP]], #0 -; CHECK: movne [[OUT_HI]], [[REG2]] +; CHECK: movhs [[OUT_HI]], [[REG2]] ; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1 -; CHECK: movne [[OUT_LO]], [[REG1]] +; CHECK: movhs [[OUT_LO]], [[REG1]] ; CHECK: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]] ; CHECK: cmp ; CHECK: bne @@ -342,12 +334,10 @@ define i64 @test11(ptr %ptr, i64 %val) { ; CHECK-THUMB-BE: subs.w {{[^,]+}}, r3, [[REG2]] ; CHECK-THUMB-LE: sbcs.w {{[^,]+}}, r3, [[REG2]] ; CHECK-THUMB-BE: sbcs.w {{[^,]+}}, r2, [[REG1]] -; CHECK-THUMB: mov.w [[CMP:[a-z0-9]+]], #0 -; CHECK-THUMB: movhs.w [[CMP]], #1 -; CHECK-THUMB: cmp.w [[CMP]], #0 ; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3 -; CHECK-THUMB: movne [[OUT_HI]], [[REG2]] -; CHECK-THUMB: movne [[OUT_LO]], [[REG1]] +; CHECK-THUMB: itt hs +; CHECK-THUMB: movhs [[OUT_HI]], [[REG2]] +; CHECK-THUMB: movhs [[OUT_LO]], [[REG1]] ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]] ; CHECK-THUMB: cmp ; CHECK-THUMB: bne @@ -368,12 +358,9 @@ define i64 @test12(ptr %ptr, i64 %val) { ; CHECK-BE: subs {{[^,]+}}, r2, [[REG2]] ; CHECK-LE: sbcs {{[^,]+}}, r2, [[REG2]] ; CHECK-BE: sbcs {{[^,]+}}, r1, [[REG1]] -; CHECK: mov [[CMP:[a-z0-9]+]], #0 -; CHECK: movwlt [[CMP]], #1 -; CHECK: cmp [[CMP]], #0 -; CHECK: movne [[OUT_HI]], [[REG2]] +; CHECK: movlt [[OUT_HI]], [[REG2]] ; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1 -; CHECK: movne [[OUT_LO]], [[REG1]] +; CHECK: movlt [[OUT_LO]], [[REG1]] ; CHECK: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]] ; CHECK: cmp ; CHECK: bne @@ -387,12 +374,10 
@@ define i64 @test12(ptr %ptr, i64 %val) { ; CHECK-THUMB-BE: subs.w {{[^,]+}}, r3, [[REG2]] ; CHECK-THUMB-LE: sbcs.w {{[^,]+}}, r3, [[REG2]] ; CHECK-THUMB-BE: sbcs.w {{[^,]+}}, r2, [[REG1]] -; CHECK-THUMB: mov.w [[CMP:[a-z0-9]+]], #0 -; CHECK-THUMB: movlt.w [[CMP]], #1 -; CHECK-THUMB: cmp.w [[CMP]], #0 ; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3 -; CHECK-THUMB: movne [[OUT_HI]], [[REG2]] -; CHECK-THUMB: movne [[OUT_LO]], [[REG1]] +; CHECK-THUMB: itt lt +; CHECK-THUMB: movlt [[OUT_HI]], [[REG2]] +; CHECK-THUMB: movlt [[OUT_LO]], [[REG1]] ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]] ; CHECK-THUMB: cmp ; CHECK-THUMB: bne @@ -413,12 +398,9 @@ define i64 @test13(ptr %ptr, i64 %val) { ; CHECK-BE: subs {{[^,]+}}, r2, [[REG2]] ; CHECK-LE: sbcs {{[^,]+}}, r2, [[REG2]] ; CHECK-BE: sbcs {{[^,]+}}, r1, [[REG1]] -; CHECK: mov [[CMP:[a-z0-9]+]], #0 -; CHECK: movwlo [[CMP]], #1 -; CHECK: cmp [[CMP]], #0 -; CHECK: movne [[OUT_HI]], [[REG2]] +; CHECK: movlo [[OUT_HI]], [[REG2]] ; CHECK: mov [[OUT_LO:[a-z0-9]+]], r1 -; CHECK: movne [[OUT_LO]], [[REG1]] +; CHECK: movlo [[OUT_LO]], [[REG1]] ; CHECK: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]] ; CHECK: cmp ; CHECK: bne @@ -432,12 +414,10 @@ define i64 @test13(ptr %ptr, i64 %val) { ; CHECK-THUMB-BE: subs.w {{[^,]+}}, r3, [[REG2]] ; CHECK-THUMB-LE: sbcs.w {{[^,]+}}, r3, [[REG2]] ; CHECK-THUMB-BE: sbcs.w {{[^,]+}}, r2, [[REG1]] -; CHECK-THUMB: mov.w [[CMP:[a-z0-9]+]], #0 -; CHECK-THUMB: movlo.w [[CMP]], #1 -; CHECK-THUMB: cmp.w [[CMP]], #0 ; CHECK-THUMB: mov [[OUT_HI:[a-z0-9]+]], r3 -; CHECK-THUMB: movne [[OUT_HI]], [[REG2]] -; CHECK-THUMB: movne [[OUT_LO]], [[REG1]] +; CHECK-THUMB: itt lo +; CHECK-THUMB: movlo [[OUT_HI]], [[REG2]] +; CHECK-THUMB: movlo [[OUT_LO]], [[REG1]] ; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[OUT_LO]], [[OUT_HI]] ; CHECK-THUMB: cmp ; CHECK-THUMB: bne diff --git a/llvm/test/CodeGen/ARM/atomic-ops-v8.ll b/llvm/test/CodeGen/ARM/atomic-ops-v8.ll index 0a467c2b70acf..d48b070aa862e 100644 --- 
a/llvm/test/CodeGen/ARM/atomic-ops-v8.ll +++ b/llvm/test/CodeGen/ARM/atomic-ops-v8.ll @@ -672,12 +672,9 @@ define void @test_atomic_load_min_i64(i64 %offset) nounwind { ; CHECK-ARM-LE: sbcs {{[^,]+}}, r1, [[OLD2]] ; CHECK-ARM-BE: subs {{[^,]+}}, r1, [[OLD2]] ; CHECK-ARM-BE: sbcs {{[^,]+}}, r0, [[OLD1]] -; CHECK-ARM: mov [[CMP:r[0-9]+|lr]], #0 -; CHECK-ARM: movwge [[CMP:r[0-9]+|lr]], #1 -; CHECK-ARM: cmp [[CMP:r[0-9]+|lr]], #0 -; CHECK-ARM: movne [[MINHI]], [[OLD2]] +; CHECK-ARM: movge [[MINHI]], [[OLD2]] ; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0 -; CHECK-ARM: movne [[MINLO]], [[OLD1]] +; CHECK-ARM: movge [[MINLO]], [[OLD1]] ; CHECK-ARM: stlexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]] ; CHECK-THUMB: stlexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 @@ -785,12 +782,9 @@ define void @test_atomic_load_max_i64(i64 %offset) nounwind { ; CHECK-ARM-LE: sbcs {{[^,]+}}, r1, [[OLD2]] ; CHECK-ARM-BE: subs {{[^,]+}}, r1, [[OLD2]] ; CHECK-ARM-BE: sbcs {{[^,]+}}, r0, [[OLD1]] -; CHECK-ARM: mov [[CMP:r[0-9]+|lr]], #0 -; CHECK-ARM: movwlt [[CMP:r[0-9]+|lr]], #1 -; CHECK-ARM: cmp [[CMP:r[0-9]+|lr]], #0 -; CHECK-ARM: movne [[MINHI]], [[OLD2]] +; CHECK-ARM: movlt [[MINHI]], [[OLD2]] ; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0 -; CHECK-ARM: movne [[MINLO]], [[OLD1]] +; CHECK-ARM: movlt [[MINLO]], [[OLD1]] ; CHECK-ARM: strexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]] ; CHECK-THUMB: strexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 @@ -898,12 +892,9 @@ define void @test_atomic_load_umin_i64(i64 %offset) nounwind { ; CHECK-ARM-LE: sbcs {{[^,]+}}, r1, [[OLD2]] ; CHECK-ARM-BE: subs {{[^,]+}}, r1, [[OLD2]] ; CHECK-ARM-BE: sbcs {{[^,]+}}, r0, [[OLD1]] -; CHECK-ARM: mov [[CMP:r[0-9]+|lr]], #0 -; CHECK-ARM: movwhs [[CMP:r[0-9]+|lr]], #1 -; CHECK-ARM: cmp [[CMP:r[0-9]+|lr]], #0 -; CHECK-ARM: movne [[MINHI]], [[OLD2]] +; CHECK-ARM: movhs [[MINHI]], [[OLD2]] ; CHECK-ARM: mov 
[[MINLO:r[0-9]+]], r0 -; CHECK-ARM: movne [[MINLO]], [[OLD1]] +; CHECK-ARM: movhs [[MINLO]], [[OLD1]] ; CHECK-ARM: stlexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]] ; CHECK-THUMB: stlexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 @@ -1011,12 +1002,9 @@ define void @test_atomic_load_umax_i64(i64 %offset) nounwind { ; CHECK-ARM-LE: sbcs {{[^,]+}}, r1, [[OLD2]] ; CHECK-ARM-BE: subs {{[^,]+}}, r1, [[OLD2]] ; CHECK-ARM-BE: sbcs {{[^,]+}}, r0, [[OLD1]] -; CHECK-ARM: mov [[CMP:r[0-9]+|lr]], #0 -; CHECK-ARM: movwlo [[CMP:r[0-9]+|lr]], #1 -; CHECK-ARM: cmp [[CMP:r[0-9]+|lr]], #0 -; CHECK-ARM: movne [[MINHI]], [[OLD2]] +; CHECK-ARM: movlo [[MINHI]], [[OLD2]] ; CHECK-ARM: mov [[MINLO:r[0-9]+]], r0 -; CHECK-ARM: movne [[MINLO]], [[OLD1]] +; CHECK-ARM: movlo [[MINLO]], [[OLD1]] ; CHECK-ARM: stlexd [[STATUS:r[0-9]+]], [[MINLO]], [[MINHI]], [r[[ADDR]]] ; CHECK-THUMB: stlexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r[[ADDR]]] ; CHECK-NEXT: cmp [[STATUS]], #0 diff --git a/llvm/test/CodeGen/ARM/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/ARM/atomicrmw-cond-sub-clamp.ll index 62711ee683489..8706728c4b841 100644 --- a/llvm/test/CodeGen/ARM/atomicrmw-cond-sub-clamp.ll +++ b/llvm/test/CodeGen/ARM/atomicrmw-cond-sub-clamp.ll @@ -68,27 +68,23 @@ define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) { define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) { ; CHECK-LABEL: atomicrmw_usub_cond_i64: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: .save {r4, r5, r11, lr} +; CHECK-NEXT: push {r4, r5, r11, lr} +; CHECK-NEXT: mov r12, r0 ; CHECK-NEXT: dmb ish ; CHECK-NEXT: .LBB3_1: @ %atomicrmw.start ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrexd r4, r5, [r0] -; CHECK-NEXT: mov r1, #0 -; CHECK-NEXT: subs r6, r4, r2 -; CHECK-NEXT: sbcs r7, r5, r3 -; CHECK-NEXT: movwhs r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: moveq r7, 
r5 -; CHECK-NEXT: moveq r6, r4 -; CHECK-NEXT: strexd r1, r6, r7, [r0] -; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: ldrexd r0, r1, [r12] +; CHECK-NEXT: subs r4, r0, r2 +; CHECK-NEXT: sbcs r5, r1, r3 +; CHECK-NEXT: movlo r5, r1 +; CHECK-NEXT: movlo r4, r0 +; CHECK-NEXT: strexd lr, r4, r5, [r12] +; CHECK-NEXT: cmp lr, #0 ; CHECK-NEXT: bne .LBB3_1 ; CHECK-NEXT: @ %bb.2: @ %atomicrmw.end -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: dmb ish -; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc} +; CHECK-NEXT: pop {r4, r5, r11, pc} %result = atomicrmw usub_cond ptr %ptr, i64 %val seq_cst ret i64 %result } @@ -164,7 +160,7 @@ define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) { ; CHECK-NEXT: subs r6, r4, r2 ; CHECK-NEXT: sbcs r7, r5, r3 ; CHECK-NEXT: adc r1, r12, #0 -; CHECK-NEXT: eors r1, r1, #1 +; CHECK-NEXT: teq r1, #1 ; CHECK-NEXT: movwne r7, #0 ; CHECK-NEXT: movwne r6, #0 ; CHECK-NEXT: strexd r1, r6, r7, [r0] diff --git a/llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll index 243ec4deecdb8..433fb325a7349 100644 --- a/llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll @@ -69,29 +69,25 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { ; CHECK-LABEL: atomicrmw_uinc_wrap_i64: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: .save {r4, r6, r7, lr} +; CHECK-NEXT: push {r4, r6, r7, lr} +; CHECK-NEXT: mov r12, r0 ; CHECK-NEXT: dmb ish ; CHECK-NEXT: .LBB3_1: @ %atomicrmw.start ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrexd r4, r5, [r0] -; CHECK-NEXT: adds r6, r4, #1 -; CHECK-NEXT: adc r7, r5, #0 -; CHECK-NEXT: subs r1, r4, r2 -; CHECK-NEXT: sbcs r1, r5, r3 -; CHECK-NEXT: mov r1, #0 -; CHECK-NEXT: movwhs r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: movwne r7, #0 -; CHECK-NEXT: movwne r6, #0 
-; CHECK-NEXT: strexd r1, r6, r7, [r0] -; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: ldrexd r0, r1, [r12] +; CHECK-NEXT: adds r6, r0, #1 +; CHECK-NEXT: adc r7, r1, #0 +; CHECK-NEXT: subs r4, r0, r2 +; CHECK-NEXT: sbcs r4, r1, r3 +; CHECK-NEXT: movwhs r7, #0 +; CHECK-NEXT: movwhs r6, #0 +; CHECK-NEXT: strexd r4, r6, r7, [r12] +; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: bne .LBB3_1 ; CHECK-NEXT: @ %bb.2: @ %atomicrmw.end -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: dmb ish -; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc} +; CHECK-NEXT: pop {r4, r6, r7, pc} %result = atomicrmw uinc_wrap ptr %ptr, i64 %val seq_cst ret i64 %result } @@ -102,8 +98,8 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { ; CHECK-NEXT: dmb ish ; CHECK-NEXT: .LBB4_1: @ %atomicrmw.start ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: uxtb r3, r1 ; CHECK-NEXT: ldrexb r12, [r0] +; CHECK-NEXT: uxtb r3, r1 ; CHECK-NEXT: cmp r12, r3 ; CHECK-NEXT: mov r3, r1 ; CHECK-NEXT: subls r3, r12, #1 @@ -126,8 +122,8 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { ; CHECK-NEXT: dmb ish ; CHECK-NEXT: .LBB5_1: @ %atomicrmw.start ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: uxth r3, r1 ; CHECK-NEXT: ldrexh r12, [r0] +; CHECK-NEXT: uxth r3, r1 ; CHECK-NEXT: cmp r12, r3 ; CHECK-NEXT: mov r3, r1 ; CHECK-NEXT: subls r3, r12, #1 diff --git a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll index a38ade7cdbf06..f633315822cc3 100644 --- a/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll +++ b/llvm/test/CodeGen/ARM/atomicrmw_exclusive_monitor_ints.ll @@ -1422,8 +1422,8 @@ define i8 @test_max_i8() { ; CHECK-ARM8-NEXT: @ Child Loop BB7_2 Depth 2 ; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload ; CHECK-ARM8-NEXT: sxtb r0, r1 -; CHECK-ARM8-NEXT: mov r12, #1 ; CHECK-ARM8-NEXT: cmp r0, #1 +; CHECK-ARM8-NEXT: mov r12, #1 ; CHECK-ARM8-NEXT: movgt r12, r1 ; CHECK-ARM8-NEXT: movw r3, 
:lower16:atomic_i8 ; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i8 @@ -1468,8 +1468,8 @@ define i8 @test_max_i8() { ; CHECK-ARM6-NEXT: @ Child Loop BB7_2 Depth 2 ; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload ; CHECK-ARM6-NEXT: sxtb r0, r1 -; CHECK-ARM6-NEXT: mov r12, #1 ; CHECK-ARM6-NEXT: cmp r0, #1 +; CHECK-ARM6-NEXT: mov r12, #1 ; CHECK-ARM6-NEXT: movgt r12, r1 ; CHECK-ARM6-NEXT: ldr r3, .LCPI7_0 ; CHECK-ARM6-NEXT: uxtb r1, r1 @@ -1518,8 +1518,8 @@ define i8 @test_max_i8() { ; CHECK-THUMB7-NEXT: @ Child Loop BB7_2 Depth 2 ; CHECK-THUMB7-NEXT: ldr r1, [sp, #4] @ 4-byte Reload ; CHECK-THUMB7-NEXT: sxtb r0, r1 -; CHECK-THUMB7-NEXT: mov.w r12, #1 ; CHECK-THUMB7-NEXT: cmp r0, #1 +; CHECK-THUMB7-NEXT: mov.w r12, #1 ; CHECK-THUMB7-NEXT: it gt ; CHECK-THUMB7-NEXT: movgt r12, r1 ; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i8 @@ -1643,8 +1643,8 @@ define i8 @test_min_i8() { ; CHECK-ARM8-NEXT: @ Child Loop BB8_2 Depth 2 ; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload ; CHECK-ARM8-NEXT: sxtb r0, r1 -; CHECK-ARM8-NEXT: mov r12, #1 ; CHECK-ARM8-NEXT: cmp r0, #2 +; CHECK-ARM8-NEXT: mov r12, #1 ; CHECK-ARM8-NEXT: movlt r12, r1 ; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i8 ; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i8 @@ -1689,8 +1689,8 @@ define i8 @test_min_i8() { ; CHECK-ARM6-NEXT: @ Child Loop BB8_2 Depth 2 ; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload ; CHECK-ARM6-NEXT: sxtb r0, r1 -; CHECK-ARM6-NEXT: mov r12, #1 ; CHECK-ARM6-NEXT: cmp r0, #2 +; CHECK-ARM6-NEXT: mov r12, #1 ; CHECK-ARM6-NEXT: movlt r12, r1 ; CHECK-ARM6-NEXT: ldr r3, .LCPI8_0 ; CHECK-ARM6-NEXT: uxtb r1, r1 @@ -1739,8 +1739,8 @@ define i8 @test_min_i8() { ; CHECK-THUMB7-NEXT: @ Child Loop BB8_2 Depth 2 ; CHECK-THUMB7-NEXT: ldr r1, [sp, #4] @ 4-byte Reload ; CHECK-THUMB7-NEXT: sxtb r0, r1 -; CHECK-THUMB7-NEXT: mov.w r12, #1 ; CHECK-THUMB7-NEXT: cmp r0, #2 +; CHECK-THUMB7-NEXT: mov.w r12, #1 ; CHECK-THUMB7-NEXT: it lt ; CHECK-THUMB7-NEXT: movlt r12, r1 ; CHECK-THUMB7-NEXT: movw r3, 
:lower16:atomic_i8 @@ -1866,8 +1866,8 @@ define i8 @test_umax_i8() { ; CHECK-ARM8-NEXT: @ Child Loop BB9_2 Depth 2 ; CHECK-ARM8-NEXT: ldr r12, [sp, #4] @ 4-byte Reload ; CHECK-ARM8-NEXT: uxtb r1, r12 -; CHECK-ARM8-NEXT: mov lr, #1 ; CHECK-ARM8-NEXT: cmp r1, #1 +; CHECK-ARM8-NEXT: mov lr, #1 ; CHECK-ARM8-NEXT: movhi lr, r12 ; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i8 ; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i8 @@ -1913,8 +1913,8 @@ define i8 @test_umax_i8() { ; CHECK-ARM6-NEXT: @ Child Loop BB9_2 Depth 2 ; CHECK-ARM6-NEXT: ldr r12, [sp, #4] @ 4-byte Reload ; CHECK-ARM6-NEXT: uxtb r1, r12 -; CHECK-ARM6-NEXT: mov lr, #1 ; CHECK-ARM6-NEXT: cmp r1, #1 +; CHECK-ARM6-NEXT: mov lr, #1 ; CHECK-ARM6-NEXT: movhi lr, r12 ; CHECK-ARM6-NEXT: ldr r3, .LCPI9_0 ; CHECK-ARM6-NEXT: uxtb r12, r12 @@ -1964,8 +1964,8 @@ define i8 @test_umax_i8() { ; CHECK-THUMB7-NEXT: @ Child Loop BB9_2 Depth 2 ; CHECK-THUMB7-NEXT: ldr r4, [sp, #4] @ 4-byte Reload ; CHECK-THUMB7-NEXT: uxtb r1, r4 -; CHECK-THUMB7-NEXT: mov.w r12, #1 ; CHECK-THUMB7-NEXT: cmp r1, #1 +; CHECK-THUMB7-NEXT: mov.w r12, #1 ; CHECK-THUMB7-NEXT: it hi ; CHECK-THUMB7-NEXT: movhi r12, r4 ; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i8 @@ -2091,8 +2091,8 @@ define i8 @test_umin_i8() { ; CHECK-ARM8-NEXT: @ Child Loop BB10_2 Depth 2 ; CHECK-ARM8-NEXT: ldr r12, [sp, #4] @ 4-byte Reload ; CHECK-ARM8-NEXT: uxtb r1, r12 -; CHECK-ARM8-NEXT: mov lr, #1 ; CHECK-ARM8-NEXT: cmp r1, #2 +; CHECK-ARM8-NEXT: mov lr, #1 ; CHECK-ARM8-NEXT: movlo lr, r12 ; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i8 ; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i8 @@ -2138,8 +2138,8 @@ define i8 @test_umin_i8() { ; CHECK-ARM6-NEXT: @ Child Loop BB10_2 Depth 2 ; CHECK-ARM6-NEXT: ldr r12, [sp, #4] @ 4-byte Reload ; CHECK-ARM6-NEXT: uxtb r1, r12 -; CHECK-ARM6-NEXT: mov lr, #1 ; CHECK-ARM6-NEXT: cmp r1, #2 +; CHECK-ARM6-NEXT: mov lr, #1 ; CHECK-ARM6-NEXT: movlo lr, r12 ; CHECK-ARM6-NEXT: ldr r3, .LCPI10_0 ; CHECK-ARM6-NEXT: uxtb r12, r12 @@ -2189,8 +2189,8 @@ 
define i8 @test_umin_i8() { ; CHECK-THUMB7-NEXT: @ Child Loop BB10_2 Depth 2 ; CHECK-THUMB7-NEXT: ldr r4, [sp, #4] @ 4-byte Reload ; CHECK-THUMB7-NEXT: uxtb r1, r4 -; CHECK-THUMB7-NEXT: mov.w r12, #1 ; CHECK-THUMB7-NEXT: cmp r1, #2 +; CHECK-THUMB7-NEXT: mov.w r12, #1 ; CHECK-THUMB7-NEXT: it lo ; CHECK-THUMB7-NEXT: movlo r12, r4 ; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i8 @@ -3709,8 +3709,8 @@ define i16 @test_max_i16() { ; CHECK-ARM8-NEXT: @ Child Loop BB18_2 Depth 2 ; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload ; CHECK-ARM8-NEXT: sxth r0, r1 -; CHECK-ARM8-NEXT: mov r12, #1 ; CHECK-ARM8-NEXT: cmp r0, #1 +; CHECK-ARM8-NEXT: mov r12, #1 ; CHECK-ARM8-NEXT: movgt r12, r1 ; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i16 ; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i16 @@ -3755,8 +3755,8 @@ define i16 @test_max_i16() { ; CHECK-ARM6-NEXT: @ Child Loop BB18_2 Depth 2 ; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload ; CHECK-ARM6-NEXT: sxth r0, r1 -; CHECK-ARM6-NEXT: mov r12, #1 ; CHECK-ARM6-NEXT: cmp r0, #1 +; CHECK-ARM6-NEXT: mov r12, #1 ; CHECK-ARM6-NEXT: movgt r12, r1 ; CHECK-ARM6-NEXT: ldr r3, .LCPI18_0 ; CHECK-ARM6-NEXT: uxth r1, r1 @@ -3805,8 +3805,8 @@ define i16 @test_max_i16() { ; CHECK-THUMB7-NEXT: @ Child Loop BB18_2 Depth 2 ; CHECK-THUMB7-NEXT: ldr r1, [sp, #4] @ 4-byte Reload ; CHECK-THUMB7-NEXT: sxth r0, r1 -; CHECK-THUMB7-NEXT: mov.w r12, #1 ; CHECK-THUMB7-NEXT: cmp r0, #1 +; CHECK-THUMB7-NEXT: mov.w r12, #1 ; CHECK-THUMB7-NEXT: it gt ; CHECK-THUMB7-NEXT: movgt r12, r1 ; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i16 @@ -3930,8 +3930,8 @@ define i16 @test_min_i16() { ; CHECK-ARM8-NEXT: @ Child Loop BB19_2 Depth 2 ; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload ; CHECK-ARM8-NEXT: sxth r0, r1 -; CHECK-ARM8-NEXT: mov r12, #1 ; CHECK-ARM8-NEXT: cmp r0, #2 +; CHECK-ARM8-NEXT: mov r12, #1 ; CHECK-ARM8-NEXT: movlt r12, r1 ; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i16 ; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i16 @@ -3976,8 +3976,8 @@ define i16 
@test_min_i16() { ; CHECK-ARM6-NEXT: @ Child Loop BB19_2 Depth 2 ; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload ; CHECK-ARM6-NEXT: sxth r0, r1 -; CHECK-ARM6-NEXT: mov r12, #1 ; CHECK-ARM6-NEXT: cmp r0, #2 +; CHECK-ARM6-NEXT: mov r12, #1 ; CHECK-ARM6-NEXT: movlt r12, r1 ; CHECK-ARM6-NEXT: ldr r3, .LCPI19_0 ; CHECK-ARM6-NEXT: uxth r1, r1 @@ -4026,8 +4026,8 @@ define i16 @test_min_i16() { ; CHECK-THUMB7-NEXT: @ Child Loop BB19_2 Depth 2 ; CHECK-THUMB7-NEXT: ldr r1, [sp, #4] @ 4-byte Reload ; CHECK-THUMB7-NEXT: sxth r0, r1 -; CHECK-THUMB7-NEXT: mov.w r12, #1 ; CHECK-THUMB7-NEXT: cmp r0, #2 +; CHECK-THUMB7-NEXT: mov.w r12, #1 ; CHECK-THUMB7-NEXT: it lt ; CHECK-THUMB7-NEXT: movlt r12, r1 ; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i16 @@ -4153,8 +4153,8 @@ define i16 @test_umax_i16() { ; CHECK-ARM8-NEXT: @ Child Loop BB20_2 Depth 2 ; CHECK-ARM8-NEXT: ldr r12, [sp, #4] @ 4-byte Reload ; CHECK-ARM8-NEXT: uxth r1, r12 -; CHECK-ARM8-NEXT: mov lr, #1 ; CHECK-ARM8-NEXT: cmp r1, #1 +; CHECK-ARM8-NEXT: mov lr, #1 ; CHECK-ARM8-NEXT: movhi lr, r12 ; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i16 ; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i16 @@ -4200,8 +4200,8 @@ define i16 @test_umax_i16() { ; CHECK-ARM6-NEXT: @ Child Loop BB20_2 Depth 2 ; CHECK-ARM6-NEXT: ldr r12, [sp, #4] @ 4-byte Reload ; CHECK-ARM6-NEXT: uxth r1, r12 -; CHECK-ARM6-NEXT: mov lr, #1 ; CHECK-ARM6-NEXT: cmp r1, #1 +; CHECK-ARM6-NEXT: mov lr, #1 ; CHECK-ARM6-NEXT: movhi lr, r12 ; CHECK-ARM6-NEXT: ldr r3, .LCPI20_0 ; CHECK-ARM6-NEXT: uxth r12, r12 @@ -4251,8 +4251,8 @@ define i16 @test_umax_i16() { ; CHECK-THUMB7-NEXT: @ Child Loop BB20_2 Depth 2 ; CHECK-THUMB7-NEXT: ldr r4, [sp, #4] @ 4-byte Reload ; CHECK-THUMB7-NEXT: uxth r1, r4 -; CHECK-THUMB7-NEXT: mov.w r12, #1 ; CHECK-THUMB7-NEXT: cmp r1, #1 +; CHECK-THUMB7-NEXT: mov.w r12, #1 ; CHECK-THUMB7-NEXT: it hi ; CHECK-THUMB7-NEXT: movhi r12, r4 ; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i16 @@ -4378,8 +4378,8 @@ define i16 @test_umin_i16() { ; 
CHECK-ARM8-NEXT: @ Child Loop BB21_2 Depth 2 ; CHECK-ARM8-NEXT: ldr r12, [sp, #4] @ 4-byte Reload ; CHECK-ARM8-NEXT: uxth r1, r12 -; CHECK-ARM8-NEXT: mov lr, #1 ; CHECK-ARM8-NEXT: cmp r1, #2 +; CHECK-ARM8-NEXT: mov lr, #1 ; CHECK-ARM8-NEXT: movlo lr, r12 ; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i16 ; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i16 @@ -4425,8 +4425,8 @@ define i16 @test_umin_i16() { ; CHECK-ARM6-NEXT: @ Child Loop BB21_2 Depth 2 ; CHECK-ARM6-NEXT: ldr r12, [sp, #4] @ 4-byte Reload ; CHECK-ARM6-NEXT: uxth r1, r12 -; CHECK-ARM6-NEXT: mov lr, #1 ; CHECK-ARM6-NEXT: cmp r1, #2 +; CHECK-ARM6-NEXT: mov lr, #1 ; CHECK-ARM6-NEXT: movlo lr, r12 ; CHECK-ARM6-NEXT: ldr r3, .LCPI21_0 ; CHECK-ARM6-NEXT: uxth r12, r12 @@ -4476,8 +4476,8 @@ define i16 @test_umin_i16() { ; CHECK-THUMB7-NEXT: @ Child Loop BB21_2 Depth 2 ; CHECK-THUMB7-NEXT: ldr r4, [sp, #4] @ 4-byte Reload ; CHECK-THUMB7-NEXT: uxth r1, r4 -; CHECK-THUMB7-NEXT: mov.w r12, #1 ; CHECK-THUMB7-NEXT: cmp r1, #2 +; CHECK-THUMB7-NEXT: mov.w r12, #1 ; CHECK-THUMB7-NEXT: it lo ; CHECK-THUMB7-NEXT: movlo r12, r4 ; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i16 @@ -5939,8 +5939,8 @@ define i32 @test_max_i32() { ; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-ARM8-NEXT: @ Child Loop BB29_2 Depth 2 ; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload -; CHECK-ARM8-NEXT: mov r12, #1 ; CHECK-ARM8-NEXT: cmp r1, #1 +; CHECK-ARM8-NEXT: mov r12, #1 ; CHECK-ARM8-NEXT: movgt r12, r1 ; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i32 ; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i32 @@ -5982,8 +5982,8 @@ define i32 @test_max_i32() { ; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-ARM6-NEXT: @ Child Loop BB29_2 Depth 2 ; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload -; CHECK-ARM6-NEXT: mov r12, #1 ; CHECK-ARM6-NEXT: cmp r1, #1 +; CHECK-ARM6-NEXT: mov r12, #1 ; CHECK-ARM6-NEXT: movgt r12, r1 ; CHECK-ARM6-NEXT: ldr r3, .LCPI29_0 ; CHECK-ARM6-NEXT: .LBB29_2: @ %atomicrmw.start @@ -6029,8 +6029,8 @@ define i32 
@test_max_i32() { ; CHECK-THUMB7-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-THUMB7-NEXT: @ Child Loop BB29_2 Depth 2 ; CHECK-THUMB7-NEXT: ldr r1, [sp, #4] @ 4-byte Reload -; CHECK-THUMB7-NEXT: mov.w r12, #1 ; CHECK-THUMB7-NEXT: cmp r1, #1 +; CHECK-THUMB7-NEXT: mov.w r12, #1 ; CHECK-THUMB7-NEXT: it gt ; CHECK-THUMB7-NEXT: movgt r12, r1 ; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i32 @@ -6148,8 +6148,8 @@ define i32 @test_min_i32() { ; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-ARM8-NEXT: @ Child Loop BB30_2 Depth 2 ; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload -; CHECK-ARM8-NEXT: mov r12, #1 ; CHECK-ARM8-NEXT: cmp r1, #2 +; CHECK-ARM8-NEXT: mov r12, #1 ; CHECK-ARM8-NEXT: movlt r12, r1 ; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i32 ; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i32 @@ -6191,8 +6191,8 @@ define i32 @test_min_i32() { ; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-ARM6-NEXT: @ Child Loop BB30_2 Depth 2 ; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload -; CHECK-ARM6-NEXT: mov r12, #1 ; CHECK-ARM6-NEXT: cmp r1, #2 +; CHECK-ARM6-NEXT: mov r12, #1 ; CHECK-ARM6-NEXT: movlt r12, r1 ; CHECK-ARM6-NEXT: ldr r3, .LCPI30_0 ; CHECK-ARM6-NEXT: .LBB30_2: @ %atomicrmw.start @@ -6238,8 +6238,8 @@ define i32 @test_min_i32() { ; CHECK-THUMB7-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-THUMB7-NEXT: @ Child Loop BB30_2 Depth 2 ; CHECK-THUMB7-NEXT: ldr r1, [sp, #4] @ 4-byte Reload -; CHECK-THUMB7-NEXT: mov.w r12, #1 ; CHECK-THUMB7-NEXT: cmp r1, #2 +; CHECK-THUMB7-NEXT: mov.w r12, #1 ; CHECK-THUMB7-NEXT: it lt ; CHECK-THUMB7-NEXT: movlt r12, r1 ; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i32 @@ -6357,8 +6357,8 @@ define i32 @test_umax_i32() { ; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-ARM8-NEXT: @ Child Loop BB31_2 Depth 2 ; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload -; CHECK-ARM8-NEXT: mov r12, #1 ; CHECK-ARM8-NEXT: cmp r1, #1 +; CHECK-ARM8-NEXT: mov r12, #1 ; CHECK-ARM8-NEXT: movhi r12, r1 ; CHECK-ARM8-NEXT: movw r3, 
:lower16:atomic_i32 ; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i32 @@ -6400,8 +6400,8 @@ define i32 @test_umax_i32() { ; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-ARM6-NEXT: @ Child Loop BB31_2 Depth 2 ; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload -; CHECK-ARM6-NEXT: mov r12, #1 ; CHECK-ARM6-NEXT: cmp r1, #1 +; CHECK-ARM6-NEXT: mov r12, #1 ; CHECK-ARM6-NEXT: movhi r12, r1 ; CHECK-ARM6-NEXT: ldr r3, .LCPI31_0 ; CHECK-ARM6-NEXT: .LBB31_2: @ %atomicrmw.start @@ -6447,8 +6447,8 @@ define i32 @test_umax_i32() { ; CHECK-THUMB7-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-THUMB7-NEXT: @ Child Loop BB31_2 Depth 2 ; CHECK-THUMB7-NEXT: ldr r1, [sp, #4] @ 4-byte Reload -; CHECK-THUMB7-NEXT: mov.w r12, #1 ; CHECK-THUMB7-NEXT: cmp r1, #1 +; CHECK-THUMB7-NEXT: mov.w r12, #1 ; CHECK-THUMB7-NEXT: it hi ; CHECK-THUMB7-NEXT: movhi r12, r1 ; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i32 @@ -6566,8 +6566,8 @@ define i32 @test_umin_i32() { ; CHECK-ARM8-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-ARM8-NEXT: @ Child Loop BB32_2 Depth 2 ; CHECK-ARM8-NEXT: ldr r1, [sp, #4] @ 4-byte Reload -; CHECK-ARM8-NEXT: mov r12, #1 ; CHECK-ARM8-NEXT: cmp r1, #2 +; CHECK-ARM8-NEXT: mov r12, #1 ; CHECK-ARM8-NEXT: movlo r12, r1 ; CHECK-ARM8-NEXT: movw r3, :lower16:atomic_i32 ; CHECK-ARM8-NEXT: movt r3, :upper16:atomic_i32 @@ -6609,8 +6609,8 @@ define i32 @test_umin_i32() { ; CHECK-ARM6-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-ARM6-NEXT: @ Child Loop BB32_2 Depth 2 ; CHECK-ARM6-NEXT: ldr r1, [sp, #4] @ 4-byte Reload -; CHECK-ARM6-NEXT: mov r12, #1 ; CHECK-ARM6-NEXT: cmp r1, #2 +; CHECK-ARM6-NEXT: mov r12, #1 ; CHECK-ARM6-NEXT: movlo r12, r1 ; CHECK-ARM6-NEXT: ldr r3, .LCPI32_0 ; CHECK-ARM6-NEXT: .LBB32_2: @ %atomicrmw.start @@ -6656,8 +6656,8 @@ define i32 @test_umin_i32() { ; CHECK-THUMB7-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-THUMB7-NEXT: @ Child Loop BB32_2 Depth 2 ; CHECK-THUMB7-NEXT: ldr r1, [sp, #4] @ 4-byte Reload -; CHECK-THUMB7-NEXT: mov.w r12, #1 ; CHECK-THUMB7-NEXT: 
cmp r1, #2 +; CHECK-THUMB7-NEXT: mov.w r12, #1 ; CHECK-THUMB7-NEXT: it lo ; CHECK-THUMB7-NEXT: movlo r12, r1 ; CHECK-THUMB7-NEXT: movw r3, :lower16:atomic_i32 @@ -8342,8 +8342,7 @@ define i64 @test_max_i64() { ; CHECK-ARM8-NEXT: mov r0, #0 ; CHECK-ARM8-NEXT: movwlt r0, #1 ; CHECK-ARM8-NEXT: mov r10, #1 -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: movne r10, r2 +; CHECK-ARM8-NEXT: movlt r10, r2 ; CHECK-ARM8-NEXT: cmp r0, #0 ; CHECK-ARM8-NEXT: movne r0, r1 ; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 @@ -8410,8 +8409,7 @@ define i64 @test_max_i64() { ; CHECK-ARM6-NEXT: mov r0, #0 ; CHECK-ARM6-NEXT: movlt r0, #1 ; CHECK-ARM6-NEXT: mov r10, #1 -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: movne r10, r2 +; CHECK-ARM6-NEXT: movlt r10, r2 ; CHECK-ARM6-NEXT: cmp r0, #0 ; CHECK-ARM6-NEXT: movne r0, r1 ; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 @@ -8483,9 +8481,8 @@ define i64 @test_max_i64() { ; CHECK-THUMB7-NEXT: mov r8, r2 ; CHECK-THUMB7-NEXT: mov r9, r1 ; CHECK-THUMB7-NEXT: mov.w r10, #1 -; CHECK-THUMB7-NEXT: cmp r0, #0 -; CHECK-THUMB7-NEXT: it ne -; CHECK-THUMB7-NEXT: movne r10, r2 +; CHECK-THUMB7-NEXT: it lt +; CHECK-THUMB7-NEXT: movlt r10, r2 ; CHECK-THUMB7-NEXT: cmp r0, #0 ; CHECK-THUMB7-NEXT: it ne ; CHECK-THUMB7-NEXT: movne r0, r1 @@ -8581,7 +8578,7 @@ define i64 @test_max_i64() { ; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #52] @ 4-byte Reload ; CHECK-THUMB8BASE-NEXT: str r1, [sp, #28] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: cbnz r1, .LBB40_5 +; CHECK-THUMB8BASE-NEXT: blt .LBB40_5 ; CHECK-THUMB8BASE-NEXT: @ %bb.4: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB40_1 Depth=1 ; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #48] @ 4-byte Reload @@ -8658,8 +8655,7 @@ define i64 @test_min_i64() { ; CHECK-ARM8-NEXT: mov r0, #0 ; CHECK-ARM8-NEXT: movwlt r0, #1 ; CHECK-ARM8-NEXT: mov r10, #1 -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: movne r10, r2 +; 
CHECK-ARM8-NEXT: movlt r10, r2 ; CHECK-ARM8-NEXT: cmp r0, #0 ; CHECK-ARM8-NEXT: movne r0, r1 ; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 @@ -8726,8 +8722,7 @@ define i64 @test_min_i64() { ; CHECK-ARM6-NEXT: mov r0, #0 ; CHECK-ARM6-NEXT: movlt r0, #1 ; CHECK-ARM6-NEXT: mov r10, #1 -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: movne r10, r2 +; CHECK-ARM6-NEXT: movlt r10, r2 ; CHECK-ARM6-NEXT: cmp r0, #0 ; CHECK-ARM6-NEXT: movne r0, r1 ; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 @@ -8799,9 +8794,8 @@ define i64 @test_min_i64() { ; CHECK-THUMB7-NEXT: it lt ; CHECK-THUMB7-NEXT: movlt r0, #1 ; CHECK-THUMB7-NEXT: mov.w r10, #1 -; CHECK-THUMB7-NEXT: cmp r0, #0 -; CHECK-THUMB7-NEXT: it ne -; CHECK-THUMB7-NEXT: movne r10, r2 +; CHECK-THUMB7-NEXT: it lt +; CHECK-THUMB7-NEXT: movlt r10, r2 ; CHECK-THUMB7-NEXT: cmp r0, #0 ; CHECK-THUMB7-NEXT: it ne ; CHECK-THUMB7-NEXT: movne r0, r1 @@ -8897,7 +8891,7 @@ define i64 @test_min_i64() { ; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #52] @ 4-byte Reload ; CHECK-THUMB8BASE-NEXT: str r1, [sp, #28] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: cbnz r1, .LBB41_5 +; CHECK-THUMB8BASE-NEXT: blt .LBB41_5 ; CHECK-THUMB8BASE-NEXT: @ %bb.4: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB41_1 Depth=1 ; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload @@ -8974,8 +8968,7 @@ define i64 @test_umax_i64() { ; CHECK-ARM8-NEXT: mov r0, #0 ; CHECK-ARM8-NEXT: movwlo r0, #1 ; CHECK-ARM8-NEXT: mov r10, #1 -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: movne r10, r2 +; CHECK-ARM8-NEXT: movlo r10, r2 ; CHECK-ARM8-NEXT: cmp r0, #0 ; CHECK-ARM8-NEXT: movne r0, r1 ; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 @@ -9042,8 +9035,7 @@ define i64 @test_umax_i64() { ; CHECK-ARM6-NEXT: mov r0, #0 ; CHECK-ARM6-NEXT: movlo r0, #1 ; CHECK-ARM6-NEXT: mov r10, #1 -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: movne r10, r2 +; CHECK-ARM6-NEXT: 
movlo r10, r2 ; CHECK-ARM6-NEXT: cmp r0, #0 ; CHECK-ARM6-NEXT: movne r0, r1 ; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 @@ -9115,9 +9107,8 @@ define i64 @test_umax_i64() { ; CHECK-THUMB7-NEXT: mov r8, r2 ; CHECK-THUMB7-NEXT: mov r9, r1 ; CHECK-THUMB7-NEXT: mov.w r10, #1 -; CHECK-THUMB7-NEXT: cmp r0, #0 -; CHECK-THUMB7-NEXT: it ne -; CHECK-THUMB7-NEXT: movne r10, r2 +; CHECK-THUMB7-NEXT: it lo +; CHECK-THUMB7-NEXT: movlo r10, r2 ; CHECK-THUMB7-NEXT: cmp r0, #0 ; CHECK-THUMB7-NEXT: it ne ; CHECK-THUMB7-NEXT: movne r0, r1 @@ -9213,7 +9204,7 @@ define i64 @test_umax_i64() { ; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #52] @ 4-byte Reload ; CHECK-THUMB8BASE-NEXT: str r1, [sp, #28] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: cbnz r1, .LBB42_5 +; CHECK-THUMB8BASE-NEXT: blo .LBB42_5 ; CHECK-THUMB8BASE-NEXT: @ %bb.4: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB42_1 Depth=1 ; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #48] @ 4-byte Reload @@ -9290,8 +9281,7 @@ define i64 @test_umin_i64() { ; CHECK-ARM8-NEXT: mov r0, #0 ; CHECK-ARM8-NEXT: movwlo r0, #1 ; CHECK-ARM8-NEXT: mov r10, #1 -; CHECK-ARM8-NEXT: cmp r0, #0 -; CHECK-ARM8-NEXT: movne r10, r2 +; CHECK-ARM8-NEXT: movlo r10, r2 ; CHECK-ARM8-NEXT: cmp r0, #0 ; CHECK-ARM8-NEXT: movne r0, r1 ; CHECK-ARM8-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 @@ -9358,8 +9348,7 @@ define i64 @test_umin_i64() { ; CHECK-ARM6-NEXT: mov r0, #0 ; CHECK-ARM6-NEXT: movlo r0, #1 ; CHECK-ARM6-NEXT: mov r10, #1 -; CHECK-ARM6-NEXT: cmp r0, #0 -; CHECK-ARM6-NEXT: movne r10, r2 +; CHECK-ARM6-NEXT: movlo r10, r2 ; CHECK-ARM6-NEXT: cmp r0, #0 ; CHECK-ARM6-NEXT: movne r0, r1 ; CHECK-ARM6-NEXT: @ kill: def $r10 killed $r10 def $r10_r11 @@ -9431,9 +9420,8 @@ define i64 @test_umin_i64() { ; CHECK-THUMB7-NEXT: it lo ; CHECK-THUMB7-NEXT: movlo r0, #1 ; CHECK-THUMB7-NEXT: mov.w r10, #1 -; CHECK-THUMB7-NEXT: cmp r0, #0 -; CHECK-THUMB7-NEXT: it ne -; CHECK-THUMB7-NEXT: movne r10, 
r2 +; CHECK-THUMB7-NEXT: it lo +; CHECK-THUMB7-NEXT: movlo r10, r2 ; CHECK-THUMB7-NEXT: cmp r0, #0 ; CHECK-THUMB7-NEXT: it ne ; CHECK-THUMB7-NEXT: movne r0, r1 @@ -9529,7 +9517,7 @@ define i64 @test_umin_i64() { ; CHECK-THUMB8BASE-NEXT: ldr r1, [sp, #52] @ 4-byte Reload ; CHECK-THUMB8BASE-NEXT: str r1, [sp, #28] @ 4-byte Spill ; CHECK-THUMB8BASE-NEXT: str r0, [sp, #32] @ 4-byte Spill -; CHECK-THUMB8BASE-NEXT: cbnz r1, .LBB43_5 +; CHECK-THUMB8BASE-NEXT: blo .LBB43_5 ; CHECK-THUMB8BASE-NEXT: @ %bb.4: @ %atomicrmw.start ; CHECK-THUMB8BASE-NEXT: @ in Loop: Header=BB43_1 Depth=1 ; CHECK-THUMB8BASE-NEXT: ldr r0, [sp, #44] @ 4-byte Reload diff --git a/llvm/test/CodeGen/ARM/bfi.ll b/llvm/test/CodeGen/ARM/bfi.ll index 91a74e535a221..5aeb99695a5fe 100644 --- a/llvm/test/CodeGen/ARM/bfi.ll +++ b/llvm/test/CodeGen/ARM/bfi.ll @@ -204,10 +204,11 @@ define i32 @f12(i32 %x, i32 %y) { define i32 @f13(i32 %x, i32 %y) { ; CHECK-LABEL: f13: ; CHECK: @ %bb.0: -; CHECK-NEXT: and r2, r0, #4 -; CHECK-NEXT: bic r0, r1, #255 -; CHECK-NEXT: cmp r2, #42 -; CHECK-NEXT: orrne r0, r0, #16 +; CHECK-NEXT: and r0, r0, #4 +; CHECK-NEXT: bic r1, r1, #255 +; CHECK-NEXT: cmp r0, #42 +; CHECK-NEXT: orrne r1, r1, #16 +; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bx lr %y2 = and i32 %y, 4294967040 ; 0xFFFFFF00 %and = and i32 %x, 4 diff --git a/llvm/test/CodeGen/ARM/cmov_fp16.ll b/llvm/test/CodeGen/ARM/cmov_fp16.ll index 805955d3e8306..fb8da3724ede8 100644 --- a/llvm/test/CodeGen/ARM/cmov_fp16.ll +++ b/llvm/test/CodeGen/ARM/cmov_fp16.ll @@ -5,12 +5,12 @@ define i32 @test_ne(i32 %x, i32 %y, i32 %a, i32 %b) { ; CHECK-LABEL: test_ne: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov s0, r1 +; CHECK-NEXT: vmov s0, r0 ; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmov s2, r1 ; CHECK-NEXT: vcvt.f16.u32 s0, s0 ; CHECK-NEXT: vcvt.f16.u32 s2, s2 -; CHECK-NEXT: vseleq.f16 s0, s0, s2 +; CHECK-NEXT: vseleq.f16 s0, s2, s0 ; CHECK-NEXT: vmov.f16 r0, s0 ; CHECK-NEXT: bx lr entry: @@ -26,12 +26,12 @@ entry: 
define i32 @test_eq(i32 %x, i32 %y, i32 %a, i32 %b) { ; CHECK-LABEL: test_eq: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vmov s0, r1 ; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: vmov s2, r1 +; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vcvt.f16.u32 s0, s0 ; CHECK-NEXT: vcvt.f16.u32 s2, s2 -; CHECK-NEXT: vseleq.f16 s0, s0, s2 +; CHECK-NEXT: vseleq.f16 s0, s2, s0 ; CHECK-NEXT: vmov.f16 r0, s0 ; CHECK-NEXT: bx lr entry: @@ -47,12 +47,12 @@ entry: define i32 @test_gt(i32 %x, i32 %y, i32 %a, i32 %b) { ; CHECK-LABEL: test_gt: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vmov s0, r1 ; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: vmov s2, r1 +; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vcvt.f16.u32 s0, s0 ; CHECK-NEXT: vcvt.f16.u32 s2, s2 -; CHECK-NEXT: vselgt.f16 s0, s0, s2 +; CHECK-NEXT: vselgt.f16 s0, s2, s0 ; CHECK-NEXT: vmov.f16 r0, s0 ; CHECK-NEXT: bx lr entry: @@ -68,12 +68,12 @@ entry: define i32 @test_ge(i32 %x, i32 %y, i32 %a, i32 %b) { ; CHECK-LABEL: test_ge: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vmov s0, r1 ; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: vmov s2, r1 +; CHECK-NEXT: vmov s2, r0 ; CHECK-NEXT: vcvt.f16.u32 s0, s0 ; CHECK-NEXT: vcvt.f16.u32 s2, s2 -; CHECK-NEXT: vselge.f16 s0, s0, s2 +; CHECK-NEXT: vselge.f16 s0, s2, s0 ; CHECK-NEXT: vmov.f16 r0, s0 ; CHECK-NEXT: bx lr entry: @@ -89,12 +89,12 @@ entry: define i32 @test_lt(i32 %x, i32 %y, i32 %a, i32 %b) { ; CHECK-LABEL: test_lt: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov s0, r1 +; CHECK-NEXT: vmov s0, r0 ; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmov s2, r1 ; CHECK-NEXT: vcvt.f16.u32 s0, s0 ; CHECK-NEXT: vcvt.f16.u32 s2, s2 -; CHECK-NEXT: vselge.f16 s0, s0, s2 +; CHECK-NEXT: vselge.f16 s0, s2, s0 ; CHECK-NEXT: vmov.f16 r0, s0 ; CHECK-NEXT: bx lr entry: @@ -110,12 +110,12 @@ entry: define i32 @test_le(i32 %x, i32 %y, i32 %a, i32 %b) { ; CHECK-LABEL: test_le: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov s0, r1 +; 
CHECK-NEXT: vmov s0, r0 ; CHECK-NEXT: cmp r2, r3 -; CHECK-NEXT: vmov s2, r0 +; CHECK-NEXT: vmov s2, r1 ; CHECK-NEXT: vcvt.f16.u32 s0, s0 ; CHECK-NEXT: vcvt.f16.u32 s2, s2 -; CHECK-NEXT: vselgt.f16 s0, s0, s2 +; CHECK-NEXT: vselgt.f16 s0, s2, s0 ; CHECK-NEXT: vmov.f16 r0, s0 ; CHECK-NEXT: bx lr entry: @@ -131,25 +131,25 @@ entry: define i32 @test_hi(i32 %x, i32 %y, i32 %a, i32 %b) { ; CHECK-THUMB-LABEL: test_hi: ; CHECK-THUMB: @ %bb.0: @ %entry -; CHECK-THUMB-NEXT: vmov s0, r1 +; CHECK-THUMB-NEXT: vmov s2, r1 ; CHECK-THUMB-NEXT: cmp r2, r3 -; CHECK-THUMB-NEXT: vmov s2, r0 -; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-THUMB-NEXT: vmov s0, r0 ; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0 ; CHECK-THUMB-NEXT: it hi -; CHECK-THUMB-NEXT: vmovhi.f32 s0, s2 -; CHECK-THUMB-NEXT: vmov.f16 r0, s0 +; CHECK-THUMB-NEXT: vmovhi.f32 s2, s0 +; CHECK-THUMB-NEXT: vmov.f16 r0, s2 ; CHECK-THUMB-NEXT: bx lr ; ; CHECK-ARM-LABEL: test_hi: ; CHECK-ARM: @ %bb.0: @ %entry -; CHECK-ARM-NEXT: vmov s0, r1 +; CHECK-ARM-NEXT: vmov s0, r0 ; CHECK-ARM-NEXT: cmp r2, r3 -; CHECK-ARM-NEXT: vmov s2, r0 +; CHECK-ARM-NEXT: vmov s2, r1 ; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0 ; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2 -; CHECK-ARM-NEXT: vmovhi.f32 s0, s2 -; CHECK-ARM-NEXT: vmov.f16 r0, s0 +; CHECK-ARM-NEXT: vmovhi.f32 s2, s0 +; CHECK-ARM-NEXT: vmov.f16 r0, s2 ; CHECK-ARM-NEXT: bx lr entry: %x.half = uitofp i32 %x to half @@ -164,25 +164,25 @@ entry: define i32 @test_hs(i32 %x, i32 %y, i32 %a, i32 %b) { ; CHECK-THUMB-LABEL: test_hs: ; CHECK-THUMB: @ %bb.0: @ %entry -; CHECK-THUMB-NEXT: vmov s0, r1 +; CHECK-THUMB-NEXT: vmov s2, r1 ; CHECK-THUMB-NEXT: cmp r2, r3 -; CHECK-THUMB-NEXT: vmov s2, r0 -; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-THUMB-NEXT: vmov s0, r0 ; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0 ; CHECK-THUMB-NEXT: it hs -; CHECK-THUMB-NEXT: vmovhs.f32 s0, s2 -; CHECK-THUMB-NEXT: vmov.f16 r0, s0 +; CHECK-THUMB-NEXT: vmovhs.f32 
s2, s0 +; CHECK-THUMB-NEXT: vmov.f16 r0, s2 ; CHECK-THUMB-NEXT: bx lr ; ; CHECK-ARM-LABEL: test_hs: ; CHECK-ARM: @ %bb.0: @ %entry -; CHECK-ARM-NEXT: vmov s0, r1 +; CHECK-ARM-NEXT: vmov s0, r0 ; CHECK-ARM-NEXT: cmp r2, r3 -; CHECK-ARM-NEXT: vmov s2, r0 +; CHECK-ARM-NEXT: vmov s2, r1 ; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0 ; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2 -; CHECK-ARM-NEXT: vmovhs.f32 s0, s2 -; CHECK-ARM-NEXT: vmov.f16 r0, s0 +; CHECK-ARM-NEXT: vmovhs.f32 s2, s0 +; CHECK-ARM-NEXT: vmov.f16 r0, s2 ; CHECK-ARM-NEXT: bx lr entry: %x.half = uitofp i32 %x to half @@ -197,25 +197,25 @@ entry: define i32 @test_lo(i32 %x, i32 %y, i32 %a, i32 %b) { ; CHECK-THUMB-LABEL: test_lo: ; CHECK-THUMB: @ %bb.0: @ %entry -; CHECK-THUMB-NEXT: vmov s0, r1 +; CHECK-THUMB-NEXT: vmov s2, r1 ; CHECK-THUMB-NEXT: cmp r2, r3 -; CHECK-THUMB-NEXT: vmov s2, r0 -; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-THUMB-NEXT: vmov s0, r0 ; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0 ; CHECK-THUMB-NEXT: it lo -; CHECK-THUMB-NEXT: vmovlo.f32 s0, s2 -; CHECK-THUMB-NEXT: vmov.f16 r0, s0 +; CHECK-THUMB-NEXT: vmovlo.f32 s2, s0 +; CHECK-THUMB-NEXT: vmov.f16 r0, s2 ; CHECK-THUMB-NEXT: bx lr ; ; CHECK-ARM-LABEL: test_lo: ; CHECK-ARM: @ %bb.0: @ %entry -; CHECK-ARM-NEXT: vmov s0, r1 +; CHECK-ARM-NEXT: vmov s0, r0 ; CHECK-ARM-NEXT: cmp r2, r3 -; CHECK-ARM-NEXT: vmov s2, r0 +; CHECK-ARM-NEXT: vmov s2, r1 ; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0 ; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2 -; CHECK-ARM-NEXT: vmovlo.f32 s0, s2 -; CHECK-ARM-NEXT: vmov.f16 r0, s0 +; CHECK-ARM-NEXT: vmovlo.f32 s2, s0 +; CHECK-ARM-NEXT: vmov.f16 r0, s2 ; CHECK-ARM-NEXT: bx lr entry: %x.half = uitofp i32 %x to half @@ -230,25 +230,25 @@ entry: define i32 @test_ls(i32 %x, i32 %y, i32 %a, i32 %b) { ; CHECK-THUMB-LABEL: test_ls: ; CHECK-THUMB: @ %bb.0: @ %entry -; CHECK-THUMB-NEXT: vmov s0, r1 +; CHECK-THUMB-NEXT: vmov s2, r1 ; CHECK-THUMB-NEXT: cmp r2, r3 -; CHECK-THUMB-NEXT: vmov s2, r0 -; CHECK-THUMB-NEXT: 
vcvt.f16.u32 s0, s0 +; CHECK-THUMB-NEXT: vmov s0, r0 ; CHECK-THUMB-NEXT: vcvt.f16.u32 s2, s2 +; CHECK-THUMB-NEXT: vcvt.f16.u32 s0, s0 ; CHECK-THUMB-NEXT: it ls -; CHECK-THUMB-NEXT: vmovls.f32 s0, s2 -; CHECK-THUMB-NEXT: vmov.f16 r0, s0 +; CHECK-THUMB-NEXT: vmovls.f32 s2, s0 +; CHECK-THUMB-NEXT: vmov.f16 r0, s2 ; CHECK-THUMB-NEXT: bx lr ; ; CHECK-ARM-LABEL: test_ls: ; CHECK-ARM: @ %bb.0: @ %entry -; CHECK-ARM-NEXT: vmov s0, r1 +; CHECK-ARM-NEXT: vmov s0, r0 ; CHECK-ARM-NEXT: cmp r2, r3 -; CHECK-ARM-NEXT: vmov s2, r0 +; CHECK-ARM-NEXT: vmov s2, r1 ; CHECK-ARM-NEXT: vcvt.f16.u32 s0, s0 ; CHECK-ARM-NEXT: vcvt.f16.u32 s2, s2 -; CHECK-ARM-NEXT: vmovls.f32 s0, s2 -; CHECK-ARM-NEXT: vmov.f16 r0, s0 +; CHECK-ARM-NEXT: vmovls.f32 s2, s0 +; CHECK-ARM-NEXT: vmov.f16 r0, s2 ; CHECK-ARM-NEXT: bx lr entry: %x.half = uitofp i32 %x to half diff --git a/llvm/test/CodeGen/ARM/cse-call.ll b/llvm/test/CodeGen/ARM/cse-call.ll index 71cfa3b9da930..25fa477e5c2d3 100644 --- a/llvm/test/CodeGen/ARM/cse-call.ll +++ b/llvm/test/CodeGen/ARM/cse-call.ll @@ -25,7 +25,9 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 ; CHECK-T1: cmp ; CHECK: S_trimzeros -; CHECK: cmp +; CHECK-T1: S_trimzeros +; CHECK-NOT: moveq +; CHECK-T1-NOT: beq ; CHECK: strlen @F_floatmul.man1 = external global [200 x i8], align 1 diff --git a/llvm/test/CodeGen/ARM/cttz.ll b/llvm/test/CodeGen/ARM/cttz.ll index d9663a1c148fc..76adc61c5971f 100644 --- a/llvm/test/CodeGen/ARM/cttz.ll +++ b/llvm/test/CodeGen/ARM/cttz.ll @@ -223,42 +223,39 @@ define i64 @test_i64(i64 %a) { ; CHECK-6M: @ %bb.0: ; CHECK-6M-NEXT: .save {r4, r5, r7, lr} ; CHECK-6M-NEXT: push {r4, r5, r7, lr} +; CHECK-6M-NEXT: mov r2, r0 ; CHECK-6M-NEXT: ldr r5, .LCPI3_0 -; CHECK-6M-NEXT: adr r4, .LCPI3_1 -; CHECK-6M-NEXT: movs r3, #32 -; CHECK-6M-NEXT: cmp r0, #0 -; CHECK-6M-NEXT: mov r2, r3 -; CHECK-6M-NEXT: bne .LBB3_5 -; CHECK-6M-NEXT: @ %bb.1: +; CHECK-6M-NEXT: adr r3, .LCPI3_1 +; CHECK-6M-NEXT: movs r0, #32 ; CHECK-6M-NEXT: cmp 
r1, #0 -; CHECK-6M-NEXT: bne .LBB3_6 +; CHECK-6M-NEXT: mov r4, r0 +; CHECK-6M-NEXT: beq .LBB3_2 +; CHECK-6M-NEXT: @ %bb.1: +; CHECK-6M-NEXT: rsbs r4, r1, #0 +; CHECK-6M-NEXT: ands r4, r1 +; CHECK-6M-NEXT: muls r4, r5, r4 +; CHECK-6M-NEXT: lsrs r1, r4, #27 +; CHECK-6M-NEXT: ldrb r4, [r3, r1] ; CHECK-6M-NEXT: .LBB3_2: -; CHECK-6M-NEXT: cmp r0, #0 -; CHECK-6M-NEXT: bne .LBB3_4 -; CHECK-6M-NEXT: .LBB3_3: -; CHECK-6M-NEXT: adds r3, #32 -; CHECK-6M-NEXT: mov r2, r3 +; CHECK-6M-NEXT: adds r4, #32 +; CHECK-6M-NEXT: rsbs r1, r2, #0 +; CHECK-6M-NEXT: ands r1, r2 +; CHECK-6M-NEXT: muls r5, r1, r5 +; CHECK-6M-NEXT: lsrs r1, r5, #27 +; CHECK-6M-NEXT: cmp r2, #0 +; CHECK-6M-NEXT: bne .LBB3_5 +; CHECK-6M-NEXT: @ %bb.3: +; CHECK-6M-NEXT: beq .LBB3_6 ; CHECK-6M-NEXT: .LBB3_4: ; CHECK-6M-NEXT: movs r1, #0 -; CHECK-6M-NEXT: mov r0, r2 ; CHECK-6M-NEXT: pop {r4, r5, r7, pc} ; CHECK-6M-NEXT: .LBB3_5: -; CHECK-6M-NEXT: rsbs r2, r0, #0 -; CHECK-6M-NEXT: ands r2, r0 -; CHECK-6M-NEXT: muls r2, r5, r2 -; CHECK-6M-NEXT: lsrs r2, r2, #27 -; CHECK-6M-NEXT: ldrb r2, [r4, r2] -; CHECK-6M-NEXT: cmp r1, #0 -; CHECK-6M-NEXT: beq .LBB3_2 +; CHECK-6M-NEXT: ldrb r0, [r3, r1] +; CHECK-6M-NEXT: bne .LBB3_4 ; CHECK-6M-NEXT: .LBB3_6: -; CHECK-6M-NEXT: rsbs r3, r1, #0 -; CHECK-6M-NEXT: ands r3, r1 -; CHECK-6M-NEXT: muls r5, r3, r5 -; CHECK-6M-NEXT: lsrs r1, r5, #27 -; CHECK-6M-NEXT: ldrb r3, [r4, r1] -; CHECK-6M-NEXT: cmp r0, #0 -; CHECK-6M-NEXT: beq .LBB3_3 -; CHECK-6M-NEXT: b .LBB3_4 +; CHECK-6M-NEXT: mov r0, r4 +; CHECK-6M-NEXT: movs r1, #0 +; CHECK-6M-NEXT: pop {r4, r5, r7, pc} ; CHECK-6M-NEXT: .p2align 2 ; CHECK-6M-NEXT: @ %bb.7: ; CHECK-6M-NEXT: .LCPI3_0: @@ -270,40 +267,39 @@ define i64 @test_i64(i64 %a) { ; CHECK-8MBASE: @ %bb.0: ; CHECK-8MBASE-NEXT: .save {r4, r5, r7, lr} ; CHECK-8MBASE-NEXT: push {r4, r5, r7, lr} +; CHECK-8MBASE-NEXT: mov r2, r0 ; CHECK-8MBASE-NEXT: movw r5, #46385 ; CHECK-8MBASE-NEXT: movt r5, #1916 -; CHECK-8MBASE-NEXT: adr r4, .LCPI3_0 -; CHECK-8MBASE-NEXT: movs r3, #32 -; 
CHECK-8MBASE-NEXT: mov r2, r3 -; CHECK-8MBASE-NEXT: cbnz r0, .LBB3_5 +; CHECK-8MBASE-NEXT: adr r3, .LCPI3_0 +; CHECK-8MBASE-NEXT: movs r0, #32 +; CHECK-8MBASE-NEXT: mov r4, r0 +; CHECK-8MBASE-NEXT: cbz r1, .LBB3_2 ; CHECK-8MBASE-NEXT: @ %bb.1: -; CHECK-8MBASE-NEXT: cbnz r1, .LBB3_6 +; CHECK-8MBASE-NEXT: rsbs r4, r1, #0 +; CHECK-8MBASE-NEXT: ands r4, r1 +; CHECK-8MBASE-NEXT: muls r4, r5, r4 +; CHECK-8MBASE-NEXT: lsrs r1, r4, #27 +; CHECK-8MBASE-NEXT: ldrb r4, [r3, r1] ; CHECK-8MBASE-NEXT: .LBB3_2: -; CHECK-8MBASE-NEXT: cbnz r0, .LBB3_4 -; CHECK-8MBASE-NEXT: .LBB3_3: -; CHECK-8MBASE-NEXT: adds r3, #32 -; CHECK-8MBASE-NEXT: mov r2, r3 +; CHECK-8MBASE-NEXT: adds r4, #32 +; CHECK-8MBASE-NEXT: rsbs r1, r2, #0 +; CHECK-8MBASE-NEXT: ands r1, r2 +; CHECK-8MBASE-NEXT: muls r5, r1, r5 +; CHECK-8MBASE-NEXT: lsrs r1, r5, #27 +; CHECK-8MBASE-NEXT: cmp r2, #0 +; CHECK-8MBASE-NEXT: bne .LBB3_5 +; CHECK-8MBASE-NEXT: @ %bb.3: +; CHECK-8MBASE-NEXT: beq .LBB3_6 ; CHECK-8MBASE-NEXT: .LBB3_4: ; CHECK-8MBASE-NEXT: movs r1, #0 -; CHECK-8MBASE-NEXT: mov r0, r2 ; CHECK-8MBASE-NEXT: pop {r4, r5, r7, pc} ; CHECK-8MBASE-NEXT: .LBB3_5: -; CHECK-8MBASE-NEXT: rsbs r2, r0, #0 -; CHECK-8MBASE-NEXT: ands r2, r0 -; CHECK-8MBASE-NEXT: muls r2, r5, r2 -; CHECK-8MBASE-NEXT: lsrs r2, r2, #27 -; CHECK-8MBASE-NEXT: ldrb r2, [r4, r2] -; CHECK-8MBASE-NEXT: cmp r1, #0 -; CHECK-8MBASE-NEXT: beq .LBB3_2 +; CHECK-8MBASE-NEXT: ldrb r0, [r3, r1] +; CHECK-8MBASE-NEXT: bne .LBB3_4 ; CHECK-8MBASE-NEXT: .LBB3_6: -; CHECK-8MBASE-NEXT: rsbs r3, r1, #0 -; CHECK-8MBASE-NEXT: ands r3, r1 -; CHECK-8MBASE-NEXT: muls r5, r3, r5 -; CHECK-8MBASE-NEXT: lsrs r1, r5, #27 -; CHECK-8MBASE-NEXT: ldrb r3, [r4, r1] -; CHECK-8MBASE-NEXT: cmp r0, #0 -; CHECK-8MBASE-NEXT: beq .LBB3_3 -; CHECK-8MBASE-NEXT: b .LBB3_4 +; CHECK-8MBASE-NEXT: mov r0, r4 +; CHECK-8MBASE-NEXT: movs r1, #0 +; CHECK-8MBASE-NEXT: pop {r4, r5, r7, pc} ; CHECK-8MBASE-NEXT: .p2align 2 ; CHECK-8MBASE-NEXT: @ %bb.7: ; CHECK-8MBASE-NEXT: .LCPI3_0: @@ -494,42 +490,39 @@ 
define i64 @test_i64_zero_undef(i64 %a) { ; CHECK-6M: @ %bb.0: ; CHECK-6M-NEXT: .save {r4, r5, r7, lr} ; CHECK-6M-NEXT: push {r4, r5, r7, lr} +; CHECK-6M-NEXT: mov r2, r0 ; CHECK-6M-NEXT: ldr r5, .LCPI7_0 -; CHECK-6M-NEXT: adr r4, .LCPI7_1 -; CHECK-6M-NEXT: movs r3, #32 -; CHECK-6M-NEXT: cmp r0, #0 -; CHECK-6M-NEXT: mov r2, r3 -; CHECK-6M-NEXT: bne .LBB7_5 -; CHECK-6M-NEXT: @ %bb.1: +; CHECK-6M-NEXT: adr r3, .LCPI7_1 +; CHECK-6M-NEXT: movs r0, #32 ; CHECK-6M-NEXT: cmp r1, #0 -; CHECK-6M-NEXT: bne .LBB7_6 +; CHECK-6M-NEXT: mov r4, r0 +; CHECK-6M-NEXT: beq .LBB7_2 +; CHECK-6M-NEXT: @ %bb.1: +; CHECK-6M-NEXT: rsbs r4, r1, #0 +; CHECK-6M-NEXT: ands r4, r1 +; CHECK-6M-NEXT: muls r4, r5, r4 +; CHECK-6M-NEXT: lsrs r1, r4, #27 +; CHECK-6M-NEXT: ldrb r4, [r3, r1] ; CHECK-6M-NEXT: .LBB7_2: -; CHECK-6M-NEXT: cmp r0, #0 -; CHECK-6M-NEXT: bne .LBB7_4 -; CHECK-6M-NEXT: .LBB7_3: -; CHECK-6M-NEXT: adds r3, #32 -; CHECK-6M-NEXT: mov r2, r3 +; CHECK-6M-NEXT: adds r4, #32 +; CHECK-6M-NEXT: rsbs r1, r2, #0 +; CHECK-6M-NEXT: ands r1, r2 +; CHECK-6M-NEXT: muls r5, r1, r5 +; CHECK-6M-NEXT: lsrs r1, r5, #27 +; CHECK-6M-NEXT: cmp r2, #0 +; CHECK-6M-NEXT: bne .LBB7_5 +; CHECK-6M-NEXT: @ %bb.3: +; CHECK-6M-NEXT: beq .LBB7_6 ; CHECK-6M-NEXT: .LBB7_4: ; CHECK-6M-NEXT: movs r1, #0 -; CHECK-6M-NEXT: mov r0, r2 ; CHECK-6M-NEXT: pop {r4, r5, r7, pc} ; CHECK-6M-NEXT: .LBB7_5: -; CHECK-6M-NEXT: rsbs r2, r0, #0 -; CHECK-6M-NEXT: ands r2, r0 -; CHECK-6M-NEXT: muls r2, r5, r2 -; CHECK-6M-NEXT: lsrs r2, r2, #27 -; CHECK-6M-NEXT: ldrb r2, [r4, r2] -; CHECK-6M-NEXT: cmp r1, #0 -; CHECK-6M-NEXT: beq .LBB7_2 +; CHECK-6M-NEXT: ldrb r0, [r3, r1] +; CHECK-6M-NEXT: bne .LBB7_4 ; CHECK-6M-NEXT: .LBB7_6: -; CHECK-6M-NEXT: rsbs r3, r1, #0 -; CHECK-6M-NEXT: ands r3, r1 -; CHECK-6M-NEXT: muls r5, r3, r5 -; CHECK-6M-NEXT: lsrs r1, r5, #27 -; CHECK-6M-NEXT: ldrb r3, [r4, r1] -; CHECK-6M-NEXT: cmp r0, #0 -; CHECK-6M-NEXT: beq .LBB7_3 -; CHECK-6M-NEXT: b .LBB7_4 +; CHECK-6M-NEXT: mov r0, r4 +; CHECK-6M-NEXT: movs r1, #0 
+; CHECK-6M-NEXT: pop {r4, r5, r7, pc} ; CHECK-6M-NEXT: .p2align 2 ; CHECK-6M-NEXT: @ %bb.7: ; CHECK-6M-NEXT: .LCPI7_0: @@ -541,40 +534,39 @@ define i64 @test_i64_zero_undef(i64 %a) { ; CHECK-8MBASE: @ %bb.0: ; CHECK-8MBASE-NEXT: .save {r4, r5, r7, lr} ; CHECK-8MBASE-NEXT: push {r4, r5, r7, lr} +; CHECK-8MBASE-NEXT: mov r2, r0 ; CHECK-8MBASE-NEXT: movw r5, #46385 ; CHECK-8MBASE-NEXT: movt r5, #1916 -; CHECK-8MBASE-NEXT: adr r4, .LCPI7_0 -; CHECK-8MBASE-NEXT: movs r3, #32 -; CHECK-8MBASE-NEXT: mov r2, r3 -; CHECK-8MBASE-NEXT: cbnz r0, .LBB7_5 +; CHECK-8MBASE-NEXT: adr r3, .LCPI7_0 +; CHECK-8MBASE-NEXT: movs r0, #32 +; CHECK-8MBASE-NEXT: mov r4, r0 +; CHECK-8MBASE-NEXT: cbz r1, .LBB7_2 ; CHECK-8MBASE-NEXT: @ %bb.1: -; CHECK-8MBASE-NEXT: cbnz r1, .LBB7_6 +; CHECK-8MBASE-NEXT: rsbs r4, r1, #0 +; CHECK-8MBASE-NEXT: ands r4, r1 +; CHECK-8MBASE-NEXT: muls r4, r5, r4 +; CHECK-8MBASE-NEXT: lsrs r1, r4, #27 +; CHECK-8MBASE-NEXT: ldrb r4, [r3, r1] ; CHECK-8MBASE-NEXT: .LBB7_2: -; CHECK-8MBASE-NEXT: cbnz r0, .LBB7_4 -; CHECK-8MBASE-NEXT: .LBB7_3: -; CHECK-8MBASE-NEXT: adds r3, #32 -; CHECK-8MBASE-NEXT: mov r2, r3 +; CHECK-8MBASE-NEXT: adds r4, #32 +; CHECK-8MBASE-NEXT: rsbs r1, r2, #0 +; CHECK-8MBASE-NEXT: ands r1, r2 +; CHECK-8MBASE-NEXT: muls r5, r1, r5 +; CHECK-8MBASE-NEXT: lsrs r1, r5, #27 +; CHECK-8MBASE-NEXT: cmp r2, #0 +; CHECK-8MBASE-NEXT: bne .LBB7_5 +; CHECK-8MBASE-NEXT: @ %bb.3: +; CHECK-8MBASE-NEXT: beq .LBB7_6 ; CHECK-8MBASE-NEXT: .LBB7_4: ; CHECK-8MBASE-NEXT: movs r1, #0 -; CHECK-8MBASE-NEXT: mov r0, r2 ; CHECK-8MBASE-NEXT: pop {r4, r5, r7, pc} ; CHECK-8MBASE-NEXT: .LBB7_5: -; CHECK-8MBASE-NEXT: rsbs r2, r0, #0 -; CHECK-8MBASE-NEXT: ands r2, r0 -; CHECK-8MBASE-NEXT: muls r2, r5, r2 -; CHECK-8MBASE-NEXT: lsrs r2, r2, #27 -; CHECK-8MBASE-NEXT: ldrb r2, [r4, r2] -; CHECK-8MBASE-NEXT: cmp r1, #0 -; CHECK-8MBASE-NEXT: beq .LBB7_2 +; CHECK-8MBASE-NEXT: ldrb r0, [r3, r1] +; CHECK-8MBASE-NEXT: bne .LBB7_4 ; CHECK-8MBASE-NEXT: .LBB7_6: -; CHECK-8MBASE-NEXT: rsbs r3, r1, 
#0 -; CHECK-8MBASE-NEXT: ands r3, r1 -; CHECK-8MBASE-NEXT: muls r5, r3, r5 -; CHECK-8MBASE-NEXT: lsrs r1, r5, #27 -; CHECK-8MBASE-NEXT: ldrb r3, [r4, r1] -; CHECK-8MBASE-NEXT: cmp r0, #0 -; CHECK-8MBASE-NEXT: beq .LBB7_3 -; CHECK-8MBASE-NEXT: b .LBB7_4 +; CHECK-8MBASE-NEXT: mov r0, r4 +; CHECK-8MBASE-NEXT: movs r1, #0 +; CHECK-8MBASE-NEXT: pop {r4, r5, r7, pc} ; CHECK-8MBASE-NEXT: .p2align 2 ; CHECK-8MBASE-NEXT: @ %bb.7: ; CHECK-8MBASE-NEXT: .LCPI7_0: diff --git a/llvm/test/CodeGen/ARM/fadd-select-fneg-combine.ll b/llvm/test/CodeGen/ARM/fadd-select-fneg-combine.ll index 3f2b40460917e..90bb02fdc0cd4 100644 --- a/llvm/test/CodeGen/ARM/fadd-select-fneg-combine.ll +++ b/llvm/test/CodeGen/ARM/fadd-select-fneg-combine.ll @@ -4,12 +4,12 @@ define float @fadd_select_fneg_fneg_f32(i32 %arg0, float %x, float %y, float %z) { ; CHECK-LABEL: fadd_select_fneg_fneg_f32: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov s0, r3 +; CHECK-NEXT: vmov s0, r2 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: vmov s2, r2 ; CHECK-NEXT: vmov s4, r1 -; CHECK-NEXT: vseleq.f32 s2, s4, s2 -; CHECK-NEXT: vsub.f32 s0, s0, s2 +; CHECK-NEXT: vmov s2, r3 +; CHECK-NEXT: vseleq.f32 s0, s4, s0 +; CHECK-NEXT: vsub.f32 s0, s2, s0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bx lr %cmp = icmp eq i32 %arg0, 0 @@ -248,10 +248,10 @@ define half @fadd_select_fsub_select_f16(i32 %arg0, half %x, half %y, half %z) { define half @fadd_select_fneg_negk_f16(i32 %arg0, half %x, half %y) { ; CHECK-LABEL: fadd_select_fneg_negk_f16: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.f16 s0, r1 -; CHECK-NEXT: vmov.f16 s2, #4.000000e+00 +; CHECK-NEXT: vmov.f16 s0, #4.000000e+00 +; CHECK-NEXT: vmov.f16 s2, r1 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: vseleq.f16 s0, s0, s2 +; CHECK-NEXT: vseleq.f16 s0, s2, s0 ; CHECK-NEXT: vmov.f16 s2, r2 ; CHECK-NEXT: vsub.f16 s0, s2, s0 ; CHECK-NEXT: vmov r0, s0 @@ -266,10 +266,10 @@ define half @fadd_select_fneg_negk_f16(i32 %arg0, half %x, half %y) { define half @fadd_select_fneg_posk_f16(i32 %arg0, half %x, half %y) { ; 
CHECK-LABEL: fadd_select_fneg_posk_f16: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.f16 s0, r1 -; CHECK-NEXT: vmov.f16 s2, #-4.000000e+00 +; CHECK-NEXT: vmov.f16 s0, #-4.000000e+00 +; CHECK-NEXT: vmov.f16 s2, r1 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: vseleq.f16 s0, s0, s2 +; CHECK-NEXT: vseleq.f16 s0, s2, s0 ; CHECK-NEXT: vmov.f16 s2, r2 ; CHECK-NEXT: vsub.f16 s0, s2, s0 ; CHECK-NEXT: vmov r0, s0 diff --git a/llvm/test/CodeGen/ARM/fcmp-xo.ll b/llvm/test/CodeGen/ARM/fcmp-xo.ll index 908dbd7a11a6b..ad39cb744620d 100644 --- a/llvm/test/CodeGen/ARM/fcmp-xo.ll +++ b/llvm/test/CodeGen/ARM/fcmp-xo.ll @@ -69,12 +69,12 @@ define arm_aapcs_vfpcc float @float128(float %a0) local_unnamed_addr { define arm_aapcs_vfpcc double @double1(double %a0) local_unnamed_addr { ; CHECK-LABEL: double1: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.f64 d18, #1.000000e+00 -; CHECK-NEXT: vcmp.f64 d18, d0 +; CHECK-NEXT: vmov.f64 d16, #1.000000e+00 +; CHECK-NEXT: vcmp.f64 d16, d0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vmov.f64 d16, #5.000000e-01 -; CHECK-NEXT: vmov.f64 d17, #-5.000000e-01 -; CHECK-NEXT: vselgt.f64 d0, d17, d16 +; CHECK-NEXT: vmov.f64 d17, #5.000000e-01 +; CHECK-NEXT: vmov.f64 d18, #-5.000000e-01 +; CHECK-NEXT: vselgt.f64 d0, d18, d17 ; CHECK-NEXT: bx lr %1 = fcmp nsz olt double %a0, 1.000000e+00 %2 = select i1 %1, double -5.000000e-01, double 5.000000e-01 @@ -87,12 +87,12 @@ define arm_aapcs_vfpcc double @double128(double %a0) local_unnamed_addr { ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: movt r0, #16480 -; CHECK-NEXT: vmov.f64 d16, #5.000000e-01 -; CHECK-NEXT: vmov d18, r1, r0 -; CHECK-NEXT: vcmp.f64 d18, d0 +; CHECK-NEXT: vmov.f64 d17, #5.000000e-01 +; CHECK-NEXT: vmov d16, r1, r0 +; CHECK-NEXT: vcmp.f64 d16, d0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vmov.f64 d17, #-5.000000e-01 -; CHECK-NEXT: vselgt.f64 d0, d17, d16 +; CHECK-NEXT: vmov.f64 d18, #-5.000000e-01 +; CHECK-NEXT: vselgt.f64 d0, d18, d17 ; CHECK-NEXT: bx lr %1 = fcmp nsz olt double 
%a0, 128.000000e+00 %2 = select i1 %1, double -5.000000e-01, double 5.000000e-01 diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll index 8bd8aa7b34dec..478b98dfac80f 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll @@ -8,27 +8,24 @@ define i32 @stest_f64i32(double %x) { ; SOFT-LABEL: stest_f64i32: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, r5, r7, lr} -; SOFT-NEXT: push {r4, r5, r7, lr} +; SOFT-NEXT: .save {r4, lr} +; SOFT-NEXT: push {r4, lr} ; SOFT-NEXT: bl __aeabi_d2lz -; SOFT-NEXT: movs r2, #1 -; SOFT-NEXT: movs r3, #0 -; SOFT-NEXT: ldr r4, .LCPI0_0 -; SOFT-NEXT: subs r5, r0, r4 -; SOFT-NEXT: mov r5, r1 -; SOFT-NEXT: sbcs r5, r3 -; SOFT-NEXT: mov r5, r2 -; SOFT-NEXT: bge .LBB0_7 +; SOFT-NEXT: movs r2, #0 +; SOFT-NEXT: ldr r3, .LCPI0_0 +; SOFT-NEXT: subs r4, r0, r3 +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: sbcs r4, r2 +; SOFT-NEXT: blt .LBB0_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB0_8 +; SOFT-NEXT: mov r1, r2 ; SOFT-NEXT: .LBB0_2: @ %entry -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB0_4 -; SOFT-NEXT: .LBB0_3: @ %entry -; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: blt .LBB0_4 +; SOFT-NEXT: @ %bb.3: @ %entry +; SOFT-NEXT: mov r0, r3 ; SOFT-NEXT: .LBB0_4: @ %entry -; SOFT-NEXT: mvns r3, r3 +; SOFT-NEXT: mvns r3, r2 +; SOFT-NEXT: movs r2, #1 ; SOFT-NEXT: lsls r2, r2, #31 ; SOFT-NEXT: subs r4, r2, r0 ; SOFT-NEXT: sbcs r3, r1 @@ -36,18 +33,9 @@ define i32 @stest_f64i32(double %x) { ; SOFT-NEXT: @ %bb.5: @ %entry ; SOFT-NEXT: mov r0, r2 ; SOFT-NEXT: .LBB0_6: @ %entry -; SOFT-NEXT: pop {r4, r5, r7, pc} -; SOFT-NEXT: .LBB0_7: @ %entry -; SOFT-NEXT: mov r5, r3 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB0_2 -; SOFT-NEXT: .LBB0_8: @ %entry -; SOFT-NEXT: mov r1, r3 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB0_3 -; SOFT-NEXT: b .LBB0_4 +; SOFT-NEXT: pop {r4, pc} ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.9: +; SOFT-NEXT: @ %bb.7: ; SOFT-NEXT: 
.LCPI0_0: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; @@ -135,45 +123,33 @@ define i32 @ustest_f64i32(double %x) { ; SOFT-NEXT: .save {r4, lr} ; SOFT-NEXT: push {r4, lr} ; SOFT-NEXT: bl __aeabi_d2lz -; SOFT-NEXT: movs r2, #1 -; SOFT-NEXT: movs r3, #0 +; SOFT-NEXT: movs r2, #0 +; SOFT-NEXT: mvns r3, r2 ; SOFT-NEXT: adds r4, r0, #1 ; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: sbcs r4, r3 -; SOFT-NEXT: mov r4, r2 -; SOFT-NEXT: bge .LBB2_7 +; SOFT-NEXT: sbcs r4, r2 +; SOFT-NEXT: blt .LBB2_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB2_8 +; SOFT-NEXT: mov r1, r2 ; SOFT-NEXT: .LBB2_2: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB2_4 -; SOFT-NEXT: .LBB2_3: @ %entry -; SOFT-NEXT: mvns r0, r3 +; SOFT-NEXT: blt .LBB2_4 +; SOFT-NEXT: @ %bb.3: @ %entry +; SOFT-NEXT: mov r0, r3 ; SOFT-NEXT: .LBB2_4: @ %entry -; SOFT-NEXT: rsbs r4, r0, #0 -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: sbcs r4, r1 -; SOFT-NEXT: bge .LBB2_9 +; SOFT-NEXT: rsbs r3, r0, #0 +; SOFT-NEXT: mov r3, r2 +; SOFT-NEXT: sbcs r3, r1 +; SOFT-NEXT: blt .LBB2_7 ; SOFT-NEXT: @ %bb.5: @ %entry ; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB2_10 +; SOFT-NEXT: beq .LBB2_8 ; SOFT-NEXT: .LBB2_6: @ %entry ; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB2_7: @ %entry -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB2_2 -; SOFT-NEXT: .LBB2_8: @ %entry -; SOFT-NEXT: mov r1, r3 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB2_3 -; SOFT-NEXT: b .LBB2_4 -; SOFT-NEXT: .LBB2_9: @ %entry -; SOFT-NEXT: mov r2, r3 +; SOFT-NEXT: .LBB2_7: +; SOFT-NEXT: movs r2, #1 ; SOFT-NEXT: cmp r2, #0 ; SOFT-NEXT: bne .LBB2_6 -; SOFT-NEXT: .LBB2_10: @ %entry +; SOFT-NEXT: .LBB2_8: @ %entry ; SOFT-NEXT: mov r0, r2 ; SOFT-NEXT: pop {r4, pc} ; @@ -220,27 +196,24 @@ entry: define i32 @stest_f32i32(float %x) { ; SOFT-LABEL: stest_f32i32: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, r5, r7, lr} -; SOFT-NEXT: push {r4, r5, r7, lr} +; SOFT-NEXT: .save {r4, lr} +; SOFT-NEXT: push {r4, lr} ; 
SOFT-NEXT: bl __aeabi_f2lz -; SOFT-NEXT: movs r2, #1 -; SOFT-NEXT: movs r3, #0 -; SOFT-NEXT: ldr r4, .LCPI3_0 -; SOFT-NEXT: subs r5, r0, r4 -; SOFT-NEXT: mov r5, r1 -; SOFT-NEXT: sbcs r5, r3 -; SOFT-NEXT: mov r5, r2 -; SOFT-NEXT: bge .LBB3_7 +; SOFT-NEXT: movs r2, #0 +; SOFT-NEXT: ldr r3, .LCPI3_0 +; SOFT-NEXT: subs r4, r0, r3 +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: sbcs r4, r2 +; SOFT-NEXT: blt .LBB3_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB3_8 +; SOFT-NEXT: mov r1, r2 ; SOFT-NEXT: .LBB3_2: @ %entry -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB3_4 -; SOFT-NEXT: .LBB3_3: @ %entry -; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: blt .LBB3_4 +; SOFT-NEXT: @ %bb.3: @ %entry +; SOFT-NEXT: mov r0, r3 ; SOFT-NEXT: .LBB3_4: @ %entry -; SOFT-NEXT: mvns r3, r3 +; SOFT-NEXT: mvns r3, r2 +; SOFT-NEXT: movs r2, #1 ; SOFT-NEXT: lsls r2, r2, #31 ; SOFT-NEXT: subs r4, r2, r0 ; SOFT-NEXT: sbcs r3, r1 @@ -248,18 +221,9 @@ define i32 @stest_f32i32(float %x) { ; SOFT-NEXT: @ %bb.5: @ %entry ; SOFT-NEXT: mov r0, r2 ; SOFT-NEXT: .LBB3_6: @ %entry -; SOFT-NEXT: pop {r4, r5, r7, pc} -; SOFT-NEXT: .LBB3_7: @ %entry -; SOFT-NEXT: mov r5, r3 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB3_2 -; SOFT-NEXT: .LBB3_8: @ %entry -; SOFT-NEXT: mov r1, r3 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB3_3 -; SOFT-NEXT: b .LBB3_4 +; SOFT-NEXT: pop {r4, pc} ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.9: +; SOFT-NEXT: @ %bb.7: ; SOFT-NEXT: .LCPI3_0: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; @@ -312,45 +276,33 @@ define i32 @ustest_f32i32(float %x) { ; SOFT-NEXT: .save {r4, lr} ; SOFT-NEXT: push {r4, lr} ; SOFT-NEXT: bl __aeabi_f2lz -; SOFT-NEXT: movs r2, #1 -; SOFT-NEXT: movs r3, #0 +; SOFT-NEXT: movs r2, #0 +; SOFT-NEXT: mvns r3, r2 ; SOFT-NEXT: adds r4, r0, #1 ; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: sbcs r4, r3 -; SOFT-NEXT: mov r4, r2 -; SOFT-NEXT: bge .LBB5_7 +; SOFT-NEXT: sbcs r4, r2 +; SOFT-NEXT: blt .LBB5_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r4, #0 
-; SOFT-NEXT: beq .LBB5_8 +; SOFT-NEXT: mov r1, r2 ; SOFT-NEXT: .LBB5_2: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB5_4 -; SOFT-NEXT: .LBB5_3: @ %entry -; SOFT-NEXT: mvns r0, r3 +; SOFT-NEXT: blt .LBB5_4 +; SOFT-NEXT: @ %bb.3: @ %entry +; SOFT-NEXT: mov r0, r3 ; SOFT-NEXT: .LBB5_4: @ %entry -; SOFT-NEXT: rsbs r4, r0, #0 -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: sbcs r4, r1 -; SOFT-NEXT: bge .LBB5_9 +; SOFT-NEXT: rsbs r3, r0, #0 +; SOFT-NEXT: mov r3, r2 +; SOFT-NEXT: sbcs r3, r1 +; SOFT-NEXT: blt .LBB5_7 ; SOFT-NEXT: @ %bb.5: @ %entry ; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB5_10 +; SOFT-NEXT: beq .LBB5_8 ; SOFT-NEXT: .LBB5_6: @ %entry ; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB5_7: @ %entry -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB5_2 -; SOFT-NEXT: .LBB5_8: @ %entry -; SOFT-NEXT: mov r1, r3 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB5_3 -; SOFT-NEXT: b .LBB5_4 -; SOFT-NEXT: .LBB5_9: @ %entry -; SOFT-NEXT: mov r2, r3 +; SOFT-NEXT: .LBB5_7: +; SOFT-NEXT: movs r2, #1 ; SOFT-NEXT: cmp r2, #0 ; SOFT-NEXT: bne .LBB5_6 -; SOFT-NEXT: .LBB5_10: @ %entry +; SOFT-NEXT: .LBB5_8: @ %entry ; SOFT-NEXT: mov r0, r2 ; SOFT-NEXT: pop {r4, pc} ; @@ -372,29 +324,26 @@ entry: define i32 @stest_f16i32(half %x) { ; SOFT-LABEL: stest_f16i32: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, r5, r7, lr} -; SOFT-NEXT: push {r4, r5, r7, lr} +; SOFT-NEXT: .save {r4, lr} +; SOFT-NEXT: push {r4, lr} ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f ; SOFT-NEXT: bl __aeabi_f2lz -; SOFT-NEXT: movs r2, #1 -; SOFT-NEXT: movs r3, #0 -; SOFT-NEXT: ldr r4, .LCPI6_0 -; SOFT-NEXT: subs r5, r0, r4 -; SOFT-NEXT: mov r5, r1 -; SOFT-NEXT: sbcs r5, r3 -; SOFT-NEXT: mov r5, r2 -; SOFT-NEXT: bge .LBB6_7 +; SOFT-NEXT: movs r2, #0 +; SOFT-NEXT: ldr r3, .LCPI6_0 +; SOFT-NEXT: subs r4, r0, r3 +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: sbcs r4, r2 +; SOFT-NEXT: blt .LBB6_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB6_8 +; 
SOFT-NEXT: mov r1, r2 ; SOFT-NEXT: .LBB6_2: @ %entry -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB6_4 -; SOFT-NEXT: .LBB6_3: @ %entry -; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: blt .LBB6_4 +; SOFT-NEXT: @ %bb.3: @ %entry +; SOFT-NEXT: mov r0, r3 ; SOFT-NEXT: .LBB6_4: @ %entry -; SOFT-NEXT: mvns r3, r3 +; SOFT-NEXT: mvns r3, r2 +; SOFT-NEXT: movs r2, #1 ; SOFT-NEXT: lsls r2, r2, #31 ; SOFT-NEXT: subs r4, r2, r0 ; SOFT-NEXT: sbcs r3, r1 @@ -402,18 +351,9 @@ define i32 @stest_f16i32(half %x) { ; SOFT-NEXT: @ %bb.5: @ %entry ; SOFT-NEXT: mov r0, r2 ; SOFT-NEXT: .LBB6_6: @ %entry -; SOFT-NEXT: pop {r4, r5, r7, pc} -; SOFT-NEXT: .LBB6_7: @ %entry -; SOFT-NEXT: mov r5, r3 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB6_2 -; SOFT-NEXT: .LBB6_8: @ %entry -; SOFT-NEXT: mov r1, r3 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB6_3 -; SOFT-NEXT: b .LBB6_4 +; SOFT-NEXT: pop {r4, pc} ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.9: +; SOFT-NEXT: @ %bb.7: ; SOFT-NEXT: .LCPI6_0: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; @@ -492,45 +432,33 @@ define i32 @ustest_f16i32(half %x) { ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f ; SOFT-NEXT: bl __aeabi_f2lz -; SOFT-NEXT: movs r2, #1 -; SOFT-NEXT: movs r3, #0 +; SOFT-NEXT: movs r2, #0 +; SOFT-NEXT: mvns r3, r2 ; SOFT-NEXT: adds r4, r0, #1 ; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: sbcs r4, r3 -; SOFT-NEXT: mov r4, r2 -; SOFT-NEXT: bge .LBB8_7 +; SOFT-NEXT: sbcs r4, r2 +; SOFT-NEXT: blt .LBB8_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB8_8 +; SOFT-NEXT: mov r1, r2 ; SOFT-NEXT: .LBB8_2: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB8_4 -; SOFT-NEXT: .LBB8_3: @ %entry -; SOFT-NEXT: mvns r0, r3 +; SOFT-NEXT: blt .LBB8_4 +; SOFT-NEXT: @ %bb.3: @ %entry +; SOFT-NEXT: mov r0, r3 ; SOFT-NEXT: .LBB8_4: @ %entry -; SOFT-NEXT: rsbs r4, r0, #0 -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: sbcs r4, r1 -; SOFT-NEXT: bge .LBB8_9 +; SOFT-NEXT: rsbs r3, r0, #0 +; SOFT-NEXT: mov r3, r2 +; SOFT-NEXT: sbcs r3, r1 +; 
SOFT-NEXT: blt .LBB8_7 ; SOFT-NEXT: @ %bb.5: @ %entry ; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB8_10 +; SOFT-NEXT: beq .LBB8_8 ; SOFT-NEXT: .LBB8_6: @ %entry ; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB8_7: @ %entry -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB8_2 -; SOFT-NEXT: .LBB8_8: @ %entry -; SOFT-NEXT: mov r1, r3 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB8_3 -; SOFT-NEXT: b .LBB8_4 -; SOFT-NEXT: .LBB8_9: @ %entry -; SOFT-NEXT: mov r2, r3 +; SOFT-NEXT: .LBB8_7: +; SOFT-NEXT: movs r2, #1 ; SOFT-NEXT: cmp r2, #0 ; SOFT-NEXT: bne .LBB8_6 -; SOFT-NEXT: .LBB8_10: @ %entry +; SOFT-NEXT: .LBB8_8: @ %entry ; SOFT-NEXT: mov r0, r2 ; SOFT-NEXT: pop {r4, pc} ; @@ -990,86 +918,62 @@ define i64 @stest_f64i64(double %x) { ; SOFT: @ %bb.0: @ %entry ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #12 -; SOFT-NEXT: sub sp, #12 +; SOFT-NEXT: .pad #4 +; SOFT-NEXT: sub sp, #4 ; SOFT-NEXT: bl __fixdfti -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: movs r4, #1 -; SOFT-NEXT: movs r5, #0 -; SOFT-NEXT: ldr r0, .LCPI18_0 -; SOFT-NEXT: adds r7, r6, #1 +; SOFT-NEXT: movs r4, #0 +; SOFT-NEXT: mvns r5, r4 +; SOFT-NEXT: ldr r6, .LCPI18_0 +; SOFT-NEXT: adds r7, r0, #1 ; SOFT-NEXT: mov r7, r1 -; SOFT-NEXT: sbcs r7, r0 +; SOFT-NEXT: sbcs r7, r6 ; SOFT-NEXT: mov r7, r2 -; SOFT-NEXT: sbcs r7, r5 +; SOFT-NEXT: sbcs r7, r4 ; SOFT-NEXT: mov r7, r3 -; SOFT-NEXT: sbcs r7, r5 -; SOFT-NEXT: mov r7, r4 -; SOFT-NEXT: bge .LBB18_13 +; SOFT-NEXT: sbcs r7, r4 +; SOFT-NEXT: bge .LBB18_8 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB18_14 +; SOFT-NEXT: bge .LBB18_9 ; SOFT-NEXT: .LBB18_2: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB18_4 +; SOFT-NEXT: bge .LBB18_10 ; SOFT-NEXT: .LBB18_3: @ %entry -; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: blt .LBB18_5 ; SOFT-NEXT: .LBB18_4: @ %entry -; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB18_6 -; SOFT-NEXT: 
@ %bb.5: @ %entry -; SOFT-NEXT: mov r1, r0 -; SOFT-NEXT: .LBB18_6: @ %entry -; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: mvns r0, r5 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB18_8 -; SOFT-NEXT: @ %bb.7: @ %entry -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: .LBB18_8: @ %entry -; SOFT-NEXT: lsls r3, r4, #31 -; SOFT-NEXT: rsbs r7, r6, #0 -; SOFT-NEXT: mov r7, r3 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: .LBB18_5: @ %entry +; SOFT-NEXT: movs r6, #1 +; SOFT-NEXT: lsls r6, r6, #31 +; SOFT-NEXT: rsbs r7, r0, #0 +; SOFT-NEXT: mov r7, r6 ; SOFT-NEXT: sbcs r7, r1 -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: mov r7, r5 ; SOFT-NEXT: sbcs r7, r2 -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: sbcs r0, r2 -; SOFT-NEXT: bge .LBB18_15 -; SOFT-NEXT: @ %bb.9: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB18_16 +; SOFT-NEXT: sbcs r5, r3 +; SOFT-NEXT: bge .LBB18_11 +; SOFT-NEXT: @ %bb.6: @ %entry +; SOFT-NEXT: bge .LBB18_12 +; SOFT-NEXT: .LBB18_7: @ %entry +; SOFT-NEXT: add sp, #4 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .LBB18_8: @ %entry +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: blt .LBB18_2 +; SOFT-NEXT: .LBB18_9: @ %entry +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: blt .LBB18_3 ; SOFT-NEXT: .LBB18_10: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB18_12 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: bge .LBB18_4 +; SOFT-NEXT: b .LBB18_5 ; SOFT-NEXT: .LBB18_11: @ %entry -; SOFT-NEXT: mov r1, r3 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: blt .LBB18_7 ; SOFT-NEXT: .LBB18_12: @ %entry -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB18_13: @ %entry -; SOFT-NEXT: mov r7, r5 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB18_2 -; SOFT-NEXT: .LBB18_14: @ %entry -; SOFT-NEXT: mov r3, r5 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB18_3 -; SOFT-NEXT: b .LBB18_4 -; SOFT-NEXT: .LBB18_15: @ %entry -; SOFT-NEXT: 
mov r4, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB18_10 -; SOFT-NEXT: .LBB18_16: @ %entry -; SOFT-NEXT: mov r6, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB18_11 -; SOFT-NEXT: b .LBB18_12 ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.17: +; SOFT-NEXT: @ %bb.13: ; SOFT-NEXT: .LCPI18_0: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; @@ -1100,13 +1004,9 @@ define i64 @stest_f64i64(double %x) { ; VFP2-NEXT: sbcs.w r5, lr, r1 ; VFP2-NEXT: sbcs.w r4, r2, r4 ; VFP2-NEXT: sbcs r2, r3 -; VFP2-NEXT: mov.w r2, #0 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r2, #1 -; VFP2-NEXT: cmp r2, #0 -; VFP2-NEXT: itt eq -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: moveq r1, lr +; VFP2-NEXT: itt ge +; VFP2-NEXT: movge r0, r12 +; VFP2-NEXT: movge r1, lr ; VFP2-NEXT: pop {r4, r5, r7, pc} ; ; FULL-LABEL: stest_f64i64: @@ -1131,11 +1031,9 @@ define i64 @stest_f64i64(double %x) { ; FULL-NEXT: sbcs.w r4, r12, r1 ; FULL-NEXT: sbcs.w r2, r3, r2 ; FULL-NEXT: sbcs.w r2, r3, r5 -; FULL-NEXT: cset r2, lt -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: it eq -; FULL-NEXT: moveq r0, #0 -; FULL-NEXT: csel r1, r1, r12, ne +; FULL-NEXT: it ge +; FULL-NEXT: movge r0, #0 +; FULL-NEXT: csel r1, r1, r12, lt ; FULL-NEXT: pop {r4, r5, r7, pc} entry: %conv = fptosi double %x to i128 @@ -1156,24 +1054,15 @@ define i64 @utest_f64i64(double %x) { ; SOFT-NEXT: movs r4, #0 ; SOFT-NEXT: subs r2, r2, #1 ; SOFT-NEXT: sbcs r3, r4 -; SOFT-NEXT: blo .LBB19_2 +; SOFT-NEXT: bhs .LBB19_3 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB19_3 -; SOFT-NEXT: b .LBB19_4 -; SOFT-NEXT: .LBB19_2: -; SOFT-NEXT: movs r2, #1 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB19_4 +; SOFT-NEXT: bhs .LBB19_4 +; SOFT-NEXT: .LBB19_2: @ %entry +; SOFT-NEXT: pop {r4, pc} ; SOFT-NEXT: .LBB19_3: @ %entry ; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: blo .LBB19_2 ; SOFT-NEXT: .LBB19_4: @ %entry -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB19_6 -; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: pop {r4, pc} -; 
SOFT-NEXT: .LBB19_6: @ %entry ; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: pop {r4, pc} ; @@ -1185,13 +1074,9 @@ define i64 @utest_f64i64(double %x) { ; VFP2-NEXT: subs r2, #1 ; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 -; VFP2-NEXT: mov.w r2, #0 -; VFP2-NEXT: it lo -; VFP2-NEXT: movlo r2, #1 -; VFP2-NEXT: cmp r2, #0 -; VFP2-NEXT: itt eq -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: moveq r1, r12 +; VFP2-NEXT: itt hs +; VFP2-NEXT: movhs r0, r12 +; VFP2-NEXT: movhs r1, r12 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: utest_f64i64: @@ -1200,12 +1085,10 @@ define i64 @utest_f64i64(double %x) { ; FULL-NEXT: push {r7, lr} ; FULL-NEXT: bl __fixunsdfti ; FULL-NEXT: subs r2, #1 +; FULL-NEXT: mov.w r12, #0 ; FULL-NEXT: sbcs r2, r3, #0 -; FULL-NEXT: mov.w r3, #0 -; FULL-NEXT: cset r2, lo -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r0, r0, r3, ne -; FULL-NEXT: csel r1, r1, r3, ne +; FULL-NEXT: csel r0, r0, r12, lo +; FULL-NEXT: csel r1, r1, r12, lo ; FULL-NEXT: pop {r7, pc} entry: %conv = fptoui double %x to i128 @@ -1226,23 +1109,16 @@ define i64 @ustest_f64i64(double %x) { ; SOFT-NEXT: subs r6, r2, #1 ; SOFT-NEXT: mov r6, r3 ; SOFT-NEXT: sbcs r6, r5 -; SOFT-NEXT: mov r6, r4 -; SOFT-NEXT: bge .LBB20_10 +; SOFT-NEXT: bge .LBB20_9 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB20_11 +; SOFT-NEXT: bge .LBB20_10 ; SOFT-NEXT: .LBB20_2: @ %entry -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB20_12 +; SOFT-NEXT: bge .LBB20_11 ; SOFT-NEXT: .LBB20_3: @ %entry -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB20_13 +; SOFT-NEXT: blt .LBB20_5 ; SOFT-NEXT: .LBB20_4: @ %entry -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB20_6 -; SOFT-NEXT: .LBB20_5: @ %entry ; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: .LBB20_6: @ %entry +; SOFT-NEXT: .LBB20_5: @ %entry ; SOFT-NEXT: rsbs r6, r0, #0 ; SOFT-NEXT: mov r6, r5 ; SOFT-NEXT: sbcs r6, r1 @@ -1250,41 +1126,32 @@ define i64 @ustest_f64i64(double %x) { ; SOFT-NEXT: sbcs r6, r2 ; SOFT-NEXT: mov r2, r5 ; SOFT-NEXT: sbcs r2, r3 -; 
SOFT-NEXT: bge .LBB20_14 -; SOFT-NEXT: @ %bb.7: @ %entry +; SOFT-NEXT: bge .LBB20_12 +; SOFT-NEXT: @ %bb.6: @ %entry ; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB20_15 +; SOFT-NEXT: beq .LBB20_13 +; SOFT-NEXT: .LBB20_7: @ %entry +; SOFT-NEXT: beq .LBB20_14 ; SOFT-NEXT: .LBB20_8: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB20_16 -; SOFT-NEXT: .LBB20_9: @ %entry ; SOFT-NEXT: pop {r4, r5, r6, pc} -; SOFT-NEXT: .LBB20_10: @ %entry -; SOFT-NEXT: mov r6, r5 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB20_2 -; SOFT-NEXT: .LBB20_11: @ %entry +; SOFT-NEXT: .LBB20_9: @ %entry ; SOFT-NEXT: mov r3, r5 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB20_3 -; SOFT-NEXT: .LBB20_12: @ %entry +; SOFT-NEXT: blt .LBB20_2 +; SOFT-NEXT: .LBB20_10: @ %entry ; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB20_4 -; SOFT-NEXT: .LBB20_13: @ %entry +; SOFT-NEXT: blt .LBB20_3 +; SOFT-NEXT: .LBB20_11: @ %entry ; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB20_5 -; SOFT-NEXT: b .LBB20_6 -; SOFT-NEXT: .LBB20_14: @ %entry +; SOFT-NEXT: bge .LBB20_4 +; SOFT-NEXT: b .LBB20_5 +; SOFT-NEXT: .LBB20_12: @ %entry ; SOFT-NEXT: mov r4, r5 ; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB20_8 -; SOFT-NEXT: .LBB20_15: @ %entry +; SOFT-NEXT: bne .LBB20_7 +; SOFT-NEXT: .LBB20_13: @ %entry ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB20_9 -; SOFT-NEXT: .LBB20_16: @ %entry +; SOFT-NEXT: bne .LBB20_8 +; SOFT-NEXT: .LBB20_14: @ %entry ; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: pop {r4, r5, r6, pc} ; @@ -1296,15 +1163,11 @@ define i64 @ustest_f64i64(double %x) { ; VFP2-NEXT: subs.w lr, r2, #1 ; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs lr, r3, #0 -; VFP2-NEXT: mov.w lr, #0 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt.w lr, #1 -; VFP2-NEXT: cmp.w lr, #0 -; VFP2-NEXT: itttt eq -; VFP2-NEXT: moveq r3, r12 -; VFP2-NEXT: moveq r2, #1 -; VFP2-NEXT: moveq r1, r12 -; VFP2-NEXT: moveq r0, r12 +; VFP2-NEXT: itttt ge +; VFP2-NEXT: movge r3, r12 +; 
VFP2-NEXT: movge r2, #1 +; VFP2-NEXT: movge r1, r12 +; VFP2-NEXT: movge r0, r12 ; VFP2-NEXT: rsbs.w lr, r0, #0 ; VFP2-NEXT: sbcs.w lr, r12, r1 ; VFP2-NEXT: sbcs.w r2, r12, r2 @@ -1322,20 +1185,18 @@ define i64 @ustest_f64i64(double %x) { ; FULL-NEXT: .save {r7, lr} ; FULL-NEXT: push {r7, lr} ; FULL-NEXT: bl __fixdfti -; FULL-NEXT: subs.w r12, r2, #1 -; FULL-NEXT: mov.w lr, #0 -; FULL-NEXT: sbcs r12, r3, #0 -; FULL-NEXT: cset r12, lt -; FULL-NEXT: cmp.w r12, #0 -; FULL-NEXT: it eq -; FULL-NEXT: moveq r2, #1 -; FULL-NEXT: csel r0, r0, lr, ne -; FULL-NEXT: csel r12, r3, lr, ne -; FULL-NEXT: csel r1, r1, lr, ne +; FULL-NEXT: subs.w lr, r2, #1 +; FULL-NEXT: mov.w r12, #0 +; FULL-NEXT: sbcs lr, r3, #0 +; FULL-NEXT: it ge +; FULL-NEXT: movge r2, #1 +; FULL-NEXT: csel r0, r0, r12, lt +; FULL-NEXT: csel lr, r3, r12, lt +; FULL-NEXT: csel r1, r1, r12, lt ; FULL-NEXT: rsbs r3, r0, #0 -; FULL-NEXT: sbcs.w r3, lr, r1 -; FULL-NEXT: sbcs.w r2, lr, r2 -; FULL-NEXT: sbcs.w r2, lr, r12 +; FULL-NEXT: sbcs.w r3, r12, r1 +; FULL-NEXT: sbcs.w r2, r12, r2 +; FULL-NEXT: sbcs.w r2, r12, lr ; FULL-NEXT: cset r2, lt ; FULL-NEXT: cmp r2, #0 ; FULL-NEXT: csel r0, r0, r2, ne @@ -1356,86 +1217,62 @@ define i64 @stest_f32i64(float %x) { ; SOFT: @ %bb.0: @ %entry ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #12 -; SOFT-NEXT: sub sp, #12 +; SOFT-NEXT: .pad #4 +; SOFT-NEXT: sub sp, #4 ; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: movs r4, #1 -; SOFT-NEXT: movs r5, #0 -; SOFT-NEXT: ldr r0, .LCPI21_0 -; SOFT-NEXT: adds r7, r6, #1 +; SOFT-NEXT: movs r4, #0 +; SOFT-NEXT: mvns r5, r4 +; SOFT-NEXT: ldr r6, .LCPI21_0 +; SOFT-NEXT: adds r7, r0, #1 ; SOFT-NEXT: mov r7, r1 -; SOFT-NEXT: sbcs r7, r0 +; SOFT-NEXT: sbcs r7, r6 ; SOFT-NEXT: mov r7, r2 -; SOFT-NEXT: sbcs r7, r5 +; SOFT-NEXT: sbcs r7, r4 ; SOFT-NEXT: mov r7, r3 -; SOFT-NEXT: sbcs r7, r5 -; SOFT-NEXT: mov r7, r4 -; SOFT-NEXT: bge .LBB21_13 +; SOFT-NEXT: sbcs r7, r4 +; SOFT-NEXT: 
bge .LBB21_8 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB21_14 +; SOFT-NEXT: bge .LBB21_9 ; SOFT-NEXT: .LBB21_2: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB21_4 +; SOFT-NEXT: bge .LBB21_10 ; SOFT-NEXT: .LBB21_3: @ %entry -; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: blt .LBB21_5 ; SOFT-NEXT: .LBB21_4: @ %entry -; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB21_6 -; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: mov r1, r0 -; SOFT-NEXT: .LBB21_6: @ %entry -; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: mvns r0, r5 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB21_8 -; SOFT-NEXT: @ %bb.7: @ %entry -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: .LBB21_8: @ %entry -; SOFT-NEXT: lsls r3, r4, #31 -; SOFT-NEXT: rsbs r7, r6, #0 -; SOFT-NEXT: mov r7, r3 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: .LBB21_5: @ %entry +; SOFT-NEXT: movs r6, #1 +; SOFT-NEXT: lsls r6, r6, #31 +; SOFT-NEXT: rsbs r7, r0, #0 +; SOFT-NEXT: mov r7, r6 ; SOFT-NEXT: sbcs r7, r1 -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: mov r7, r5 ; SOFT-NEXT: sbcs r7, r2 -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: sbcs r0, r2 -; SOFT-NEXT: bge .LBB21_15 -; SOFT-NEXT: @ %bb.9: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB21_16 +; SOFT-NEXT: sbcs r5, r3 +; SOFT-NEXT: bge .LBB21_11 +; SOFT-NEXT: @ %bb.6: @ %entry +; SOFT-NEXT: bge .LBB21_12 +; SOFT-NEXT: .LBB21_7: @ %entry +; SOFT-NEXT: add sp, #4 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .LBB21_8: @ %entry +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: blt .LBB21_2 +; SOFT-NEXT: .LBB21_9: @ %entry +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: blt .LBB21_3 ; SOFT-NEXT: .LBB21_10: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB21_12 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: bge .LBB21_4 +; SOFT-NEXT: b .LBB21_5 ; SOFT-NEXT: .LBB21_11: @ %entry -; SOFT-NEXT: mov r1, r3 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: blt .LBB21_7 ; 
SOFT-NEXT: .LBB21_12: @ %entry -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB21_13: @ %entry -; SOFT-NEXT: mov r7, r5 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB21_2 -; SOFT-NEXT: .LBB21_14: @ %entry -; SOFT-NEXT: mov r3, r5 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB21_3 -; SOFT-NEXT: b .LBB21_4 -; SOFT-NEXT: .LBB21_15: @ %entry -; SOFT-NEXT: mov r4, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB21_10 -; SOFT-NEXT: .LBB21_16: @ %entry -; SOFT-NEXT: mov r6, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB21_11 -; SOFT-NEXT: b .LBB21_12 ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.17: +; SOFT-NEXT: @ %bb.13: ; SOFT-NEXT: .LCPI21_0: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; @@ -1466,13 +1303,9 @@ define i64 @stest_f32i64(float %x) { ; VFP2-NEXT: sbcs.w r5, lr, r1 ; VFP2-NEXT: sbcs.w r4, r2, r4 ; VFP2-NEXT: sbcs r2, r3 -; VFP2-NEXT: mov.w r2, #0 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r2, #1 -; VFP2-NEXT: cmp r2, #0 -; VFP2-NEXT: itt eq -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: moveq r1, lr +; VFP2-NEXT: itt ge +; VFP2-NEXT: movge r0, r12 +; VFP2-NEXT: movge r1, lr ; VFP2-NEXT: pop {r4, r5, r7, pc} ; ; FULL-LABEL: stest_f32i64: @@ -1497,11 +1330,9 @@ define i64 @stest_f32i64(float %x) { ; FULL-NEXT: sbcs.w r4, r12, r1 ; FULL-NEXT: sbcs.w r2, r3, r2 ; FULL-NEXT: sbcs.w r2, r3, r5 -; FULL-NEXT: cset r2, lt -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: it eq -; FULL-NEXT: moveq r0, #0 -; FULL-NEXT: csel r1, r1, r12, ne +; FULL-NEXT: it ge +; FULL-NEXT: movge r0, #0 +; FULL-NEXT: csel r1, r1, r12, lt ; FULL-NEXT: pop {r4, r5, r7, pc} entry: %conv = fptosi float %x to i128 @@ -1522,24 +1353,15 @@ define i64 @utest_f32i64(float %x) { ; SOFT-NEXT: movs r4, #0 ; SOFT-NEXT: subs r2, r2, #1 ; SOFT-NEXT: sbcs r3, r4 -; SOFT-NEXT: blo .LBB22_2 +; SOFT-NEXT: bhs .LBB22_3 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: cmp r2, #0 -; 
SOFT-NEXT: beq .LBB22_3 -; SOFT-NEXT: b .LBB22_4 -; SOFT-NEXT: .LBB22_2: -; SOFT-NEXT: movs r2, #1 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB22_4 +; SOFT-NEXT: bhs .LBB22_4 +; SOFT-NEXT: .LBB22_2: @ %entry +; SOFT-NEXT: pop {r4, pc} ; SOFT-NEXT: .LBB22_3: @ %entry ; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: blo .LBB22_2 ; SOFT-NEXT: .LBB22_4: @ %entry -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB22_6 -; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB22_6: @ %entry ; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: pop {r4, pc} ; @@ -1551,13 +1373,9 @@ define i64 @utest_f32i64(float %x) { ; VFP2-NEXT: subs r2, #1 ; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 -; VFP2-NEXT: mov.w r2, #0 -; VFP2-NEXT: it lo -; VFP2-NEXT: movlo r2, #1 -; VFP2-NEXT: cmp r2, #0 -; VFP2-NEXT: itt eq -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: moveq r1, r12 +; VFP2-NEXT: itt hs +; VFP2-NEXT: movhs r0, r12 +; VFP2-NEXT: movhs r1, r12 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: utest_f32i64: @@ -1566,12 +1384,10 @@ define i64 @utest_f32i64(float %x) { ; FULL-NEXT: push {r7, lr} ; FULL-NEXT: bl __fixunssfti ; FULL-NEXT: subs r2, #1 +; FULL-NEXT: mov.w r12, #0 ; FULL-NEXT: sbcs r2, r3, #0 -; FULL-NEXT: mov.w r3, #0 -; FULL-NEXT: cset r2, lo -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r0, r0, r3, ne -; FULL-NEXT: csel r1, r1, r3, ne +; FULL-NEXT: csel r0, r0, r12, lo +; FULL-NEXT: csel r1, r1, r12, lo ; FULL-NEXT: pop {r7, pc} entry: %conv = fptoui float %x to i128 @@ -1592,23 +1408,16 @@ define i64 @ustest_f32i64(float %x) { ; SOFT-NEXT: subs r6, r2, #1 ; SOFT-NEXT: mov r6, r3 ; SOFT-NEXT: sbcs r6, r5 -; SOFT-NEXT: mov r6, r4 -; SOFT-NEXT: bge .LBB23_10 +; SOFT-NEXT: bge .LBB23_9 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB23_11 +; SOFT-NEXT: bge .LBB23_10 ; SOFT-NEXT: .LBB23_2: @ %entry -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB23_12 +; SOFT-NEXT: bge .LBB23_11 ; SOFT-NEXT: .LBB23_3: @ %entry -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq 
.LBB23_13 +; SOFT-NEXT: blt .LBB23_5 ; SOFT-NEXT: .LBB23_4: @ %entry -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB23_6 -; SOFT-NEXT: .LBB23_5: @ %entry ; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: .LBB23_6: @ %entry +; SOFT-NEXT: .LBB23_5: @ %entry ; SOFT-NEXT: rsbs r6, r0, #0 ; SOFT-NEXT: mov r6, r5 ; SOFT-NEXT: sbcs r6, r1 @@ -1616,41 +1425,32 @@ define i64 @ustest_f32i64(float %x) { ; SOFT-NEXT: sbcs r6, r2 ; SOFT-NEXT: mov r2, r5 ; SOFT-NEXT: sbcs r2, r3 -; SOFT-NEXT: bge .LBB23_14 -; SOFT-NEXT: @ %bb.7: @ %entry +; SOFT-NEXT: bge .LBB23_12 +; SOFT-NEXT: @ %bb.6: @ %entry ; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB23_15 +; SOFT-NEXT: beq .LBB23_13 +; SOFT-NEXT: .LBB23_7: @ %entry +; SOFT-NEXT: beq .LBB23_14 ; SOFT-NEXT: .LBB23_8: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB23_16 -; SOFT-NEXT: .LBB23_9: @ %entry ; SOFT-NEXT: pop {r4, r5, r6, pc} -; SOFT-NEXT: .LBB23_10: @ %entry -; SOFT-NEXT: mov r6, r5 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB23_2 -; SOFT-NEXT: .LBB23_11: @ %entry +; SOFT-NEXT: .LBB23_9: @ %entry ; SOFT-NEXT: mov r3, r5 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB23_3 -; SOFT-NEXT: .LBB23_12: @ %entry +; SOFT-NEXT: blt .LBB23_2 +; SOFT-NEXT: .LBB23_10: @ %entry ; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB23_4 -; SOFT-NEXT: .LBB23_13: @ %entry +; SOFT-NEXT: blt .LBB23_3 +; SOFT-NEXT: .LBB23_11: @ %entry ; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB23_5 -; SOFT-NEXT: b .LBB23_6 -; SOFT-NEXT: .LBB23_14: @ %entry +; SOFT-NEXT: bge .LBB23_4 +; SOFT-NEXT: b .LBB23_5 +; SOFT-NEXT: .LBB23_12: @ %entry ; SOFT-NEXT: mov r4, r5 ; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB23_8 -; SOFT-NEXT: .LBB23_15: @ %entry +; SOFT-NEXT: bne .LBB23_7 +; SOFT-NEXT: .LBB23_13: @ %entry ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB23_9 -; SOFT-NEXT: .LBB23_16: @ %entry +; SOFT-NEXT: bne .LBB23_8 +; SOFT-NEXT: .LBB23_14: @ %entry ; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: pop {r4, 
r5, r6, pc} ; @@ -1662,15 +1462,11 @@ define i64 @ustest_f32i64(float %x) { ; VFP2-NEXT: subs.w lr, r2, #1 ; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs lr, r3, #0 -; VFP2-NEXT: mov.w lr, #0 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt.w lr, #1 -; VFP2-NEXT: cmp.w lr, #0 -; VFP2-NEXT: itttt eq -; VFP2-NEXT: moveq r3, r12 -; VFP2-NEXT: moveq r2, #1 -; VFP2-NEXT: moveq r1, r12 -; VFP2-NEXT: moveq r0, r12 +; VFP2-NEXT: itttt ge +; VFP2-NEXT: movge r3, r12 +; VFP2-NEXT: movge r2, #1 +; VFP2-NEXT: movge r1, r12 +; VFP2-NEXT: movge r0, r12 ; VFP2-NEXT: rsbs.w lr, r0, #0 ; VFP2-NEXT: sbcs.w lr, r12, r1 ; VFP2-NEXT: sbcs.w r2, r12, r2 @@ -1688,20 +1484,18 @@ define i64 @ustest_f32i64(float %x) { ; FULL-NEXT: .save {r7, lr} ; FULL-NEXT: push {r7, lr} ; FULL-NEXT: bl __fixsfti -; FULL-NEXT: subs.w r12, r2, #1 -; FULL-NEXT: mov.w lr, #0 -; FULL-NEXT: sbcs r12, r3, #0 -; FULL-NEXT: cset r12, lt -; FULL-NEXT: cmp.w r12, #0 -; FULL-NEXT: it eq -; FULL-NEXT: moveq r2, #1 -; FULL-NEXT: csel r0, r0, lr, ne -; FULL-NEXT: csel r12, r3, lr, ne -; FULL-NEXT: csel r1, r1, lr, ne +; FULL-NEXT: subs.w lr, r2, #1 +; FULL-NEXT: mov.w r12, #0 +; FULL-NEXT: sbcs lr, r3, #0 +; FULL-NEXT: it ge +; FULL-NEXT: movge r2, #1 +; FULL-NEXT: csel r0, r0, r12, lt +; FULL-NEXT: csel lr, r3, r12, lt +; FULL-NEXT: csel r1, r1, r12, lt ; FULL-NEXT: rsbs r3, r0, #0 -; FULL-NEXT: sbcs.w r3, lr, r1 -; FULL-NEXT: sbcs.w r2, lr, r2 -; FULL-NEXT: sbcs.w r2, lr, r12 +; FULL-NEXT: sbcs.w r3, r12, r1 +; FULL-NEXT: sbcs.w r2, r12, r2 +; FULL-NEXT: sbcs.w r2, r12, lr ; FULL-NEXT: cset r2, lt ; FULL-NEXT: cmp r2, #0 ; FULL-NEXT: csel r0, r0, r2, ne @@ -1722,88 +1516,64 @@ define i64 @stest_f16i64(half %x) { ; SOFT: @ %bb.0: @ %entry ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #12 -; SOFT-NEXT: sub sp, #12 +; SOFT-NEXT: .pad #4 +; SOFT-NEXT: sub sp, #4 ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f ; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: 
movs r4, #1 -; SOFT-NEXT: movs r5, #0 -; SOFT-NEXT: ldr r0, .LCPI24_0 -; SOFT-NEXT: adds r7, r6, #1 +; SOFT-NEXT: movs r4, #0 +; SOFT-NEXT: mvns r5, r4 +; SOFT-NEXT: ldr r6, .LCPI24_0 +; SOFT-NEXT: adds r7, r0, #1 ; SOFT-NEXT: mov r7, r1 -; SOFT-NEXT: sbcs r7, r0 +; SOFT-NEXT: sbcs r7, r6 ; SOFT-NEXT: mov r7, r2 -; SOFT-NEXT: sbcs r7, r5 +; SOFT-NEXT: sbcs r7, r4 ; SOFT-NEXT: mov r7, r3 -; SOFT-NEXT: sbcs r7, r5 -; SOFT-NEXT: mov r7, r4 -; SOFT-NEXT: bge .LBB24_13 +; SOFT-NEXT: sbcs r7, r4 +; SOFT-NEXT: bge .LBB24_8 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB24_14 +; SOFT-NEXT: bge .LBB24_9 ; SOFT-NEXT: .LBB24_2: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB24_4 +; SOFT-NEXT: bge .LBB24_10 ; SOFT-NEXT: .LBB24_3: @ %entry -; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: blt .LBB24_5 ; SOFT-NEXT: .LBB24_4: @ %entry -; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB24_6 -; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: mov r1, r0 -; SOFT-NEXT: .LBB24_6: @ %entry -; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: mvns r0, r5 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB24_8 -; SOFT-NEXT: @ %bb.7: @ %entry -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: .LBB24_8: @ %entry -; SOFT-NEXT: lsls r3, r4, #31 -; SOFT-NEXT: rsbs r7, r6, #0 -; SOFT-NEXT: mov r7, r3 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: .LBB24_5: @ %entry +; SOFT-NEXT: movs r6, #1 +; SOFT-NEXT: lsls r6, r6, #31 +; SOFT-NEXT: rsbs r7, r0, #0 +; SOFT-NEXT: mov r7, r6 ; SOFT-NEXT: sbcs r7, r1 -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: mov r7, r5 ; SOFT-NEXT: sbcs r7, r2 -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: sbcs r0, r2 -; SOFT-NEXT: bge .LBB24_15 -; SOFT-NEXT: @ %bb.9: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB24_16 +; SOFT-NEXT: sbcs r5, r3 +; SOFT-NEXT: bge .LBB24_11 +; SOFT-NEXT: @ %bb.6: @ %entry +; SOFT-NEXT: bge .LBB24_12 +; SOFT-NEXT: .LBB24_7: @ %entry 
+; SOFT-NEXT: add sp, #4 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .LBB24_8: @ %entry +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: blt .LBB24_2 +; SOFT-NEXT: .LBB24_9: @ %entry +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: blt .LBB24_3 ; SOFT-NEXT: .LBB24_10: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB24_12 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: bge .LBB24_4 +; SOFT-NEXT: b .LBB24_5 ; SOFT-NEXT: .LBB24_11: @ %entry -; SOFT-NEXT: mov r1, r3 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: blt .LBB24_7 ; SOFT-NEXT: .LBB24_12: @ %entry -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB24_13: @ %entry -; SOFT-NEXT: mov r7, r5 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB24_2 -; SOFT-NEXT: .LBB24_14: @ %entry -; SOFT-NEXT: mov r3, r5 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB24_3 -; SOFT-NEXT: b .LBB24_4 -; SOFT-NEXT: .LBB24_15: @ %entry -; SOFT-NEXT: mov r4, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB24_10 -; SOFT-NEXT: .LBB24_16: @ %entry -; SOFT-NEXT: mov r6, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB24_11 -; SOFT-NEXT: b .LBB24_12 ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.17: +; SOFT-NEXT: @ %bb.13: ; SOFT-NEXT: .LCPI24_0: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; @@ -1837,13 +1607,9 @@ define i64 @stest_f16i64(half %x) { ; VFP2-NEXT: sbcs.w r5, lr, r1 ; VFP2-NEXT: sbcs.w r4, r2, r4 ; VFP2-NEXT: sbcs r2, r3 -; VFP2-NEXT: mov.w r2, #0 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt r2, #1 -; VFP2-NEXT: cmp r2, #0 -; VFP2-NEXT: itt eq -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: moveq r1, lr +; VFP2-NEXT: itt ge +; VFP2-NEXT: movge r0, r12 +; VFP2-NEXT: movge r1, lr ; VFP2-NEXT: pop {r4, r5, r7, pc} ; ; FULL-LABEL: stest_f16i64: @@ -1870,11 +1636,9 @@ define i64 @stest_f16i64(half %x) { ; FULL-NEXT: sbcs.w r4, r12, r1 ; FULL-NEXT: sbcs.w r2, r3, r2 ; FULL-NEXT: sbcs.w r2, r3, r5 -; FULL-NEXT: cset r2, lt -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: it 
eq -; FULL-NEXT: moveq r0, #0 -; FULL-NEXT: csel r1, r1, r12, ne +; FULL-NEXT: it ge +; FULL-NEXT: movge r0, #0 +; FULL-NEXT: csel r1, r1, r12, lt ; FULL-NEXT: pop {r4, r5, r7, pc} entry: %conv = fptosi half %x to i128 @@ -1897,24 +1661,15 @@ define i64 @utesth_f16i64(half %x) { ; SOFT-NEXT: movs r4, #0 ; SOFT-NEXT: subs r2, r2, #1 ; SOFT-NEXT: sbcs r3, r4 -; SOFT-NEXT: blo .LBB25_2 +; SOFT-NEXT: bhs .LBB25_3 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB25_3 -; SOFT-NEXT: b .LBB25_4 -; SOFT-NEXT: .LBB25_2: -; SOFT-NEXT: movs r2, #1 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB25_4 +; SOFT-NEXT: bhs .LBB25_4 +; SOFT-NEXT: .LBB25_2: @ %entry +; SOFT-NEXT: pop {r4, pc} ; SOFT-NEXT: .LBB25_3: @ %entry ; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: blo .LBB25_2 ; SOFT-NEXT: .LBB25_4: @ %entry -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB25_6 -; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: pop {r4, pc} -; SOFT-NEXT: .LBB25_6: @ %entry ; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: pop {r4, pc} ; @@ -1929,13 +1684,9 @@ define i64 @utesth_f16i64(half %x) { ; VFP2-NEXT: subs r2, #1 ; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs r2, r3, #0 -; VFP2-NEXT: mov.w r2, #0 -; VFP2-NEXT: it lo -; VFP2-NEXT: movlo r2, #1 -; VFP2-NEXT: cmp r2, #0 -; VFP2-NEXT: itt eq -; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: moveq r1, r12 +; VFP2-NEXT: itt hs +; VFP2-NEXT: movhs r0, r12 +; VFP2-NEXT: movhs r1, r12 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: utesth_f16i64: @@ -1946,12 +1697,10 @@ define i64 @utesth_f16i64(half %x) { ; FULL-NEXT: vmov s0, r0 ; FULL-NEXT: bl __fixunshfti ; FULL-NEXT: subs r2, #1 +; FULL-NEXT: mov.w r12, #0 ; FULL-NEXT: sbcs r2, r3, #0 -; FULL-NEXT: mov.w r3, #0 -; FULL-NEXT: cset r2, lo -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r0, r0, r3, ne -; FULL-NEXT: csel r1, r1, r3, ne +; FULL-NEXT: csel r0, r0, r12, lo +; FULL-NEXT: csel r1, r1, r12, lo ; FULL-NEXT: pop {r7, pc} entry: %conv = fptoui half %x to i128 @@ -1974,23 +1723,16 @@ 
define i64 @ustest_f16i64(half %x) { ; SOFT-NEXT: subs r6, r2, #1 ; SOFT-NEXT: mov r6, r3 ; SOFT-NEXT: sbcs r6, r5 -; SOFT-NEXT: mov r6, r4 -; SOFT-NEXT: bge .LBB26_10 +; SOFT-NEXT: bge .LBB26_9 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB26_11 +; SOFT-NEXT: bge .LBB26_10 ; SOFT-NEXT: .LBB26_2: @ %entry -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB26_12 +; SOFT-NEXT: bge .LBB26_11 ; SOFT-NEXT: .LBB26_3: @ %entry -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB26_13 +; SOFT-NEXT: blt .LBB26_5 ; SOFT-NEXT: .LBB26_4: @ %entry -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB26_6 -; SOFT-NEXT: .LBB26_5: @ %entry ; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: .LBB26_6: @ %entry +; SOFT-NEXT: .LBB26_5: @ %entry ; SOFT-NEXT: rsbs r6, r0, #0 ; SOFT-NEXT: mov r6, r5 ; SOFT-NEXT: sbcs r6, r1 @@ -1998,41 +1740,32 @@ define i64 @ustest_f16i64(half %x) { ; SOFT-NEXT: sbcs r6, r2 ; SOFT-NEXT: mov r2, r5 ; SOFT-NEXT: sbcs r2, r3 -; SOFT-NEXT: bge .LBB26_14 -; SOFT-NEXT: @ %bb.7: @ %entry +; SOFT-NEXT: bge .LBB26_12 +; SOFT-NEXT: @ %bb.6: @ %entry ; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB26_15 +; SOFT-NEXT: beq .LBB26_13 +; SOFT-NEXT: .LBB26_7: @ %entry +; SOFT-NEXT: beq .LBB26_14 ; SOFT-NEXT: .LBB26_8: @ %entry -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB26_16 -; SOFT-NEXT: .LBB26_9: @ %entry ; SOFT-NEXT: pop {r4, r5, r6, pc} -; SOFT-NEXT: .LBB26_10: @ %entry -; SOFT-NEXT: mov r6, r5 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB26_2 -; SOFT-NEXT: .LBB26_11: @ %entry +; SOFT-NEXT: .LBB26_9: @ %entry ; SOFT-NEXT: mov r3, r5 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB26_3 -; SOFT-NEXT: .LBB26_12: @ %entry +; SOFT-NEXT: blt .LBB26_2 +; SOFT-NEXT: .LBB26_10: @ %entry ; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB26_4 -; SOFT-NEXT: .LBB26_13: @ %entry +; SOFT-NEXT: blt .LBB26_3 +; SOFT-NEXT: .LBB26_11: @ %entry ; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB26_5 -; SOFT-NEXT: b .LBB26_6 -; SOFT-NEXT: 
.LBB26_14: @ %entry +; SOFT-NEXT: bge .LBB26_4 +; SOFT-NEXT: b .LBB26_5 +; SOFT-NEXT: .LBB26_12: @ %entry ; SOFT-NEXT: mov r4, r5 ; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB26_8 -; SOFT-NEXT: .LBB26_15: @ %entry +; SOFT-NEXT: bne .LBB26_7 +; SOFT-NEXT: .LBB26_13: @ %entry ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB26_9 -; SOFT-NEXT: .LBB26_16: @ %entry +; SOFT-NEXT: bne .LBB26_8 +; SOFT-NEXT: .LBB26_14: @ %entry ; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: pop {r4, r5, r6, pc} ; @@ -2047,15 +1780,11 @@ define i64 @ustest_f16i64(half %x) { ; VFP2-NEXT: subs.w lr, r2, #1 ; VFP2-NEXT: mov.w r12, #0 ; VFP2-NEXT: sbcs lr, r3, #0 -; VFP2-NEXT: mov.w lr, #0 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt.w lr, #1 -; VFP2-NEXT: cmp.w lr, #0 -; VFP2-NEXT: itttt eq -; VFP2-NEXT: moveq r3, r12 -; VFP2-NEXT: moveq r2, #1 -; VFP2-NEXT: moveq r1, r12 -; VFP2-NEXT: moveq r0, r12 +; VFP2-NEXT: itttt ge +; VFP2-NEXT: movge r3, r12 +; VFP2-NEXT: movge r2, #1 +; VFP2-NEXT: movge r1, r12 +; VFP2-NEXT: movge r0, r12 ; VFP2-NEXT: rsbs.w lr, r0, #0 ; VFP2-NEXT: sbcs.w lr, r12, r1 ; VFP2-NEXT: sbcs.w r2, r12, r2 @@ -2075,20 +1804,18 @@ define i64 @ustest_f16i64(half %x) { ; FULL-NEXT: vmov.f16 r0, s0 ; FULL-NEXT: vmov s0, r0 ; FULL-NEXT: bl __fixhfti -; FULL-NEXT: subs.w r12, r2, #1 -; FULL-NEXT: mov.w lr, #0 -; FULL-NEXT: sbcs r12, r3, #0 -; FULL-NEXT: cset r12, lt -; FULL-NEXT: cmp.w r12, #0 -; FULL-NEXT: it eq -; FULL-NEXT: moveq r2, #1 -; FULL-NEXT: csel r0, r0, lr, ne -; FULL-NEXT: csel r12, r3, lr, ne -; FULL-NEXT: csel r1, r1, lr, ne +; FULL-NEXT: subs.w lr, r2, #1 +; FULL-NEXT: mov.w r12, #0 +; FULL-NEXT: sbcs lr, r3, #0 +; FULL-NEXT: it ge +; FULL-NEXT: movge r2, #1 +; FULL-NEXT: csel r0, r0, r12, lt +; FULL-NEXT: csel lr, r3, r12, lt +; FULL-NEXT: csel r1, r1, r12, lt ; FULL-NEXT: rsbs r3, r0, #0 -; FULL-NEXT: sbcs.w r3, lr, r1 -; FULL-NEXT: sbcs.w r2, lr, r2 -; FULL-NEXT: sbcs.w r2, lr, r12 +; FULL-NEXT: sbcs.w r3, r12, r1 +; FULL-NEXT: sbcs.w r2, r12, r2 +; 
FULL-NEXT: sbcs.w r2, r12, lr ; FULL-NEXT: cset r2, lt ; FULL-NEXT: cmp r2, #0 ; FULL-NEXT: csel r0, r0, r2, ne @@ -2121,16 +1848,15 @@ define i32 @stest_f64i32_mm(double %x) { ; SOFT-NEXT: subs r5, r0, r4 ; SOFT-NEXT: mov r5, r1 ; SOFT-NEXT: sbcs r5, r3 -; SOFT-NEXT: mov r5, r2 ; SOFT-NEXT: bge .LBB27_7 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB27_8 +; SOFT-NEXT: mov r4, r2 +; SOFT-NEXT: bge .LBB27_8 ; SOFT-NEXT: .LBB27_2: @ %entry -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: cmp r4, #0 ; SOFT-NEXT: bne .LBB27_4 ; SOFT-NEXT: .LBB27_3: @ %entry -; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: .LBB27_4: @ %entry ; SOFT-NEXT: mvns r3, r3 ; SOFT-NEXT: lsls r2, r2, #31 @@ -2142,12 +1868,12 @@ define i32 @stest_f64i32_mm(double %x) { ; SOFT-NEXT: .LBB27_6: @ %entry ; SOFT-NEXT: pop {r4, r5, r7, pc} ; SOFT-NEXT: .LBB27_7: @ %entry -; SOFT-NEXT: mov r5, r3 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB27_2 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r4, r2 +; SOFT-NEXT: blt .LBB27_2 ; SOFT-NEXT: .LBB27_8: @ %entry -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: mov r4, r3 +; SOFT-NEXT: cmp r4, #0 ; SOFT-NEXT: beq .LBB27_3 ; SOFT-NEXT: b .LBB27_4 ; SOFT-NEXT: .p2align 2 @@ -2161,16 +1887,17 @@ define i32 @stest_f64i32_mm(double %x) { ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: vmov r0, r1, d0 ; VFP2-NEXT: bl __aeabi_d2lz -; VFP2-NEXT: mvn r12, #-2147483648 -; VFP2-NEXT: subs.w r3, r0, r12 -; VFP2-NEXT: mov.w r2, #0 +; VFP2-NEXT: mvn r2, #-2147483648 +; VFP2-NEXT: subs r3, r0, r2 ; VFP2-NEXT: sbcs r3, r1, #0 +; VFP2-NEXT: it ge +; VFP2-NEXT: movge r0, r2 +; VFP2-NEXT: mov.w r2, #0 ; VFP2-NEXT: it lt ; VFP2-NEXT: movlt r2, #1 ; VFP2-NEXT: cmp r2, #0 -; VFP2-NEXT: ite ne +; VFP2-NEXT: it ne ; VFP2-NEXT: movne r2, r1 -; VFP2-NEXT: moveq r0, r12 ; VFP2-NEXT: mov.w r1, #-1 ; VFP2-NEXT: rsbs.w r3, r0, #-2147483648 ; VFP2-NEXT: sbcs r1, r2 @@ -2234,24 +1961,19 @@ define i32 @ustest_f64i32_mm(double %x) { ; SOFT-NEXT: .save 
{r7, lr} ; SOFT-NEXT: push {r7, lr} ; SOFT-NEXT: bl __aeabi_d2lz -; SOFT-NEXT: asrs r3, r1, #31 -; SOFT-NEXT: ands r3, r1 -; SOFT-NEXT: movs r2, #0 +; SOFT-NEXT: mov r2, r0 +; SOFT-NEXT: movs r0, #0 ; SOFT-NEXT: cmp r1, #1 -; SOFT-NEXT: bge .LBB29_3 +; SOFT-NEXT: blt .LBB29_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bpl .LBB29_4 +; SOFT-NEXT: mvns r2, r0 ; SOFT-NEXT: .LBB29_2: @ %entry +; SOFT-NEXT: asrs r3, r1, #31 +; SOFT-NEXT: ands r3, r1 +; SOFT-NEXT: bmi .LBB29_4 +; SOFT-NEXT: @ %bb.3: @ %entry ; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: pop {r7, pc} -; SOFT-NEXT: .LBB29_3: @ %entry -; SOFT-NEXT: mvns r0, r2 -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bmi .LBB29_2 ; SOFT-NEXT: .LBB29_4: @ %entry -; SOFT-NEXT: mov r2, r0 -; SOFT-NEXT: mov r0, r2 ; SOFT-NEXT: pop {r7, pc} ; ; VFP2-LABEL: ustest_f64i32_mm: @@ -2293,16 +2015,15 @@ define i32 @stest_f32i32_mm(float %x) { ; SOFT-NEXT: subs r5, r0, r4 ; SOFT-NEXT: mov r5, r1 ; SOFT-NEXT: sbcs r5, r3 -; SOFT-NEXT: mov r5, r2 ; SOFT-NEXT: bge .LBB30_7 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB30_8 +; SOFT-NEXT: mov r4, r2 +; SOFT-NEXT: bge .LBB30_8 ; SOFT-NEXT: .LBB30_2: @ %entry -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: cmp r4, #0 ; SOFT-NEXT: bne .LBB30_4 ; SOFT-NEXT: .LBB30_3: @ %entry -; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: .LBB30_4: @ %entry ; SOFT-NEXT: mvns r3, r3 ; SOFT-NEXT: lsls r2, r2, #31 @@ -2314,12 +2035,12 @@ define i32 @stest_f32i32_mm(float %x) { ; SOFT-NEXT: .LBB30_6: @ %entry ; SOFT-NEXT: pop {r4, r5, r7, pc} ; SOFT-NEXT: .LBB30_7: @ %entry -; SOFT-NEXT: mov r5, r3 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB30_2 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r4, r2 +; SOFT-NEXT: blt .LBB30_2 ; SOFT-NEXT: .LBB30_8: @ %entry -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: mov r4, r3 +; SOFT-NEXT: cmp r4, #0 ; SOFT-NEXT: beq .LBB30_3 ; SOFT-NEXT: b .LBB30_4 ; SOFT-NEXT: .p2align 2 @@ -2414,16 +2135,15 @@ define i32 
@stest_f16i32_mm(half %x) { ; SOFT-NEXT: subs r5, r0, r4 ; SOFT-NEXT: mov r5, r1 ; SOFT-NEXT: sbcs r5, r3 -; SOFT-NEXT: mov r5, r2 ; SOFT-NEXT: bge .LBB33_7 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB33_8 +; SOFT-NEXT: mov r4, r2 +; SOFT-NEXT: bge .LBB33_8 ; SOFT-NEXT: .LBB33_2: @ %entry -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: cmp r4, #0 ; SOFT-NEXT: bne .LBB33_4 ; SOFT-NEXT: .LBB33_3: @ %entry -; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: .LBB33_4: @ %entry ; SOFT-NEXT: mvns r3, r3 ; SOFT-NEXT: lsls r2, r2, #31 @@ -2435,12 +2155,12 @@ define i32 @stest_f16i32_mm(half %x) { ; SOFT-NEXT: .LBB33_6: @ %entry ; SOFT-NEXT: pop {r4, r5, r7, pc} ; SOFT-NEXT: .LBB33_7: @ %entry -; SOFT-NEXT: mov r5, r3 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB33_2 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r4, r2 +; SOFT-NEXT: blt .LBB33_2 ; SOFT-NEXT: .LBB33_8: @ %entry -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: mov r4, r3 +; SOFT-NEXT: cmp r4, #0 ; SOFT-NEXT: beq .LBB33_3 ; SOFT-NEXT: b .LBB33_4 ; SOFT-NEXT: .p2align 2 @@ -2973,81 +2693,78 @@ define i64 @stest_f64i64_mm(double %x) { ; SOFT: @ %bb.0: @ %entry ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #4 -; SOFT-NEXT: sub sp, #4 +; SOFT-NEXT: .pad #12 +; SOFT-NEXT: sub sp, #12 ; SOFT-NEXT: bl __fixdfti -; SOFT-NEXT: movs r4, #1 +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: movs r0, #1 ; SOFT-NEXT: movs r5, #0 ; SOFT-NEXT: ldr r6, .LCPI45_0 -; SOFT-NEXT: str r0, [sp] @ 4-byte Spill -; SOFT-NEXT: adds r0, r0, #1 -; SOFT-NEXT: mov r0, r1 -; SOFT-NEXT: sbcs r0, r6 -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: sbcs r0, r5 -; SOFT-NEXT: mov r0, r3 -; SOFT-NEXT: sbcs r0, r5 -; SOFT-NEXT: mov r7, r4 -; SOFT-NEXT: bge .LBB45_12 +; SOFT-NEXT: adds r4, r7, #1 +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: sbcs r4, r6 +; SOFT-NEXT: mov r4, r2 +; SOFT-NEXT: sbcs r4, r5 +; SOFT-NEXT: mov r4, r3 +; SOFT-NEXT: sbcs r4, r5 +; SOFT-NEXT: mov r4, 
r0 +; SOFT-NEXT: blt .LBB45_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB45_13 +; SOFT-NEXT: mov r4, r5 ; SOFT-NEXT: .LBB45_2: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB45_14 -; SOFT-NEXT: .LBB45_3: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB45_5 +; SOFT-NEXT: mvns r6, r5 +; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: beq .LBB45_12 +; SOFT-NEXT: @ %bb.3: @ %entry +; SOFT-NEXT: beq .LBB45_13 ; SOFT-NEXT: .LBB45_4: @ %entry -; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: beq .LBB45_14 ; SOFT-NEXT: .LBB45_5: @ %entry -; SOFT-NEXT: mvns r0, r5 -; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill ; SOFT-NEXT: bne .LBB45_7 -; SOFT-NEXT: @ %bb.6: @ %entry -; SOFT-NEXT: str r0, [sp] @ 4-byte Spill -; SOFT-NEXT: .LBB45_7: @ %entry -; SOFT-NEXT: lsls r6, r4, #31 -; SOFT-NEXT: ldr r7, [sp] @ 4-byte Reload -; SOFT-NEXT: rsbs r7, r7, #0 +; SOFT-NEXT: .LBB45_6: @ %entry ; SOFT-NEXT: mov r7, r6 -; SOFT-NEXT: sbcs r7, r1 -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: sbcs r7, r2 -; SOFT-NEXT: sbcs r0, r3 +; SOFT-NEXT: .LBB45_7: @ %entry +; SOFT-NEXT: lsls r3, r0, #31 +; SOFT-NEXT: rsbs r4, r7, #0 +; SOFT-NEXT: mov r4, r3 +; SOFT-NEXT: sbcs r4, r1 +; SOFT-NEXT: mov r4, r6 +; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: sbcs r4, r2 +; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: sbcs r6, r2 ; SOFT-NEXT: bge .LBB45_15 ; SOFT-NEXT: @ %bb.8: @ %entry -; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: cmp r0, #0 ; SOFT-NEXT: beq .LBB45_16 ; SOFT-NEXT: .LBB45_9: @ %entry -; SOFT-NEXT: cmp r4, #0 ; SOFT-NEXT: bne .LBB45_11 ; SOFT-NEXT: .LBB45_10: @ %entry -; SOFT-NEXT: str r4, [sp] @ 4-byte Spill +; SOFT-NEXT: mov r1, r3 ; SOFT-NEXT: .LBB45_11: @ %entry -; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload -; SOFT-NEXT: add sp, #4 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: add sp, #12 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .LBB45_12: @ %entry -; SOFT-NEXT: mov r7, r5 -; 
SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB45_2 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bne .LBB45_4 ; SOFT-NEXT: .LBB45_13: @ %entry -; SOFT-NEXT: mov r3, r7 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB45_3 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: bne .LBB45_5 ; SOFT-NEXT: .LBB45_14: @ %entry -; SOFT-NEXT: mov r2, r7 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB45_4 -; SOFT-NEXT: b .LBB45_5 +; SOFT-NEXT: ldr r1, .LCPI45_0 +; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: beq .LBB45_6 +; SOFT-NEXT: b .LBB45_7 ; SOFT-NEXT: .LBB45_15: @ %entry -; SOFT-NEXT: mov r4, r5 -; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: cmp r0, #0 ; SOFT-NEXT: bne .LBB45_9 ; SOFT-NEXT: .LBB45_16: @ %entry -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: mov r7, r0 ; SOFT-NEXT: beq .LBB45_10 ; SOFT-NEXT: b .LBB45_11 ; SOFT-NEXT: .p2align 2 @@ -3086,8 +2803,8 @@ define i64 @stest_f64i64_mm(double %x) { ; VFP2-NEXT: movlt.w r12, #1 ; VFP2-NEXT: cmp.w r12, #0 ; VFP2-NEXT: itt eq -; VFP2-NEXT: moveq r1, lr ; VFP2-NEXT: moveq r0, r12 +; VFP2-NEXT: moveq r1, lr ; VFP2-NEXT: pop {r4, r5, r7, pc} ; ; FULL-LABEL: stest_f64i64_mm: @@ -3114,8 +2831,8 @@ define i64 @stest_f64i64_mm(double %x) { ; FULL-NEXT: sbcs.w r2, r3, r5 ; FULL-NEXT: cset r2, lt ; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r1, r1, r12, ne ; FULL-NEXT: csel r0, r0, r2, ne +; FULL-NEXT: csel r1, r1, r12, ne ; FULL-NEXT: pop {r4, r5, r7, pc} entry: %conv = fptosi double %x to i128 @@ -3139,7 +2856,6 @@ define i64 @utest_f64i64_mm(double %x) { ; SOFT-NEXT: cmp r4, #0 ; SOFT-NEXT: beq .LBB46_5 ; SOFT-NEXT: .LBB46_2: @ %entry -; SOFT-NEXT: cmp r4, #0 ; SOFT-NEXT: beq .LBB46_6 ; SOFT-NEXT: .LBB46_3: @ %entry ; SOFT-NEXT: pop {r4, pc} @@ -3149,7 +2865,6 @@ define i64 @utest_f64i64_mm(double %x) { ; SOFT-NEXT: bne .LBB46_2 ; SOFT-NEXT: .LBB46_5: @ %entry ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: cmp r4, #0 ; SOFT-NEXT: bne .LBB46_3 ; SOFT-NEXT: .LBB46_6: @ 
%entry ; SOFT-NEXT: mov r1, r4 @@ -3193,8 +2908,8 @@ entry: define i64 @ustest_f64i64_mm(double %x) { ; SOFT-LABEL: ustest_f64i64_mm: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, r5, r7, lr} -; SOFT-NEXT: push {r4, r5, r7, lr} +; SOFT-NEXT: .save {r4, lr} +; SOFT-NEXT: push {r4, lr} ; SOFT-NEXT: bl __fixdfti ; SOFT-NEXT: mov r4, r1 ; SOFT-NEXT: movs r1, #0 @@ -3203,46 +2918,42 @@ define i64 @ustest_f64i64_mm(double %x) { ; SOFT-NEXT: sbcs r2, r1 ; SOFT-NEXT: blt .LBB47_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r5, r1 -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: mov r2, r1 +; SOFT-NEXT: cmp r2, #0 ; SOFT-NEXT: beq .LBB47_3 ; SOFT-NEXT: b .LBB47_4 ; SOFT-NEXT: .LBB47_2: -; SOFT-NEXT: movs r5, #1 -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: movs r2, #1 +; SOFT-NEXT: cmp r2, #0 ; SOFT-NEXT: bne .LBB47_4 ; SOFT-NEXT: .LBB47_3: @ %entry -; SOFT-NEXT: mov r3, r5 +; SOFT-NEXT: mov r4, r2 ; SOFT-NEXT: .LBB47_4: @ %entry -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB47_6 +; SOFT-NEXT: beq .LBB47_10 ; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: bne .LBB47_7 ; SOFT-NEXT: .LBB47_6: @ %entry +; SOFT-NEXT: mov r3, r2 +; SOFT-NEXT: .LBB47_7: @ %entry ; SOFT-NEXT: cmp r3, #0 ; SOFT-NEXT: mov r2, r1 -; SOFT-NEXT: bpl .LBB47_10 -; SOFT-NEXT: @ %bb.7: @ %entry -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB47_11 -; SOFT-NEXT: .LBB47_8: @ %entry -; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: bpl .LBB47_11 +; SOFT-NEXT: @ %bb.8: @ %entry ; SOFT-NEXT: bpl .LBB47_12 ; SOFT-NEXT: .LBB47_9: @ %entry ; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: pop {r4, r5, r7, pc} +; SOFT-NEXT: pop {r4, pc} ; SOFT-NEXT: .LBB47_10: @ %entry -; SOFT-NEXT: mov r2, r0 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB47_8 +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: beq .LBB47_6 +; SOFT-NEXT: b .LBB47_7 ; SOFT-NEXT: .LBB47_11: @ %entry -; SOFT-NEXT: mov r4, r5 -; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: mov r2, r0 ; SOFT-NEXT: bmi .LBB47_9 ; SOFT-NEXT: .LBB47_12: @ %entry ; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: mov 
r0, r2 -; SOFT-NEXT: pop {r4, r5, r7, pc} +; SOFT-NEXT: pop {r4, pc} ; ; VFP2-LABEL: ustest_f64i64_mm: ; VFP2: @ %bb.0: @ %entry @@ -3255,17 +2966,13 @@ define i64 @ustest_f64i64_mm(double %x) { ; VFP2-NEXT: it lt ; VFP2-NEXT: movlt.w r12, #1 ; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: itt eq +; VFP2-NEXT: itte eq +; VFP2-NEXT: moveq r1, r12 ; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: moveq r3, r12 -; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: it mi -; VFP2-NEXT: movmi r0, #0 +; VFP2-NEXT: movne r12, r3 ; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r1, r12 -; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: it mi +; VFP2-NEXT: itt mi +; VFP2-NEXT: movmi r0, #0 ; VFP2-NEXT: movmi r1, #0 ; VFP2-NEXT: pop {r7, pc} ; @@ -3278,15 +2985,12 @@ define i64 @ustest_f64i64_mm(double %x) { ; FULL-NEXT: sbcs r2, r3, #0 ; FULL-NEXT: cset r2, lt ; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r3, r3, r2, ne +; FULL-NEXT: csel r1, r1, r2, ne ; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: it mi -; FULL-NEXT: movmi r0, #0 +; FULL-NEXT: csel r2, r3, r2, ne ; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r1, r1, r2, ne -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: it mi +; FULL-NEXT: itt mi +; FULL-NEXT: movmi r0, #0 ; FULL-NEXT: movmi r1, #0 ; FULL-NEXT: pop {r7, pc} entry: @@ -3302,81 +3006,78 @@ define i64 @stest_f32i64_mm(float %x) { ; SOFT: @ %bb.0: @ %entry ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #4 -; SOFT-NEXT: sub sp, #4 +; SOFT-NEXT: .pad #12 +; SOFT-NEXT: sub sp, #12 ; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: movs r4, #1 +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: movs r0, #1 ; SOFT-NEXT: movs r5, #0 ; SOFT-NEXT: ldr r6, .LCPI48_0 -; SOFT-NEXT: str r0, [sp] @ 4-byte Spill -; SOFT-NEXT: adds r0, r0, #1 -; SOFT-NEXT: mov r0, r1 -; SOFT-NEXT: sbcs r0, r6 -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: sbcs r0, r5 -; SOFT-NEXT: mov r0, r3 -; SOFT-NEXT: sbcs r0, r5 -; SOFT-NEXT: mov r7, r4 -; SOFT-NEXT: bge .LBB48_12 +; SOFT-NEXT: adds 
r4, r7, #1 +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: sbcs r4, r6 +; SOFT-NEXT: mov r4, r2 +; SOFT-NEXT: sbcs r4, r5 +; SOFT-NEXT: mov r4, r3 +; SOFT-NEXT: sbcs r4, r5 +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: blt .LBB48_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB48_13 +; SOFT-NEXT: mov r4, r5 ; SOFT-NEXT: .LBB48_2: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB48_14 -; SOFT-NEXT: .LBB48_3: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB48_5 +; SOFT-NEXT: mvns r6, r5 +; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: beq .LBB48_12 +; SOFT-NEXT: @ %bb.3: @ %entry +; SOFT-NEXT: beq .LBB48_13 ; SOFT-NEXT: .LBB48_4: @ %entry -; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: beq .LBB48_14 ; SOFT-NEXT: .LBB48_5: @ %entry -; SOFT-NEXT: mvns r0, r5 -; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill ; SOFT-NEXT: bne .LBB48_7 -; SOFT-NEXT: @ %bb.6: @ %entry -; SOFT-NEXT: str r0, [sp] @ 4-byte Spill -; SOFT-NEXT: .LBB48_7: @ %entry -; SOFT-NEXT: lsls r6, r4, #31 -; SOFT-NEXT: ldr r7, [sp] @ 4-byte Reload -; SOFT-NEXT: rsbs r7, r7, #0 +; SOFT-NEXT: .LBB48_6: @ %entry ; SOFT-NEXT: mov r7, r6 -; SOFT-NEXT: sbcs r7, r1 -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: sbcs r7, r2 -; SOFT-NEXT: sbcs r0, r3 +; SOFT-NEXT: .LBB48_7: @ %entry +; SOFT-NEXT: lsls r3, r0, #31 +; SOFT-NEXT: rsbs r4, r7, #0 +; SOFT-NEXT: mov r4, r3 +; SOFT-NEXT: sbcs r4, r1 +; SOFT-NEXT: mov r4, r6 +; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: sbcs r4, r2 +; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: sbcs r6, r2 ; SOFT-NEXT: bge .LBB48_15 ; SOFT-NEXT: @ %bb.8: @ %entry -; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: cmp r0, #0 ; SOFT-NEXT: beq .LBB48_16 ; SOFT-NEXT: .LBB48_9: @ %entry -; SOFT-NEXT: cmp r4, #0 ; SOFT-NEXT: bne .LBB48_11 ; SOFT-NEXT: .LBB48_10: @ %entry -; SOFT-NEXT: str r4, [sp] @ 4-byte Spill +; SOFT-NEXT: mov r1, r3 ; SOFT-NEXT: .LBB48_11: @ %entry -; SOFT-NEXT: ldr r0, [sp] @ 4-byte 
Reload -; SOFT-NEXT: add sp, #4 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: add sp, #12 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .LBB48_12: @ %entry -; SOFT-NEXT: mov r7, r5 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB48_2 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bne .LBB48_4 ; SOFT-NEXT: .LBB48_13: @ %entry -; SOFT-NEXT: mov r3, r7 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB48_3 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: bne .LBB48_5 ; SOFT-NEXT: .LBB48_14: @ %entry -; SOFT-NEXT: mov r2, r7 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB48_4 -; SOFT-NEXT: b .LBB48_5 +; SOFT-NEXT: ldr r1, .LCPI48_0 +; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: beq .LBB48_6 +; SOFT-NEXT: b .LBB48_7 ; SOFT-NEXT: .LBB48_15: @ %entry -; SOFT-NEXT: mov r4, r5 -; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: cmp r0, #0 ; SOFT-NEXT: bne .LBB48_9 ; SOFT-NEXT: .LBB48_16: @ %entry -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: mov r7, r0 ; SOFT-NEXT: beq .LBB48_10 ; SOFT-NEXT: b .LBB48_11 ; SOFT-NEXT: .p2align 2 @@ -3415,8 +3116,8 @@ define i64 @stest_f32i64_mm(float %x) { ; VFP2-NEXT: movlt.w r12, #1 ; VFP2-NEXT: cmp.w r12, #0 ; VFP2-NEXT: itt eq -; VFP2-NEXT: moveq r1, lr ; VFP2-NEXT: moveq r0, r12 +; VFP2-NEXT: moveq r1, lr ; VFP2-NEXT: pop {r4, r5, r7, pc} ; ; FULL-LABEL: stest_f32i64_mm: @@ -3443,8 +3144,8 @@ define i64 @stest_f32i64_mm(float %x) { ; FULL-NEXT: sbcs.w r2, r3, r5 ; FULL-NEXT: cset r2, lt ; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r1, r1, r12, ne ; FULL-NEXT: csel r0, r0, r2, ne +; FULL-NEXT: csel r1, r1, r12, ne ; FULL-NEXT: pop {r4, r5, r7, pc} entry: %conv = fptosi float %x to i128 @@ -3468,7 +3169,6 @@ define i64 @utest_f32i64_mm(float %x) { ; SOFT-NEXT: cmp r4, #0 ; SOFT-NEXT: beq .LBB49_5 ; SOFT-NEXT: .LBB49_2: @ %entry -; SOFT-NEXT: cmp r4, #0 ; SOFT-NEXT: beq .LBB49_6 ; SOFT-NEXT: .LBB49_3: @ %entry ; SOFT-NEXT: pop {r4, pc} @@ -3478,7 +3178,6 @@ define i64 
@utest_f32i64_mm(float %x) { ; SOFT-NEXT: bne .LBB49_2 ; SOFT-NEXT: .LBB49_5: @ %entry ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: cmp r4, #0 ; SOFT-NEXT: bne .LBB49_3 ; SOFT-NEXT: .LBB49_6: @ %entry ; SOFT-NEXT: mov r1, r4 @@ -3522,8 +3221,8 @@ entry: define i64 @ustest_f32i64_mm(float %x) { ; SOFT-LABEL: ustest_f32i64_mm: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, r5, r7, lr} -; SOFT-NEXT: push {r4, r5, r7, lr} +; SOFT-NEXT: .save {r4, lr} +; SOFT-NEXT: push {r4, lr} ; SOFT-NEXT: bl __fixsfti ; SOFT-NEXT: mov r4, r1 ; SOFT-NEXT: movs r1, #0 @@ -3532,46 +3231,42 @@ define i64 @ustest_f32i64_mm(float %x) { ; SOFT-NEXT: sbcs r2, r1 ; SOFT-NEXT: blt .LBB50_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r5, r1 -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: mov r2, r1 +; SOFT-NEXT: cmp r2, #0 ; SOFT-NEXT: beq .LBB50_3 ; SOFT-NEXT: b .LBB50_4 ; SOFT-NEXT: .LBB50_2: -; SOFT-NEXT: movs r5, #1 -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: movs r2, #1 +; SOFT-NEXT: cmp r2, #0 ; SOFT-NEXT: bne .LBB50_4 ; SOFT-NEXT: .LBB50_3: @ %entry -; SOFT-NEXT: mov r3, r5 +; SOFT-NEXT: mov r4, r2 ; SOFT-NEXT: .LBB50_4: @ %entry -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB50_6 +; SOFT-NEXT: beq .LBB50_10 ; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: bne .LBB50_7 ; SOFT-NEXT: .LBB50_6: @ %entry +; SOFT-NEXT: mov r3, r2 +; SOFT-NEXT: .LBB50_7: @ %entry ; SOFT-NEXT: cmp r3, #0 ; SOFT-NEXT: mov r2, r1 -; SOFT-NEXT: bpl .LBB50_10 -; SOFT-NEXT: @ %bb.7: @ %entry -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB50_11 -; SOFT-NEXT: .LBB50_8: @ %entry -; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: bpl .LBB50_11 +; SOFT-NEXT: @ %bb.8: @ %entry ; SOFT-NEXT: bpl .LBB50_12 ; SOFT-NEXT: .LBB50_9: @ %entry ; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: pop {r4, r5, r7, pc} +; SOFT-NEXT: pop {r4, pc} ; SOFT-NEXT: .LBB50_10: @ %entry -; SOFT-NEXT: mov r2, r0 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB50_8 +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: beq .LBB50_6 +; SOFT-NEXT: b .LBB50_7 ; SOFT-NEXT: 
.LBB50_11: @ %entry -; SOFT-NEXT: mov r4, r5 -; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: mov r2, r0 ; SOFT-NEXT: bmi .LBB50_9 ; SOFT-NEXT: .LBB50_12: @ %entry ; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: pop {r4, r5, r7, pc} +; SOFT-NEXT: pop {r4, pc} ; ; VFP2-LABEL: ustest_f32i64_mm: ; VFP2: @ %bb.0: @ %entry @@ -3584,17 +3279,13 @@ define i64 @ustest_f32i64_mm(float %x) { ; VFP2-NEXT: it lt ; VFP2-NEXT: movlt.w r12, #1 ; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: itt eq +; VFP2-NEXT: itte eq +; VFP2-NEXT: moveq r1, r12 ; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: moveq r3, r12 -; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: it mi -; VFP2-NEXT: movmi r0, #0 +; VFP2-NEXT: movne r12, r3 ; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r1, r12 -; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: it mi +; VFP2-NEXT: itt mi +; VFP2-NEXT: movmi r0, #0 ; VFP2-NEXT: movmi r1, #0 ; VFP2-NEXT: pop {r7, pc} ; @@ -3607,15 +3298,12 @@ define i64 @ustest_f32i64_mm(float %x) { ; FULL-NEXT: sbcs r2, r3, #0 ; FULL-NEXT: cset r2, lt ; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r3, r3, r2, ne +; FULL-NEXT: csel r1, r1, r2, ne ; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: it mi -; FULL-NEXT: movmi r0, #0 +; FULL-NEXT: csel r2, r3, r2, ne ; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r1, r1, r2, ne -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: it mi +; FULL-NEXT: itt mi +; FULL-NEXT: movmi r0, #0 ; FULL-NEXT: movmi r1, #0 ; FULL-NEXT: pop {r7, pc} entry: @@ -3631,83 +3319,80 @@ define i64 @stest_f16i64_mm(half %x) { ; SOFT: @ %bb.0: @ %entry ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #4 -; SOFT-NEXT: sub sp, #4 +; SOFT-NEXT: .pad #12 +; SOFT-NEXT: sub sp, #12 ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f ; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: movs r4, #1 +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: movs r0, #1 ; SOFT-NEXT: movs r5, #0 ; SOFT-NEXT: ldr r6, .LCPI51_0 -; SOFT-NEXT: str r0, [sp] @ 4-byte Spill -; 
SOFT-NEXT: adds r0, r0, #1 -; SOFT-NEXT: mov r0, r1 -; SOFT-NEXT: sbcs r0, r6 -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: sbcs r0, r5 -; SOFT-NEXT: mov r0, r3 -; SOFT-NEXT: sbcs r0, r5 -; SOFT-NEXT: mov r7, r4 -; SOFT-NEXT: bge .LBB51_12 +; SOFT-NEXT: adds r4, r7, #1 +; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: sbcs r4, r6 +; SOFT-NEXT: mov r4, r2 +; SOFT-NEXT: sbcs r4, r5 +; SOFT-NEXT: mov r4, r3 +; SOFT-NEXT: sbcs r4, r5 +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: blt .LBB51_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB51_13 +; SOFT-NEXT: mov r4, r5 ; SOFT-NEXT: .LBB51_2: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB51_14 -; SOFT-NEXT: .LBB51_3: @ %entry -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB51_5 +; SOFT-NEXT: mvns r6, r5 +; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: beq .LBB51_12 +; SOFT-NEXT: @ %bb.3: @ %entry +; SOFT-NEXT: beq .LBB51_13 ; SOFT-NEXT: .LBB51_4: @ %entry -; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: beq .LBB51_14 ; SOFT-NEXT: .LBB51_5: @ %entry -; SOFT-NEXT: mvns r0, r5 -; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill ; SOFT-NEXT: bne .LBB51_7 -; SOFT-NEXT: @ %bb.6: @ %entry -; SOFT-NEXT: str r0, [sp] @ 4-byte Spill -; SOFT-NEXT: .LBB51_7: @ %entry -; SOFT-NEXT: lsls r6, r4, #31 -; SOFT-NEXT: ldr r7, [sp] @ 4-byte Reload -; SOFT-NEXT: rsbs r7, r7, #0 +; SOFT-NEXT: .LBB51_6: @ %entry ; SOFT-NEXT: mov r7, r6 -; SOFT-NEXT: sbcs r7, r1 -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: sbcs r7, r2 -; SOFT-NEXT: sbcs r0, r3 +; SOFT-NEXT: .LBB51_7: @ %entry +; SOFT-NEXT: lsls r3, r0, #31 +; SOFT-NEXT: rsbs r4, r7, #0 +; SOFT-NEXT: mov r4, r3 +; SOFT-NEXT: sbcs r4, r1 +; SOFT-NEXT: mov r4, r6 +; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: sbcs r4, r2 +; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: sbcs r6, r2 ; SOFT-NEXT: bge .LBB51_15 ; SOFT-NEXT: @ %bb.8: @ %entry -; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: cmp r0, #0 ; SOFT-NEXT: beq .LBB51_16 ; 
SOFT-NEXT: .LBB51_9: @ %entry -; SOFT-NEXT: cmp r4, #0 ; SOFT-NEXT: bne .LBB51_11 ; SOFT-NEXT: .LBB51_10: @ %entry -; SOFT-NEXT: str r4, [sp] @ 4-byte Spill +; SOFT-NEXT: mov r1, r3 ; SOFT-NEXT: .LBB51_11: @ %entry -; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload -; SOFT-NEXT: add sp, #4 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: add sp, #12 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .LBB51_12: @ %entry -; SOFT-NEXT: mov r7, r5 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB51_2 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bne .LBB51_4 ; SOFT-NEXT: .LBB51_13: @ %entry -; SOFT-NEXT: mov r3, r7 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB51_3 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: str r2, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: bne .LBB51_5 ; SOFT-NEXT: .LBB51_14: @ %entry -; SOFT-NEXT: mov r2, r7 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB51_4 -; SOFT-NEXT: b .LBB51_5 +; SOFT-NEXT: ldr r1, .LCPI51_0 +; SOFT-NEXT: str r3, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: beq .LBB51_6 +; SOFT-NEXT: b .LBB51_7 ; SOFT-NEXT: .LBB51_15: @ %entry -; SOFT-NEXT: mov r4, r5 -; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: cmp r0, #0 ; SOFT-NEXT: bne .LBB51_9 ; SOFT-NEXT: .LBB51_16: @ %entry -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: mov r7, r0 ; SOFT-NEXT: beq .LBB51_10 ; SOFT-NEXT: b .LBB51_11 ; SOFT-NEXT: .p2align 2 @@ -3749,8 +3434,8 @@ define i64 @stest_f16i64_mm(half %x) { ; VFP2-NEXT: movlt.w r12, #1 ; VFP2-NEXT: cmp.w r12, #0 ; VFP2-NEXT: itt eq -; VFP2-NEXT: moveq r1, lr ; VFP2-NEXT: moveq r0, r12 +; VFP2-NEXT: moveq r1, lr ; VFP2-NEXT: pop {r4, r5, r7, pc} ; ; FULL-LABEL: stest_f16i64_mm: @@ -3779,8 +3464,8 @@ define i64 @stest_f16i64_mm(half %x) { ; FULL-NEXT: sbcs.w r2, r3, r5 ; FULL-NEXT: cset r2, lt ; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r1, r1, r12, ne ; FULL-NEXT: csel r0, r0, r2, ne +; FULL-NEXT: csel r1, r1, r12, ne ; FULL-NEXT: pop {r4, r5, r7, pc} entry: %conv = fptosi half %x to i128 @@ -3806,7 +3491,6 @@ define i64 
@utesth_f16i64_mm(half %x) { ; SOFT-NEXT: cmp r4, #0 ; SOFT-NEXT: beq .LBB52_5 ; SOFT-NEXT: .LBB52_2: @ %entry -; SOFT-NEXT: cmp r4, #0 ; SOFT-NEXT: beq .LBB52_6 ; SOFT-NEXT: .LBB52_3: @ %entry ; SOFT-NEXT: pop {r4, pc} @@ -3816,7 +3500,6 @@ define i64 @utesth_f16i64_mm(half %x) { ; SOFT-NEXT: bne .LBB52_2 ; SOFT-NEXT: .LBB52_5: @ %entry ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: cmp r4, #0 ; SOFT-NEXT: bne .LBB52_3 ; SOFT-NEXT: .LBB52_6: @ %entry ; SOFT-NEXT: mov r1, r4 @@ -3865,8 +3548,8 @@ entry: define i64 @ustest_f16i64_mm(half %x) { ; SOFT-LABEL: ustest_f16i64_mm: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: .save {r4, r5, r7, lr} -; SOFT-NEXT: push {r4, r5, r7, lr} +; SOFT-NEXT: .save {r4, lr} +; SOFT-NEXT: push {r4, lr} ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f ; SOFT-NEXT: bl __fixsfti @@ -3877,46 +3560,42 @@ define i64 @ustest_f16i64_mm(half %x) { ; SOFT-NEXT: sbcs r2, r1 ; SOFT-NEXT: blt .LBB53_2 ; SOFT-NEXT: @ %bb.1: @ %entry -; SOFT-NEXT: mov r5, r1 -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: mov r2, r1 +; SOFT-NEXT: cmp r2, #0 ; SOFT-NEXT: beq .LBB53_3 ; SOFT-NEXT: b .LBB53_4 ; SOFT-NEXT: .LBB53_2: -; SOFT-NEXT: movs r5, #1 -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: movs r2, #1 +; SOFT-NEXT: cmp r2, #0 ; SOFT-NEXT: bne .LBB53_4 ; SOFT-NEXT: .LBB53_3: @ %entry -; SOFT-NEXT: mov r3, r5 +; SOFT-NEXT: mov r4, r2 ; SOFT-NEXT: .LBB53_4: @ %entry -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB53_6 +; SOFT-NEXT: beq .LBB53_10 ; SOFT-NEXT: @ %bb.5: @ %entry -; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: bne .LBB53_7 ; SOFT-NEXT: .LBB53_6: @ %entry +; SOFT-NEXT: mov r3, r2 +; SOFT-NEXT: .LBB53_7: @ %entry ; SOFT-NEXT: cmp r3, #0 ; SOFT-NEXT: mov r2, r1 -; SOFT-NEXT: bpl .LBB53_10 -; SOFT-NEXT: @ %bb.7: @ %entry -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB53_11 -; SOFT-NEXT: .LBB53_8: @ %entry -; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: bpl .LBB53_11 +; SOFT-NEXT: @ %bb.8: @ %entry ; SOFT-NEXT: bpl .LBB53_12 ; SOFT-NEXT: .LBB53_9: @ %entry ; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: 
pop {r4, r5, r7, pc} +; SOFT-NEXT: pop {r4, pc} ; SOFT-NEXT: .LBB53_10: @ %entry -; SOFT-NEXT: mov r2, r0 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB53_8 +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: beq .LBB53_6 +; SOFT-NEXT: b .LBB53_7 ; SOFT-NEXT: .LBB53_11: @ %entry -; SOFT-NEXT: mov r4, r5 -; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: mov r2, r0 ; SOFT-NEXT: bmi .LBB53_9 ; SOFT-NEXT: .LBB53_12: @ %entry ; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: pop {r4, r5, r7, pc} +; SOFT-NEXT: pop {r4, pc} ; ; VFP2-LABEL: ustest_f16i64_mm: ; VFP2: @ %bb.0: @ %entry @@ -3932,17 +3611,13 @@ define i64 @ustest_f16i64_mm(half %x) { ; VFP2-NEXT: it lt ; VFP2-NEXT: movlt.w r12, #1 ; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: itt eq +; VFP2-NEXT: itte eq +; VFP2-NEXT: moveq r1, r12 ; VFP2-NEXT: moveq r0, r12 -; VFP2-NEXT: moveq r3, r12 -; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: it mi -; VFP2-NEXT: movmi r0, #0 +; VFP2-NEXT: movne r12, r3 ; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r1, r12 -; VFP2-NEXT: cmp r3, #0 -; VFP2-NEXT: it mi +; VFP2-NEXT: itt mi +; VFP2-NEXT: movmi r0, #0 ; VFP2-NEXT: movmi r1, #0 ; VFP2-NEXT: pop {r7, pc} ; @@ -3957,15 +3632,12 @@ define i64 @ustest_f16i64_mm(half %x) { ; FULL-NEXT: sbcs r2, r3, #0 ; FULL-NEXT: cset r2, lt ; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r3, r3, r2, ne +; FULL-NEXT: csel r1, r1, r2, ne ; FULL-NEXT: csel r0, r0, r2, ne -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: it mi -; FULL-NEXT: movmi r0, #0 +; FULL-NEXT: csel r2, r3, r2, ne ; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r1, r1, r2, ne -; FULL-NEXT: cmp r3, #0 -; FULL-NEXT: it mi +; FULL-NEXT: itt mi +; FULL-NEXT: movmi r0, #0 ; FULL-NEXT: movmi r1, #0 ; FULL-NEXT: pop {r7, pc} entry: @@ -4014,16 +3686,15 @@ define void @unroll_maxmin(ptr nocapture %0, ptr nocapture readonly %1, i32 %2) ; SOFT-NEXT: mov r2, r1 ; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload ; SOFT-NEXT: sbcs r2, r3 -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload ; SOFT-NEXT: bge .LBB54_14 ; 
SOFT-NEXT: @ %bb.3: @ in Loop: Header=BB54_2 Depth=1 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB54_15 +; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: bge .LBB54_15 ; SOFT-NEXT: .LBB54_4: @ in Loop: Header=BB54_2 Depth=1 ; SOFT-NEXT: cmp r2, #0 ; SOFT-NEXT: bne .LBB54_6 ; SOFT-NEXT: .LBB54_5: @ in Loop: Header=BB54_2 Depth=1 -; SOFT-NEXT: ldr r0, .LCPI54_0 +; SOFT-NEXT: mov r1, r2 ; SOFT-NEXT: .LBB54_6: @ in Loop: Header=BB54_2 Depth=1 ; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload ; SOFT-NEXT: subs r2, r2, r0 @@ -4049,7 +3720,6 @@ define void @unroll_maxmin(ptr nocapture %0, ptr nocapture readonly %1, i32 %2) ; SOFT-NEXT: cmp r2, #0 ; SOFT-NEXT: beq .LBB54_17 ; SOFT-NEXT: .LBB54_10: @ in Loop: Header=BB54_2 Depth=1 -; SOFT-NEXT: cmp r2, #0 ; SOFT-NEXT: bne .LBB54_12 ; SOFT-NEXT: .LBB54_11: @ in Loop: Header=BB54_2 Depth=1 ; SOFT-NEXT: ldr r0, .LCPI54_0 @@ -4063,11 +3733,11 @@ define void @unroll_maxmin(ptr nocapture %0, ptr nocapture readonly %1, i32 %2) ; SOFT-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; SOFT-NEXT: b .LBB54_1 ; SOFT-NEXT: .LBB54_14: @ in Loop: Header=BB54_2 Depth=1 -; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB54_4 +; SOFT-NEXT: ldr r0, .LCPI54_0 +; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: blt .LBB54_4 ; SOFT-NEXT: .LBB54_15: @ in Loop: Header=BB54_2 Depth=1 -; SOFT-NEXT: mov r1, r2 +; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload ; SOFT-NEXT: cmp r2, #0 ; SOFT-NEXT: beq .LBB54_5 ; SOFT-NEXT: b .LBB54_6 @@ -4077,7 +3747,6 @@ define void @unroll_maxmin(ptr nocapture %0, ptr nocapture readonly %1, i32 %2) ; SOFT-NEXT: bne .LBB54_10 ; SOFT-NEXT: .LBB54_17: @ in Loop: Header=BB54_2 Depth=1 ; SOFT-NEXT: mov r1, r2 -; SOFT-NEXT: cmp r2, #0 ; SOFT-NEXT: beq .LBB54_11 ; SOFT-NEXT: b .LBB54_12 ; SOFT-NEXT: .LBB54_18: @@ -4180,18 +3849,17 @@ define void @unroll_minmax(ptr nocapture %0, ptr nocapture readonly %1, i32 %2) ; SOFT: @ %bb.0: ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; 
SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #20 -; SOFT-NEXT: sub sp, #20 +; SOFT-NEXT: .pad #12 +; SOFT-NEXT: sub sp, #12 ; SOFT-NEXT: mov r4, r1 ; SOFT-NEXT: mov r5, r0 ; SOFT-NEXT: movs r0, #0 -; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill ; SOFT-NEXT: mvns r0, r0 -; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill ; SOFT-NEXT: movs r0, #1 ; SOFT-NEXT: lsls r1, r0, #31 -; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill ; SOFT-NEXT: lsls r7, r0, #10 ; SOFT-NEXT: b .LBB55_2 ; SOFT-NEXT: .LBB55_1: @ in Loop: Header=BB55_2 Depth=1 @@ -4199,7 +3867,7 @@ define void @unroll_minmax(ptr nocapture %0, ptr nocapture readonly %1, i32 %2) ; SOFT-NEXT: adds r4, #8 ; SOFT-NEXT: adds r5, #8 ; SOFT-NEXT: subs r7, r7, #2 -; SOFT-NEXT: beq .LBB55_18 +; SOFT-NEXT: beq .LBB55_14 ; SOFT-NEXT: .LBB55_2: @ =>This Inner Loop Header: Depth=1 ; SOFT-NEXT: ldr r0, [r4] ; SOFT-NEXT: movs r1, #79 @@ -4207,24 +3875,21 @@ define void @unroll_minmax(ptr nocapture %0, ptr nocapture readonly %1, i32 %2) ; SOFT-NEXT: mov r1, r6 ; SOFT-NEXT: bl __aeabi_fmul ; SOFT-NEXT: bl __aeabi_f2lz -; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload ; SOFT-NEXT: subs r2, r2, r0 -; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload ; SOFT-NEXT: sbcs r2, r1 -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: bge .LBB55_14 +; SOFT-NEXT: blt .LBB55_4 ; SOFT-NEXT: @ %bb.3: @ in Loop: Header=BB55_2 Depth=1 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB55_15 +; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload ; SOFT-NEXT: .LBB55_4: @ in Loop: Header=BB55_2 Depth=1 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB55_6 -; SOFT-NEXT: .LBB55_5: @ in Loop: Header=BB55_2 Depth=1 -; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: blt .LBB55_6 +; 
SOFT-NEXT: @ %bb.5: @ in Loop: Header=BB55_2 Depth=1 +; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; SOFT-NEXT: .LBB55_6: @ in Loop: Header=BB55_2 Depth=1 ; SOFT-NEXT: ldr r2, .LCPI55_0 ; SOFT-NEXT: subs r2, r0, r2 -; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: ldr r2, [sp] @ 4-byte Reload ; SOFT-NEXT: sbcs r1, r2 ; SOFT-NEXT: blt .LBB55_8 ; SOFT-NEXT: @ %bb.7: @ in Loop: Header=BB55_2 Depth=1 @@ -4235,52 +3900,31 @@ define void @unroll_minmax(ptr nocapture %0, ptr nocapture readonly %1, i32 %2) ; SOFT-NEXT: mov r1, r6 ; SOFT-NEXT: bl __aeabi_fmul ; SOFT-NEXT: bl __aeabi_f2lz -; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload ; SOFT-NEXT: subs r2, r2, r0 -; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload ; SOFT-NEXT: sbcs r2, r1 -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: bge .LBB55_16 +; SOFT-NEXT: blt .LBB55_10 ; SOFT-NEXT: @ %bb.9: @ in Loop: Header=BB55_2 Depth=1 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB55_17 +; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload ; SOFT-NEXT: .LBB55_10: @ in Loop: Header=BB55_2 Depth=1 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB55_12 -; SOFT-NEXT: .LBB55_11: @ in Loop: Header=BB55_2 Depth=1 -; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: blt .LBB55_12 +; SOFT-NEXT: @ %bb.11: @ in Loop: Header=BB55_2 Depth=1 +; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; SOFT-NEXT: .LBB55_12: @ in Loop: Header=BB55_2 Depth=1 ; SOFT-NEXT: ldr r2, .LCPI55_0 ; SOFT-NEXT: subs r2, r0, r2 -; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: ldr r2, [sp] @ 4-byte Reload ; SOFT-NEXT: sbcs r1, r2 ; SOFT-NEXT: blt .LBB55_1 ; SOFT-NEXT: @ %bb.13: @ in Loop: Header=BB55_2 Depth=1 ; SOFT-NEXT: ldr r0, .LCPI55_0 ; SOFT-NEXT: b .LBB55_1 -; SOFT-NEXT: .LBB55_14: @ in Loop: Header=BB55_2 Depth=1 -; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB55_4 -; SOFT-NEXT: .LBB55_15: 
@ in Loop: Header=BB55_2 Depth=1 -; SOFT-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB55_5 -; SOFT-NEXT: b .LBB55_6 -; SOFT-NEXT: .LBB55_16: @ in Loop: Header=BB55_2 Depth=1 -; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB55_10 -; SOFT-NEXT: .LBB55_17: @ in Loop: Header=BB55_2 Depth=1 -; SOFT-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB55_11 -; SOFT-NEXT: b .LBB55_12 -; SOFT-NEXT: .LBB55_18: -; SOFT-NEXT: add sp, #20 +; SOFT-NEXT: .LBB55_14: +; SOFT-NEXT: add sp, #12 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.19: +; SOFT-NEXT: @ %bb.15: ; SOFT-NEXT: .LCPI55_0: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; diff --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll index 78090083a0026..4d091c2302658 100644 --- a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll @@ -9,58 +9,58 @@ define <2 x i32> @stest_f64i32(<2 x double> %x) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r11, lr} ; CHECK-NEXT: push {r4, r5, r11, lr} -; CHECK-NEXT: .vsave {d8, d9, d10, d11} -; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: vmov r0, r1, d8 +; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: vmov r0, r1, d9 -; CHECK-NEXT: adr r2, .LCPI0_0 -; CHECK-NEXT: vld1.64 {d8, d9}, [r2:128] -; CHECK-NEXT: vmov.32 d10[0], r4 +; CHECK-NEXT: vmov r0, r1, d8 +; CHECK-NEXT: vmov.32 d9[0], r4 ; CHECK-NEXT: bl __aeabi_d2lz +; CHECK-NEXT: vmov.32 d8[0], r0 ; CHECK-NEXT: mvn r3, #-2147483648 ; CHECK-NEXT: subs r4, r4, r3 -; CHECK-NEXT: sbcs r4, r5, #0 -; CHECK-NEXT: vmov.32 d11[0], r0 -; CHECK-NEXT: mov r4, #0 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwlt r4, #1 +; CHECK-NEXT: adr 
r2, .LCPI0_0 +; CHECK-NEXT: vmov.32 d9[1], r5 +; CHECK-NEXT: sbcs r5, r5, #0 +; CHECK-NEXT: mov r5, #0 +; CHECK-NEXT: mvn r4, #0 +; CHECK-NEXT: movwlt r5, #1 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: mvnne r5, #0 ; CHECK-NEXT: subs r0, r0, r3 ; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: vmov.32 d11[1], r1 +; CHECK-NEXT: vmov.32 d8[1], r1 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: vmov.i32 q10, #0x80000000 ; CHECK-NEXT: movwlt r0, #1 ; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: vdup.32 d19, r5 ; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: vmov.32 d10[1], r5 -; CHECK-NEXT: mvnne r4, #0 -; CHECK-NEXT: vdup.32 d17, r0 -; CHECK-NEXT: vdup.32 d16, r4 -; CHECK-NEXT: mvn r4, #0 -; CHECK-NEXT: vbsl q8, q5, q4 -; CHECK-NEXT: vmov r0, r1, d16 -; CHECK-NEXT: vmov r3, r5, d17 +; CHECK-NEXT: vld1.64 {d16, d17}, [r2:128] +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: vdup.32 d18, r0 +; CHECK-NEXT: vbit q8, q4, q9 +; CHECK-NEXT: vmov r0, r1, d17 +; CHECK-NEXT: vmov r3, r5, d16 ; CHECK-NEXT: rsbs r0, r0, #-2147483648 ; CHECK-NEXT: sbcs r0, r4, r1 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: movwlt r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mvnne r0, #0 ; CHECK-NEXT: rsbs r1, r3, #-2147483648 ; CHECK-NEXT: sbcs r1, r4, r5 +; CHECK-NEXT: vdup.32 d19, r0 ; CHECK-NEXT: movwlt r2, #1 ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: mvnne r2, #0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: vdup.32 d19, r2 -; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: vdup.32 d18, r0 +; CHECK-NEXT: vdup.32 d18, r2 ; CHECK-NEXT: vbif q8, q10, q9 ; CHECK-NEXT: vmovn.i64 d0, q8 -; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r4, r5, r11, pc} ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.1: @@ -95,21 +95,21 @@ define <2 x i32> @utest_f64i32(<2 x double> %x) { ; CHECK-NEXT: vmov.32 d9[0], r4 ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: mvn r3, #0 +; CHECK-NEXT: subs r4, r4, r3 +; CHECK-NEXT: sbcs r5, r5, #0 ; CHECK-NEXT: vmov.32 d8[0], r0 -; CHECK-NEXT: subs r0, r0, r3 +; CHECK-NEXT: 
mov r5, #0 ; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movwlo r5, #1 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: mvnne r5, #0 +; CHECK-NEXT: subs r0, r0, r3 ; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: movwlo r0, #1 -; CHECK-NEXT: subs r1, r4, r3 -; CHECK-NEXT: sbcs r1, r5, #0 +; CHECK-NEXT: vdup.32 d17, r5 ; CHECK-NEXT: movwlo r2, #1 ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: mvnne r2, #0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: vdup.32 d17, r2 -; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: vdup.32 d16, r0 +; CHECK-NEXT: vdup.32 d16, r2 ; CHECK-NEXT: vand q9, q4, q8 ; CHECK-NEXT: vorn q8, q9, q8 ; CHECK-NEXT: vmovn.i64 d0, q8 @@ -131,49 +131,49 @@ define <2 x i32> @ustest_f64i32(<2 x double> %x) { ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: vmov r0, r1, d8 +; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: vmov r0, r1, d9 -; CHECK-NEXT: vmov.32 d8[0], r4 +; CHECK-NEXT: vmov r0, r1, d8 +; CHECK-NEXT: vmov.32 d9[0], r4 ; CHECK-NEXT: bl __aeabi_d2lz +; CHECK-NEXT: vmov.32 d8[0], r0 ; CHECK-NEXT: mvn r3, #0 ; CHECK-NEXT: subs r4, r4, r3 -; CHECK-NEXT: sbcs r4, r5, #0 -; CHECK-NEXT: vmov.32 d9[0], r0 -; CHECK-NEXT: mov r4, #0 ; CHECK-NEXT: vmov.i64 q9, #0xffffffff -; CHECK-NEXT: movwlt r4, #1 +; CHECK-NEXT: vmov.32 d9[1], r5 +; CHECK-NEXT: sbcs r5, r5, #0 +; CHECK-NEXT: mov r5, #0 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movwlt r5, #1 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: mvnne r5, #0 ; CHECK-NEXT: subs r0, r0, r3 ; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: vmov.32 d9[1], r1 +; CHECK-NEXT: vmov.32 d8[1], r1 ; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: mov r2, #0 ; CHECK-NEXT: movwlt r0, #1 ; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: vdup.32 d17, r5 ; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: vmov.32 d8[1], r5 -; CHECK-NEXT: mvnne r4, #0 -; CHECK-NEXT: vdup.32 d17, r0 -; CHECK-NEXT: vdup.32 d16, r4 +; 
CHECK-NEXT: vdup.32 d16, r0 ; CHECK-NEXT: vbsl q8, q4, q9 -; CHECK-NEXT: vmov r0, r1, d16 -; CHECK-NEXT: vmov r3, r5, d17 +; CHECK-NEXT: vmov r0, r1, d17 +; CHECK-NEXT: vmov r3, r5, d16 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: rscs r0, r1, #0 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: movwlt r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mvnne r0, #0 ; CHECK-NEXT: rsbs r1, r3, #0 ; CHECK-NEXT: rscs r1, r5, #0 +; CHECK-NEXT: vmov.32 d19[0], r0 ; CHECK-NEXT: movwlt r2, #1 ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: mvnne r2, #0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: vmov.32 d19[0], r2 -; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: vmov.32 d18[0], r0 +; CHECK-NEXT: vmov.32 d18[0], r2 ; CHECK-NEXT: vand q8, q8, q9 ; CHECK-NEXT: vmovn.i64 d0, q8 ; CHECK-NEXT: vpop {d8, d9} @@ -195,106 +195,103 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) { ; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11} -; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: vmov r0, s19 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: mov r8, r1 -; CHECK-NEXT: vmov r6, s17 -; CHECK-NEXT: vmov r10, s19 -; CHECK-NEXT: vmov.32 d8[0], r7 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: vmov.32 d10[0], r0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: vmov.32 d9[0], r0 -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r11, r1 +; CHECK-NEXT: mov r7, r1 +; CHECK-NEXT: adr r1, .LCPI3_0 +; CHECK-NEXT: vld1.64 {d10, d11}, [r1:128] +; CHECK-NEXT: vmov r5, s17 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: mvn r9, #-2147483648 +; CHECK-NEXT: vmov.32 d13[0], r6 ; CHECK-NEXT: bl __aeabi_f2lz -; 
CHECK-NEXT: mvn r6, #-2147483648 -; CHECK-NEXT: subs r3, r7, r6 -; CHECK-NEXT: sbcs r3, r8, #0 -; CHECK-NEXT: vmov.32 d11[0], r0 -; CHECK-NEXT: mov r3, #0 -; CHECK-NEXT: adr r2, .LCPI3_0 -; CHECK-NEXT: movwlt r3, #1 -; CHECK-NEXT: subs r7, r5, r6 -; CHECK-NEXT: sbcs r7, r4, #0 -; CHECK-NEXT: vmov.32 d11[1], r1 -; CHECK-NEXT: mov r7, #0 -; CHECK-NEXT: movwlt r7, #1 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: mvnne r7, #0 -; CHECK-NEXT: subs r0, r0, r6 +; CHECK-NEXT: subs r2, r6, r9 +; CHECK-NEXT: vmov.32 d12[0], r0 +; CHECK-NEXT: sbcs r2, r7, #0 +; CHECK-NEXT: vmov r8, s16 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: vmov.32 d13[1], r7 +; CHECK-NEXT: movwlt r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: mvnne r2, #0 +; CHECK-NEXT: subs r0, r0, r9 ; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: vld1.64 {d18, d19}, [r2:128] +; CHECK-NEXT: vdup.32 d17, r2 ; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: vmov.32 d12[1], r1 ; CHECK-NEXT: movwlt r0, #1 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: vmov.32 d10[1], r4 -; CHECK-NEXT: vdup.32 d17, r0 -; CHECK-NEXT: subs r0, r9, r6 -; CHECK-NEXT: sbcs r0, r11, #0 -; CHECK-NEXT: vdup.32 d16, r7 +; CHECK-NEXT: vdup.32 d16, r0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: vorr q4, q8, q8 +; CHECK-NEXT: vbsl q4, q6, q5 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vmov.32 d13[0], r0 +; CHECK-NEXT: subs r0, r0, r9 +; CHECK-NEXT: sbcs r0, r1, #0 +; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: movwlt r6, #1 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: vmov r11, r10, d8 +; CHECK-NEXT: vmov.32 d13[1], r1 +; CHECK-NEXT: mvnne r6, #0 +; CHECK-NEXT: vmov r5, r7, d9 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vmov.32 d12[0], r0 +; CHECK-NEXT: subs r0, r0, r9 +; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: vbsl q8, q5, q9 +; CHECK-NEXT: vdup.32 d17, r6 ; CHECK-NEXT: movwlt r0, #1 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: vmov.32 d9[1], r11 ; CHECK-NEXT: mvnne r0, #0 +; 
CHECK-NEXT: vmov.32 d12[1], r1 +; CHECK-NEXT: rsbs r3, r11, #-2147483648 +; CHECK-NEXT: vdup.32 d16, r0 +; CHECK-NEXT: mvn r0, #0 +; CHECK-NEXT: vbsl q8, q6, q5 +; CHECK-NEXT: adr r1, .LCPI3_1 +; CHECK-NEXT: vld1.64 {d18, d19}, [r1:128] +; CHECK-NEXT: sbcs r3, r0, r10 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: vmov r1, r2, d17 +; CHECK-NEXT: movwlt r3, #1 ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mvn r6, #0 -; CHECK-NEXT: vdup.32 d21, r0 ; CHECK-NEXT: mvnne r3, #0 -; CHECK-NEXT: vmov.32 d8[1], r8 -; CHECK-NEXT: vmov r0, r1, d16 -; CHECK-NEXT: vdup.32 d20, r3 -; CHECK-NEXT: vbit q9, q4, q10 -; CHECK-NEXT: adr r5, .LCPI3_1 -; CHECK-NEXT: vld1.64 {d20, d21}, [r5:128] -; CHECK-NEXT: vmov r5, r4, d17 -; CHECK-NEXT: vmov r3, r7, d18 -; CHECK-NEXT: rsbs r0, r0, #-2147483648 -; CHECK-NEXT: sbcs r0, r6, r1 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: rsbs r1, r3, #-2147483648 -; CHECK-NEXT: vmov r1, r3, d19 -; CHECK-NEXT: sbcs r7, r6, r7 +; CHECK-NEXT: rsbs r6, r5, #-2147483648 +; CHECK-NEXT: vmov r6, r5, d16 +; CHECK-NEXT: sbcs r7, r0, r7 ; CHECK-NEXT: mov r7, #0 ; CHECK-NEXT: movwlt r7, #1 -; CHECK-NEXT: rsbs r5, r5, #-2147483648 -; CHECK-NEXT: sbcs r5, r6, r4 -; CHECK-NEXT: mov r5, #0 -; CHECK-NEXT: movwlt r5, #1 -; CHECK-NEXT: rsbs r1, r1, #-2147483648 -; CHECK-NEXT: sbcs r1, r6, r3 -; CHECK-NEXT: movwlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: mvnne r2, #0 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: mvnne r5, #0 ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: vdup.32 d25, r5 ; CHECK-NEXT: mvnne r7, #0 -; CHECK-NEXT: vdup.32 d23, r2 -; CHECK-NEXT: vdup.32 d24, r0 -; CHECK-NEXT: vbif q8, q10, q12 -; CHECK-NEXT: vdup.32 d22, r7 -; CHECK-NEXT: vbif q9, q10, q11 -; CHECK-NEXT: vmovn.i64 d1, q8 -; CHECK-NEXT: vmovn.i64 d0, q9 -; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: vdup.32 d23, r7 +; CHECK-NEXT: vdup.32 d22, r3 +; CHECK-NEXT: vbsl q11, q4, q9 +; CHECK-NEXT: vmovn.i64 d1, q11 +; CHECK-NEXT: 
rsbs r1, r1, #-2147483648 +; CHECK-NEXT: sbcs r1, r0, r2 +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: movwlt r1, #1 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: mvnne r1, #0 +; CHECK-NEXT: rsbs r2, r6, #-2147483648 +; CHECK-NEXT: sbcs r0, r0, r5 +; CHECK-NEXT: vdup.32 d21, r1 +; CHECK-NEXT: movwlt r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: mvnne r4, #0 +; CHECK-NEXT: vdup.32 d20, r4 +; CHECK-NEXT: vbif q8, q9, q10 +; CHECK-NEXT: vmovn.i64 d0, q8 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: add sp, sp, #4 ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 4 @@ -329,52 +326,52 @@ define <4 x i32> @utest_f32i32(<4 x float> %x) { ; CHECK-NEXT: vorr q4, q0, q0 ; CHECK-NEXT: vmov r0, s17 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: mov r10, r0 ; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: mov r8, r1 -; CHECK-NEXT: vmov r6, s19 +; CHECK-NEXT: vmov r5, s19 ; CHECK-NEXT: vmov r7, s18 -; CHECK-NEXT: vmov.32 d9[0], r9 +; CHECK-NEXT: vmov.32 d9[0], r10 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: vmov.32 d8[0], r0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r9, r1 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: vmov.32 d11[0], r0 ; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r10, r1 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: mvn r7, #0 -; CHECK-NEXT: subs r2, r5, r7 -; CHECK-NEXT: sbcs r2, r4, #0 +; CHECK-NEXT: mvn r3, #0 ; CHECK-NEXT: vmov.32 d10[0], r0 +; CHECK-NEXT: subs r0, r0, r3 ; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: mov r3, #0 -; CHECK-NEXT: movwlo r2, #1 -; CHECK-NEXT: subs r0, r0, r7 ; CHECK-NEXT: sbcs r0, r1, #0 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: movwlo r0, #1 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: subs r1, r6, r7 -; CHECK-NEXT: sbcs r1, r10, #0 +; CHECK-NEXT: subs r1, r5, r3 +; CHECK-NEXT: 
sbcs r1, r4, #0 ; CHECK-NEXT: mov r1, #0 ; CHECK-NEXT: movwlo r1, #1 -; CHECK-NEXT: subs r7, r9, r7 -; CHECK-NEXT: sbcs r7, r8, #0 -; CHECK-NEXT: movwlo r3, #1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mvnne r3, #0 ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: mvnne r1, #0 -; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: subs r7, r10, r3 +; CHECK-NEXT: sbcs r7, r8, #0 ; CHECK-NEXT: vdup.32 d19, r1 -; CHECK-NEXT: mvnne r2, #0 -; CHECK-NEXT: vdup.32 d17, r3 +; CHECK-NEXT: mov r7, #0 ; CHECK-NEXT: vdup.32 d18, r0 +; CHECK-NEXT: movwlo r7, #1 +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: mvnne r7, #0 +; CHECK-NEXT: subs r3, r6, r3 +; CHECK-NEXT: sbcs r3, r9, #0 +; CHECK-NEXT: vdup.32 d17, r7 +; CHECK-NEXT: movwlo r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: mvnne r2, #0 ; CHECK-NEXT: vand q10, q5, q9 ; CHECK-NEXT: vdup.32 d16, r2 ; CHECK-NEXT: vand q11, q4, q8 @@ -400,97 +397,96 @@ define <4 x i32> @ustest_f32i32(<4 x float> %x) { ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: vmov r0, s17 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: vmov r0, s19 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: vmov.32 d16[0], r2 -; CHECK-NEXT: mvn r4, #0 -; CHECK-NEXT: subs r2, r2, r4 -; CHECK-NEXT: vmov r8, s19 -; CHECK-NEXT: sbcs r2, r1, #0 -; CHECK-NEXT: vmov.32 d17[0], r5 +; CHECK-NEXT: mov r7, r1 +; CHECK-NEXT: vmov r5, s17 +; CHECK-NEXT: vmov r8, s16 +; CHECK-NEXT: vmov.32 d9[0], r6 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: mvn r9, #0 +; CHECK-NEXT: subs r2, r6, r9 +; CHECK-NEXT: sbcs r2, r7, #0 +; CHECK-NEXT: vmov.32 d8[0], r0 ; CHECK-NEXT: mov r2, #0 ; CHECK-NEXT: vmov.i64 q5, #0xffffffff ; CHECK-NEXT: movwlt r2, #1 -; CHECK-NEXT: subs r3, r5, r4 -; CHECK-NEXT: sbcs r3, r6, #0 -; CHECK-NEXT: vmov.32 d17[1], r6 -; CHECK-NEXT: mov r3, #0 -; 
CHECK-NEXT: mov r7, #0 -; CHECK-NEXT: movwlt r3, #1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: mvnne r3, #0 ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: vdup.32 d19, r3 ; CHECK-NEXT: mvnne r2, #0 -; CHECK-NEXT: vdup.32 d18, r2 -; CHECK-NEXT: vmov.32 d16[1], r1 -; CHECK-NEXT: vorr q4, q9, q9 -; CHECK-NEXT: vbsl q4, q8, q5 -; CHECK-NEXT: vmov r10, r9, d8 +; CHECK-NEXT: subs r0, r0, r9 +; CHECK-NEXT: sbcs r0, r1, #0 +; CHECK-NEXT: vmov.32 d9[1], r7 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: movwlt r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: vmov.32 d8[1], r1 +; CHECK-NEXT: mvnne r0, #0 +; CHECK-NEXT: vdup.32 d17, r2 +; CHECK-NEXT: vdup.32 d16, r0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: vbif q4, q5, q8 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: vmov.32 d12[0], r0 +; CHECK-NEXT: vmov.32 d13[0], r0 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: vmov r7, r10, d8 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: subs r2, r5, r4 -; CHECK-NEXT: vmov.32 d13[0], r0 +; CHECK-NEXT: subs r2, r5, r9 +; CHECK-NEXT: vmov.32 d12[0], r0 ; CHECK-NEXT: sbcs r2, r6, #0 ; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: vmov.32 d13[1], r6 ; CHECK-NEXT: movwlt r2, #1 -; CHECK-NEXT: subs r0, r0, r4 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: mvnne r2, #0 +; CHECK-NEXT: subs r0, r0, r9 ; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: vmov.32 d13[1], r1 +; CHECK-NEXT: vdup.32 d17, r2 ; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: vmov r5, r4, d9 +; CHECK-NEXT: vmov.32 d12[1], r1 ; CHECK-NEXT: movwlt r0, #1 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: vmov.32 d12[1], r6 -; CHECK-NEXT: mvnne r2, #0 -; CHECK-NEXT: vdup.32 d17, r0 -; CHECK-NEXT: rsbs r0, r10, #0 -; CHECK-NEXT: vdup.32 d16, r2 -; CHECK-NEXT: rscs r0, r9, #0 +; CHECK-NEXT: vmov r2, r3, d9 +; CHECK-NEXT: vdup.32 d16, r0 +; CHECK-NEXT: rsbs r7, r7, #0 ; CHECK-NEXT: vbsl q8, q6, q5 +; CHECK-NEXT: rscs r7, r10, #0 +; CHECK-NEXT: mov r7, #0 +; 
CHECK-NEXT: movwlt r7, #1 +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: vmov r0, r1, d17 +; CHECK-NEXT: mvnne r7, #0 +; CHECK-NEXT: vmov r6, r5, d16 +; CHECK-NEXT: rsbs r0, r0, #0 +; CHECK-NEXT: rscs r0, r1, #0 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: vmov r1, r2, d16 -; CHECK-NEXT: vmov r3, r6, d17 -; CHECK-NEXT: rsbs r1, r1, #0 -; CHECK-NEXT: rscs r1, r2, #0 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mvnne r0, #0 +; CHECK-NEXT: rsbs r1, r2, #0 +; CHECK-NEXT: rscs r1, r3, #0 +; CHECK-NEXT: vmov.32 d19[0], r0 ; CHECK-NEXT: mov r1, #0 ; CHECK-NEXT: movwlt r1, #1 -; CHECK-NEXT: rsbs r2, r3, #0 -; CHECK-NEXT: rscs r2, r6, #0 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwlt r2, #1 -; CHECK-NEXT: rsbs r3, r5, #0 -; CHECK-NEXT: rscs r3, r4, #0 -; CHECK-NEXT: movwlt r7, #1 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: mvnne r7, #0 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: mvnne r2, #0 ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: mvnne r1, #0 -; CHECK-NEXT: vmov.32 d21[0], r2 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: vmov.32 d20[0], r1 -; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: vmov.32 d19[0], r7 -; CHECK-NEXT: vand q8, q8, q10 -; CHECK-NEXT: vmov.32 d18[0], r0 -; CHECK-NEXT: vmovn.i64 d1, q8 -; CHECK-NEXT: vand q9, q4, q9 -; CHECK-NEXT: vmovn.i64 d0, q9 +; CHECK-NEXT: rsbs r0, r6, #0 +; CHECK-NEXT: rscs r0, r5, #0 +; CHECK-NEXT: vmov.32 d21[0], r1 +; CHECK-NEXT: movwlt r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: vmov.32 d20[0], r7 +; CHECK-NEXT: mvnne r4, #0 +; CHECK-NEXT: vmov.32 d18[0], r4 +; CHECK-NEXT: vand q10, q4, q10 +; CHECK-NEXT: vand q8, q8, q9 +; CHECK-NEXT: vmovn.i64 d1, q10 +; CHECK-NEXT: vmovn.i64 d0, q8 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} entry: @@ -510,112 +506,108 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEON-NEXT: .pad #4 ; CHECK-NEON-NEXT: sub sp, sp, #4 -; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11} 
-; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11} -; CHECK-NEON-NEXT: vmov r0, s0 +; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEON-NEXT: vmov r0, s2 ; CHECK-NEON-NEXT: vmov.f32 s16, s3 -; CHECK-NEON-NEXT: vmov.f32 s18, s2 -; CHECK-NEON-NEXT: vmov.f32 s20, s1 +; CHECK-NEON-NEXT: vmov.f32 s18, s1 +; CHECK-NEON-NEXT: vmov.f32 s20, s0 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: mov r9, r0 -; CHECK-NEON-NEXT: vmov r0, s18 -; CHECK-NEON-NEXT: vmov r10, s16 -; CHECK-NEON-NEXT: mov r8, r1 -; CHECK-NEON-NEXT: vmov r6, s20 -; CHECK-NEON-NEXT: vmov.32 d8[0], r9 +; CHECK-NEON-NEXT: mov r6, r0 +; CHECK-NEON-NEXT: vmov r0, s16 +; CHECK-NEON-NEXT: mov r5, r1 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: mov r5, r0 -; CHECK-NEON-NEXT: vmov.32 d10[0], r0 -; CHECK-NEON-NEXT: mov r0, r6 -; CHECK-NEON-NEXT: mov r4, r1 -; CHECK-NEON-NEXT: bl __aeabi_h2f -; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: mov r11, r0 -; CHECK-NEON-NEXT: vmov.32 d9[0], r0 -; CHECK-NEON-NEXT: mov r0, r10 -; CHECK-NEON-NEXT: mov r7, r1 -; CHECK-NEON-NEXT: bl __aeabi_h2f -; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: mvn r6, #-2147483648 -; CHECK-NEON-NEXT: subs r3, r9, r6 -; CHECK-NEON-NEXT: sbcs r3, r8, #0 -; CHECK-NEON-NEXT: vmov.32 d11[0], r0 -; CHECK-NEON-NEXT: mov r3, #0 -; CHECK-NEON-NEXT: adr r2, .LCPI6_0 -; CHECK-NEON-NEXT: movwlt r3, #1 -; CHECK-NEON-NEXT: subs r5, r5, r6 -; CHECK-NEON-NEXT: sbcs r5, r4, #0 -; CHECK-NEON-NEXT: vmov.32 d11[1], r1 -; CHECK-NEON-NEXT: mov r5, #0 -; CHECK-NEON-NEXT: movwlt r5, #1 -; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: mvnne r5, #0 -; CHECK-NEON-NEXT: subs r0, r0, r6 +; CHECK-NEON-NEXT: vmov r2, s18 +; CHECK-NEON-NEXT: adr r3, .LCPI6_0 +; CHECK-NEON-NEXT: vld1.64 {d8, d9}, [r3:128] +; CHECK-NEON-NEXT: mvn r9, #-2147483648 +; CHECK-NEON-NEXT: subs r3, r6, r9 +; 
CHECK-NEON-NEXT: mov r4, #0 +; CHECK-NEON-NEXT: sbcs r3, r5, #0 +; CHECK-NEON-NEXT: vmov.32 d15[0], r0 +; CHECK-NEON-NEXT: movwlt r4, #1 +; CHECK-NEON-NEXT: cmp r4, #0 +; CHECK-NEON-NEXT: mvnne r4, #0 +; CHECK-NEON-NEXT: subs r0, r0, r9 ; CHECK-NEON-NEXT: sbcs r0, r1, #0 -; CHECK-NEON-NEXT: vld1.64 {d18, d19}, [r2:128] +; CHECK-NEON-NEXT: vmov.32 d14[0], r6 ; CHECK-NEON-NEXT: mov r0, #0 -; CHECK-NEON-NEXT: mov r2, #0 +; CHECK-NEON-NEXT: vmov r8, s20 ; CHECK-NEON-NEXT: movwlt r0, #1 ; CHECK-NEON-NEXT: cmp r0, #0 ; CHECK-NEON-NEXT: mvnne r0, #0 -; CHECK-NEON-NEXT: vmov.32 d10[1], r4 -; CHECK-NEON-NEXT: vdup.32 d17, r0 -; CHECK-NEON-NEXT: subs r0, r11, r6 -; CHECK-NEON-NEXT: sbcs r0, r7, #0 -; CHECK-NEON-NEXT: vdup.32 d16, r5 +; CHECK-NEON-NEXT: vmov.32 d15[1], r1 +; CHECK-NEON-NEXT: mov r7, #0 +; CHECK-NEON-NEXT: vdup.32 d11, r0 +; CHECK-NEON-NEXT: vmov.32 d14[1], r5 +; CHECK-NEON-NEXT: mov r0, r2 +; CHECK-NEON-NEXT: bl __aeabi_h2f +; CHECK-NEON-NEXT: vdup.32 d10, r4 +; CHECK-NEON-NEXT: bl __aeabi_f2lz +; CHECK-NEON-NEXT: vmov.32 d13[0], r0 +; CHECK-NEON-NEXT: subs r0, r0, r9 +; CHECK-NEON-NEXT: vbsl q5, q7, q4 +; CHECK-NEON-NEXT: sbcs r0, r1, #0 +; CHECK-NEON-NEXT: mov r6, #0 +; CHECK-NEON-NEXT: mov r0, r8 +; CHECK-NEON-NEXT: movwlt r6, #1 +; CHECK-NEON-NEXT: cmp r6, #0 +; CHECK-NEON-NEXT: vmov r11, r10, d10 +; CHECK-NEON-NEXT: vmov.32 d13[1], r1 +; CHECK-NEON-NEXT: mvnne r6, #0 +; CHECK-NEON-NEXT: vmov r5, r4, d11 +; CHECK-NEON-NEXT: bl __aeabi_h2f +; CHECK-NEON-NEXT: bl __aeabi_f2lz +; CHECK-NEON-NEXT: vmov.32 d12[0], r0 +; CHECK-NEON-NEXT: subs r0, r0, r9 +; CHECK-NEON-NEXT: sbcs r0, r1, #0 ; CHECK-NEON-NEXT: mov r0, #0 -; CHECK-NEON-NEXT: vbsl q8, q5, q9 +; CHECK-NEON-NEXT: vdup.32 d17, r6 ; CHECK-NEON-NEXT: movwlt r0, #1 ; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: vmov.32 d9[1], r7 ; CHECK-NEON-NEXT: mvnne r0, #0 +; CHECK-NEON-NEXT: vmov.32 d12[1], r1 +; CHECK-NEON-NEXT: rsbs r3, r11, #-2147483648 +; CHECK-NEON-NEXT: vdup.32 d16, r0 +; CHECK-NEON-NEXT: 
mvn r0, #0 +; CHECK-NEON-NEXT: vbsl q8, q6, q4 +; CHECK-NEON-NEXT: adr r1, .LCPI6_1 +; CHECK-NEON-NEXT: vld1.64 {d18, d19}, [r1:128] +; CHECK-NEON-NEXT: sbcs r3, r0, r10 +; CHECK-NEON-NEXT: mov r3, #0 +; CHECK-NEON-NEXT: vmov r1, r2, d17 +; CHECK-NEON-NEXT: movwlt r3, #1 ; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: mvn r6, #0 -; CHECK-NEON-NEXT: vdup.32 d21, r0 ; CHECK-NEON-NEXT: mvnne r3, #0 -; CHECK-NEON-NEXT: vmov.32 d8[1], r8 -; CHECK-NEON-NEXT: vmov r0, r1, d16 -; CHECK-NEON-NEXT: vdup.32 d20, r3 -; CHECK-NEON-NEXT: vbit q9, q4, q10 -; CHECK-NEON-NEXT: adr r5, .LCPI6_1 -; CHECK-NEON-NEXT: vld1.64 {d20, d21}, [r5:128] -; CHECK-NEON-NEXT: vmov r5, r4, d17 -; CHECK-NEON-NEXT: vmov r3, r7, d18 -; CHECK-NEON-NEXT: rsbs r0, r0, #-2147483648 -; CHECK-NEON-NEXT: sbcs r0, r6, r1 -; CHECK-NEON-NEXT: mov r0, #0 -; CHECK-NEON-NEXT: movwlt r0, #1 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: mvnne r0, #0 -; CHECK-NEON-NEXT: rsbs r1, r3, #-2147483648 -; CHECK-NEON-NEXT: vmov r1, r3, d19 -; CHECK-NEON-NEXT: sbcs r7, r6, r7 -; CHECK-NEON-NEXT: mov r7, #0 -; CHECK-NEON-NEXT: movwlt r7, #1 -; CHECK-NEON-NEXT: rsbs r5, r5, #-2147483648 -; CHECK-NEON-NEXT: sbcs r5, r6, r4 -; CHECK-NEON-NEXT: mov r5, #0 -; CHECK-NEON-NEXT: movwlt r5, #1 +; CHECK-NEON-NEXT: rsbs r6, r5, #-2147483648 +; CHECK-NEON-NEXT: sbcs r6, r0, r4 +; CHECK-NEON-NEXT: vmov r5, r4, d16 +; CHECK-NEON-NEXT: mov r6, #0 +; CHECK-NEON-NEXT: movwlt r6, #1 +; CHECK-NEON-NEXT: cmp r6, #0 +; CHECK-NEON-NEXT: mvnne r6, #0 +; CHECK-NEON-NEXT: vdup.32 d23, r6 +; CHECK-NEON-NEXT: vdup.32 d22, r3 +; CHECK-NEON-NEXT: vbsl q11, q5, q9 +; CHECK-NEON-NEXT: vmovn.i64 d1, q11 ; CHECK-NEON-NEXT: rsbs r1, r1, #-2147483648 -; CHECK-NEON-NEXT: sbcs r1, r6, r3 -; CHECK-NEON-NEXT: movwlt r2, #1 -; CHECK-NEON-NEXT: cmp r2, #0 -; CHECK-NEON-NEXT: mvnne r2, #0 -; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: mvnne r5, #0 +; CHECK-NEON-NEXT: sbcs r1, r0, r2 +; CHECK-NEON-NEXT: mov r1, #0 +; CHECK-NEON-NEXT: movwlt r1, #1 +; 
CHECK-NEON-NEXT: cmp r1, #0 +; CHECK-NEON-NEXT: mvnne r1, #0 +; CHECK-NEON-NEXT: rsbs r2, r5, #-2147483648 +; CHECK-NEON-NEXT: sbcs r0, r0, r4 +; CHECK-NEON-NEXT: vdup.32 d21, r1 +; CHECK-NEON-NEXT: movwlt r7, #1 ; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: vdup.32 d25, r5 ; CHECK-NEON-NEXT: mvnne r7, #0 -; CHECK-NEON-NEXT: vdup.32 d23, r2 -; CHECK-NEON-NEXT: vdup.32 d24, r0 -; CHECK-NEON-NEXT: vbif q8, q10, q12 -; CHECK-NEON-NEXT: vdup.32 d22, r7 -; CHECK-NEON-NEXT: vbif q9, q10, q11 -; CHECK-NEON-NEXT: vmovn.i64 d1, q8 -; CHECK-NEON-NEXT: vmovn.i64 d0, q9 -; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEON-NEXT: vdup.32 d20, r7 +; CHECK-NEON-NEXT: vbif q8, q9, q10 +; CHECK-NEON-NEXT: vmovn.i64 d0, q8 +; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEON-NEXT: add sp, sp, #4 ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEON-NEXT: .p2align 4 @@ -635,111 +627,104 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-FP16-NEXT: .vsave {d10, d11, d12, d13} -; CHECK-FP16-NEXT: vpush {d10, d11, d12, d13} -; CHECK-FP16-NEXT: .vsave {d8} -; CHECK-FP16-NEXT: vpush {d8} -; CHECK-FP16-NEXT: vmov.u16 r0, d0[0] +; CHECK-FP16-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; CHECK-FP16-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-FP16-NEXT: vmov.u16 r0, d0[3] ; CHECK-FP16-NEXT: vorr d8, d0, d0 -; CHECK-FP16-NEXT: vmov.u16 r6, d0[1] +; CHECK-FP16-NEXT: vmov.u16 r8, d0[0] +; CHECK-FP16-NEXT: vmov.u16 r9, d0[1] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfdi ; CHECK-FP16-NEXT: mov r4, r0 ; CHECK-FP16-NEXT: vmov.u16 r0, d8[2] -; CHECK-FP16-NEXT: mov r8, r1 -; CHECK-FP16-NEXT: vmov.32 d10[0], r4 -; CHECK-FP16-NEXT: vmov s0, r0 -; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: vmov s0, r6 -; CHECK-FP16-NEXT: mov r5, r0 -; CHECK-FP16-NEXT: mov r7, r1 -; 
CHECK-FP16-NEXT: vmov.32 d12[0], r0 -; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: mov r9, r0 -; CHECK-FP16-NEXT: vmov.u16 r0, d8[3] -; CHECK-FP16-NEXT: mov r10, r1 -; CHECK-FP16-NEXT: vmov.32 d11[0], r9 +; CHECK-FP16-NEXT: mov r5, r1 +; CHECK-FP16-NEXT: vmov.32 d9[0], r4 ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: mvn r6, #-2147483648 -; CHECK-FP16-NEXT: subs r3, r4, r6 -; CHECK-FP16-NEXT: sbcs r3, r8, #0 -; CHECK-FP16-NEXT: vmov.32 d13[0], r0 -; CHECK-FP16-NEXT: mov r3, #0 ; CHECK-FP16-NEXT: adr r2, .LCPI6_0 -; CHECK-FP16-NEXT: movwlt r3, #1 -; CHECK-FP16-NEXT: subs r5, r5, r6 -; CHECK-FP16-NEXT: sbcs r5, r7, #0 -; CHECK-FP16-NEXT: vmov.32 d13[1], r1 -; CHECK-FP16-NEXT: mov r5, #0 -; CHECK-FP16-NEXT: movwlt r5, #1 -; CHECK-FP16-NEXT: cmp r5, #0 -; CHECK-FP16-NEXT: mvnne r5, #0 -; CHECK-FP16-NEXT: subs r0, r0, r6 +; CHECK-FP16-NEXT: mvn r10, #-2147483648 +; CHECK-FP16-NEXT: vld1.64 {d10, d11}, [r2:128] +; CHECK-FP16-NEXT: subs r2, r4, r10 +; CHECK-FP16-NEXT: sbcs r2, r5, #0 +; CHECK-FP16-NEXT: vmov s0, r9 +; CHECK-FP16-NEXT: mov r2, #0 +; CHECK-FP16-NEXT: vmov.32 d8[0], r0 +; CHECK-FP16-NEXT: movwlt r2, #1 +; CHECK-FP16-NEXT: cmp r2, #0 +; CHECK-FP16-NEXT: mvnne r2, #0 +; CHECK-FP16-NEXT: subs r0, r0, r10 ; CHECK-FP16-NEXT: sbcs r0, r1, #0 -; CHECK-FP16-NEXT: vld1.64 {d18, d19}, [r2:128] +; CHECK-FP16-NEXT: vmov.32 d9[1], r5 ; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: mov r2, #0 +; CHECK-FP16-NEXT: mov r6, #0 ; CHECK-FP16-NEXT: movwlt r0, #1 ; CHECK-FP16-NEXT: cmp r0, #0 +; CHECK-FP16-NEXT: vmov.32 d8[1], r1 ; CHECK-FP16-NEXT: mvnne r0, #0 -; CHECK-FP16-NEXT: vmov.32 d12[1], r7 -; CHECK-FP16-NEXT: vdup.32 d17, r0 -; CHECK-FP16-NEXT: subs r0, r9, r6 -; CHECK-FP16-NEXT: sbcs r0, r10, #0 -; CHECK-FP16-NEXT: vdup.32 d16, r5 +; CHECK-FP16-NEXT: vdup.32 d17, r2 +; CHECK-FP16-NEXT: vdup.32 d16, r0 +; CHECK-FP16-NEXT: vbif q4, q5, q8 +; CHECK-FP16-NEXT: bl __fixhfdi +; CHECK-FP16-NEXT: vmov.32 d13[0], r0 +; 
CHECK-FP16-NEXT: subs r0, r0, r10 +; CHECK-FP16-NEXT: vmov s0, r8 +; CHECK-FP16-NEXT: sbcs r0, r1, #0 +; CHECK-FP16-NEXT: mov r7, #0 +; CHECK-FP16-NEXT: vmov r9, r8, d8 +; CHECK-FP16-NEXT: movwlt r7, #1 +; CHECK-FP16-NEXT: cmp r7, #0 +; CHECK-FP16-NEXT: vmov.32 d13[1], r1 +; CHECK-FP16-NEXT: vmov r5, r4, d9 +; CHECK-FP16-NEXT: mvnne r7, #0 +; CHECK-FP16-NEXT: bl __fixhfdi +; CHECK-FP16-NEXT: vmov.32 d12[0], r0 +; CHECK-FP16-NEXT: subs r0, r0, r10 +; CHECK-FP16-NEXT: sbcs r0, r1, #0 ; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: vbsl q8, q6, q9 +; CHECK-FP16-NEXT: vdup.32 d17, r7 ; CHECK-FP16-NEXT: movwlt r0, #1 ; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: vmov.32 d11[1], r10 ; CHECK-FP16-NEXT: mvnne r0, #0 +; CHECK-FP16-NEXT: vmov.32 d12[1], r1 +; CHECK-FP16-NEXT: rsbs r3, r9, #-2147483648 +; CHECK-FP16-NEXT: vdup.32 d16, r0 +; CHECK-FP16-NEXT: mvn r0, #0 +; CHECK-FP16-NEXT: vbsl q8, q6, q5 +; CHECK-FP16-NEXT: adr r1, .LCPI6_1 +; CHECK-FP16-NEXT: vld1.64 {d18, d19}, [r1:128] +; CHECK-FP16-NEXT: sbcs r3, r0, r8 +; CHECK-FP16-NEXT: mov r3, #0 +; CHECK-FP16-NEXT: vmov r1, r2, d17 +; CHECK-FP16-NEXT: movwlt r3, #1 ; CHECK-FP16-NEXT: cmp r3, #0 -; CHECK-FP16-NEXT: mvn r6, #0 -; CHECK-FP16-NEXT: vdup.32 d21, r0 ; CHECK-FP16-NEXT: mvnne r3, #0 -; CHECK-FP16-NEXT: vmov.32 d10[1], r8 -; CHECK-FP16-NEXT: vmov r0, r1, d16 -; CHECK-FP16-NEXT: vdup.32 d20, r3 -; CHECK-FP16-NEXT: vbit q9, q5, q10 -; CHECK-FP16-NEXT: adr r5, .LCPI6_1 -; CHECK-FP16-NEXT: vld1.64 {d20, d21}, [r5:128] -; CHECK-FP16-NEXT: vmov r5, r4, d17 -; CHECK-FP16-NEXT: vmov r3, r7, d18 -; CHECK-FP16-NEXT: rsbs r0, r0, #-2147483648 -; CHECK-FP16-NEXT: sbcs r0, r6, r1 -; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: movwlt r0, #1 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: mvnne r0, #0 -; CHECK-FP16-NEXT: rsbs r1, r3, #-2147483648 -; CHECK-FP16-NEXT: vmov r1, r3, d19 -; CHECK-FP16-NEXT: sbcs r7, r6, r7 +; CHECK-FP16-NEXT: rsbs r7, r5, #-2147483648 +; CHECK-FP16-NEXT: sbcs r7, r0, r4 +; 
CHECK-FP16-NEXT: vmov r5, r4, d16 ; CHECK-FP16-NEXT: mov r7, #0 ; CHECK-FP16-NEXT: movwlt r7, #1 -; CHECK-FP16-NEXT: rsbs r5, r5, #-2147483648 -; CHECK-FP16-NEXT: sbcs r5, r6, r4 -; CHECK-FP16-NEXT: mov r5, #0 -; CHECK-FP16-NEXT: movwlt r5, #1 -; CHECK-FP16-NEXT: rsbs r1, r1, #-2147483648 -; CHECK-FP16-NEXT: sbcs r1, r6, r3 -; CHECK-FP16-NEXT: movwlt r2, #1 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: mvnne r2, #0 -; CHECK-FP16-NEXT: cmp r5, #0 -; CHECK-FP16-NEXT: mvnne r5, #0 ; CHECK-FP16-NEXT: cmp r7, #0 -; CHECK-FP16-NEXT: vdup.32 d25, r5 ; CHECK-FP16-NEXT: mvnne r7, #0 -; CHECK-FP16-NEXT: vdup.32 d23, r2 -; CHECK-FP16-NEXT: vdup.32 d24, r0 -; CHECK-FP16-NEXT: vbif q8, q10, q12 -; CHECK-FP16-NEXT: vdup.32 d22, r7 -; CHECK-FP16-NEXT: vbif q9, q10, q11 -; CHECK-FP16-NEXT: vmovn.i64 d1, q8 -; CHECK-FP16-NEXT: vmovn.i64 d0, q9 -; CHECK-FP16-NEXT: vpop {d8} -; CHECK-FP16-NEXT: vpop {d10, d11, d12, d13} +; CHECK-FP16-NEXT: vdup.32 d23, r7 +; CHECK-FP16-NEXT: vdup.32 d22, r3 +; CHECK-FP16-NEXT: vbsl q11, q4, q9 +; CHECK-FP16-NEXT: vmovn.i64 d1, q11 +; CHECK-FP16-NEXT: rsbs r1, r1, #-2147483648 +; CHECK-FP16-NEXT: sbcs r1, r0, r2 +; CHECK-FP16-NEXT: mov r1, #0 +; CHECK-FP16-NEXT: movwlt r1, #1 +; CHECK-FP16-NEXT: cmp r1, #0 +; CHECK-FP16-NEXT: mvnne r1, #0 +; CHECK-FP16-NEXT: rsbs r2, r5, #-2147483648 +; CHECK-FP16-NEXT: sbcs r0, r0, r4 +; CHECK-FP16-NEXT: vdup.32 d21, r1 +; CHECK-FP16-NEXT: movwlt r6, #1 +; CHECK-FP16-NEXT: cmp r6, #0 +; CHECK-FP16-NEXT: mvnne r6, #0 +; CHECK-FP16-NEXT: vdup.32 d20, r6 +; CHECK-FP16-NEXT: vbif q8, q9, q10 +; CHECK-FP16-NEXT: vmovn.i64 d0, q8 +; CHECK-FP16-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} ; CHECK-FP16-NEXT: .p2align 4 ; CHECK-FP16-NEXT: @ %bb.1: @@ -766,8 +751,8 @@ entry: define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-NEON-LABEL: utesth_f16i32: ; CHECK-NEON: @ %bb.0: @ %entry -; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEON-NEXT: 
push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} ; CHECK-NEON-NEXT: .vsave {d12, d13} ; CHECK-NEON-NEXT: vpush {d12, d13} ; CHECK-NEON-NEXT: .vsave {d8, d9, d10} @@ -778,7 +763,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-NEON-NEXT: vmov.f32 s20, s0 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2ulz -; CHECK-NEON-NEXT: mov r10, r0 +; CHECK-NEON-NEXT: mov r4, r0 ; CHECK-NEON-NEXT: vmov r0, s18 ; CHECK-NEON-NEXT: mov r8, r1 ; CHECK-NEON-NEXT: bl __aeabi_h2f @@ -794,37 +779,37 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-NEON-NEXT: vmov r0, s16 ; CHECK-NEON-NEXT: mov r7, r1 ; CHECK-NEON-NEXT: bl __aeabi_h2f -; CHECK-NEON-NEXT: vmov.32 d9[0], r10 +; CHECK-NEON-NEXT: vmov.32 d9[0], r4 ; CHECK-NEON-NEXT: bl __aeabi_f2ulz -; CHECK-NEON-NEXT: mvn r4, #0 -; CHECK-NEON-NEXT: subs r2, r5, r4 -; CHECK-NEON-NEXT: sbcs r2, r7, #0 +; CHECK-NEON-NEXT: mvn r3, #0 ; CHECK-NEON-NEXT: vmov.32 d8[0], r0 +; CHECK-NEON-NEXT: subs r0, r0, r3 ; CHECK-NEON-NEXT: mov r2, #0 -; CHECK-NEON-NEXT: mov r3, #0 -; CHECK-NEON-NEXT: movwlo r2, #1 -; CHECK-NEON-NEXT: subs r0, r0, r4 ; CHECK-NEON-NEXT: sbcs r0, r1, #0 ; CHECK-NEON-NEXT: mov r0, #0 ; CHECK-NEON-NEXT: movwlo r0, #1 ; CHECK-NEON-NEXT: cmp r0, #0 ; CHECK-NEON-NEXT: mvnne r0, #0 -; CHECK-NEON-NEXT: subs r1, r10, r4 +; CHECK-NEON-NEXT: subs r1, r4, r3 ; CHECK-NEON-NEXT: sbcs r1, r8, #0 ; CHECK-NEON-NEXT: mov r1, #0 ; CHECK-NEON-NEXT: movwlo r1, #1 -; CHECK-NEON-NEXT: subs r7, r6, r4 -; CHECK-NEON-NEXT: sbcs r7, r9, #0 -; CHECK-NEON-NEXT: movwlo r3, #1 -; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: mvnne r3, #0 ; CHECK-NEON-NEXT: cmp r1, #0 ; CHECK-NEON-NEXT: mvnne r1, #0 -; CHECK-NEON-NEXT: cmp r2, #0 +; CHECK-NEON-NEXT: subs r6, r6, r3 +; CHECK-NEON-NEXT: sbcs r6, r9, #0 ; CHECK-NEON-NEXT: vdup.32 d19, r1 -; CHECK-NEON-NEXT: mvnne r2, #0 -; CHECK-NEON-NEXT: vdup.32 d17, r3 +; 
CHECK-NEON-NEXT: mov r6, #0 ; CHECK-NEON-NEXT: vdup.32 d18, r0 +; CHECK-NEON-NEXT: movwlo r6, #1 +; CHECK-NEON-NEXT: cmp r6, #0 +; CHECK-NEON-NEXT: mvnne r6, #0 +; CHECK-NEON-NEXT: subs r3, r5, r3 +; CHECK-NEON-NEXT: sbcs r3, r7, #0 +; CHECK-NEON-NEXT: vdup.32 d17, r6 +; CHECK-NEON-NEXT: movwlo r2, #1 +; CHECK-NEON-NEXT: cmp r2, #0 +; CHECK-NEON-NEXT: mvnne r2, #0 ; CHECK-NEON-NEXT: vand q10, q4, q9 ; CHECK-NEON-NEXT: vdup.32 d16, r2 ; CHECK-NEON-NEXT: vand q11, q6, q8 @@ -834,12 +819,12 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-NEON-NEXT: vmovn.i64 d0, q8 ; CHECK-NEON-NEXT: vpop {d8, d9, d10} ; CHECK-NEON-NEXT: vpop {d12, d13} -; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} ; ; CHECK-FP16-LABEL: utesth_f16i32: ; CHECK-FP16: @ %bb.0: @ %entry -; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} ; CHECK-FP16-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-FP16-NEXT: vpush {d8, d9, d10, d11} ; CHECK-FP16-NEXT: vmov.u16 r0, d0[1] @@ -847,52 +832,52 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-FP16-NEXT: vmov.u16 r5, d0[3] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixunshfdi -; CHECK-FP16-NEXT: mov r10, r0 +; CHECK-FP16-NEXT: mov r4, r0 ; CHECK-FP16-NEXT: vmov.u16 r0, d8[0] ; CHECK-FP16-NEXT: mov r8, r1 -; CHECK-FP16-NEXT: vmov.32 d11[0], r10 +; CHECK-FP16-NEXT: vmov.32 d11[0], r4 ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixunshfdi ; CHECK-FP16-NEXT: vmov s0, r5 ; CHECK-FP16-NEXT: mov r6, r0 -; CHECK-FP16-NEXT: mov r7, r1 +; CHECK-FP16-NEXT: mov r9, r1 ; CHECK-FP16-NEXT: vmov.32 d10[0], r0 ; CHECK-FP16-NEXT: bl __fixunshfdi ; CHECK-FP16-NEXT: mov r5, r0 ; CHECK-FP16-NEXT: vmov.u16 r0, d8[2] -; CHECK-FP16-NEXT: mov r9, r1 +; CHECK-FP16-NEXT: mov r7, r1 ; CHECK-FP16-NEXT: 
vmov.32 d9[0], r5 ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixunshfdi -; CHECK-FP16-NEXT: mvn r4, #0 -; CHECK-FP16-NEXT: subs r2, r6, r4 -; CHECK-FP16-NEXT: sbcs r2, r7, #0 +; CHECK-FP16-NEXT: mvn r3, #0 ; CHECK-FP16-NEXT: vmov.32 d8[0], r0 +; CHECK-FP16-NEXT: subs r0, r0, r3 ; CHECK-FP16-NEXT: mov r2, #0 -; CHECK-FP16-NEXT: mov r3, #0 -; CHECK-FP16-NEXT: movwlo r2, #1 -; CHECK-FP16-NEXT: subs r0, r0, r4 ; CHECK-FP16-NEXT: sbcs r0, r1, #0 ; CHECK-FP16-NEXT: mov r0, #0 ; CHECK-FP16-NEXT: movwlo r0, #1 ; CHECK-FP16-NEXT: cmp r0, #0 ; CHECK-FP16-NEXT: mvnne r0, #0 -; CHECK-FP16-NEXT: subs r1, r5, r4 -; CHECK-FP16-NEXT: sbcs r1, r9, #0 +; CHECK-FP16-NEXT: subs r1, r5, r3 +; CHECK-FP16-NEXT: sbcs r1, r7, #0 ; CHECK-FP16-NEXT: mov r1, #0 ; CHECK-FP16-NEXT: movwlo r1, #1 -; CHECK-FP16-NEXT: subs r7, r10, r4 -; CHECK-FP16-NEXT: sbcs r7, r8, #0 -; CHECK-FP16-NEXT: movwlo r3, #1 -; CHECK-FP16-NEXT: cmp r3, #0 -; CHECK-FP16-NEXT: mvnne r3, #0 ; CHECK-FP16-NEXT: cmp r1, #0 ; CHECK-FP16-NEXT: mvnne r1, #0 -; CHECK-FP16-NEXT: cmp r2, #0 +; CHECK-FP16-NEXT: subs r7, r4, r3 +; CHECK-FP16-NEXT: sbcs r7, r8, #0 ; CHECK-FP16-NEXT: vdup.32 d19, r1 -; CHECK-FP16-NEXT: mvnne r2, #0 -; CHECK-FP16-NEXT: vdup.32 d17, r3 +; CHECK-FP16-NEXT: mov r7, #0 ; CHECK-FP16-NEXT: vdup.32 d18, r0 +; CHECK-FP16-NEXT: movwlo r7, #1 +; CHECK-FP16-NEXT: cmp r7, #0 +; CHECK-FP16-NEXT: mvnne r7, #0 +; CHECK-FP16-NEXT: subs r3, r6, r3 +; CHECK-FP16-NEXT: sbcs r3, r9, #0 +; CHECK-FP16-NEXT: vdup.32 d17, r7 +; CHECK-FP16-NEXT: movwlo r2, #1 +; CHECK-FP16-NEXT: cmp r2, #0 +; CHECK-FP16-NEXT: mvnne r2, #0 ; CHECK-FP16-NEXT: vand q10, q4, q9 ; CHECK-FP16-NEXT: vdup.32 d16, r2 ; CHECK-FP16-NEXT: vand q11, q5, q8 @@ -901,7 +886,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-FP16-NEXT: vmovn.i64 d1, q9 ; CHECK-FP16-NEXT: vmovn.i64 d0, q8 ; CHECK-FP16-NEXT: vpop {d8, d9, d10, d11} -; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, 
r11, pc} entry: %conv = fptoui <4 x half> %x to <4 x i64> %0 = icmp ult <4 x i64> %conv, @@ -917,211 +902,207 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEON-NEXT: .vsave {d8, d9, d10, d11, d12, d13} ; CHECK-NEON-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; CHECK-NEON-NEXT: vmov r0, s1 -; CHECK-NEON-NEXT: vmov.f32 s16, s3 -; CHECK-NEON-NEXT: vmov.f32 s18, s2 +; CHECK-NEON-NEXT: vmov r0, s3 +; CHECK-NEON-NEXT: vmov.f32 s16, s2 +; CHECK-NEON-NEXT: vmov.f32 s18, s1 ; CHECK-NEON-NEXT: vmov.f32 s20, s0 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: mov r6, r0 -; CHECK-NEON-NEXT: vmov r0, s20 -; CHECK-NEON-NEXT: mov r7, r1 -; CHECK-NEON-NEXT: vmov r5, s18 -; CHECK-NEON-NEXT: vmov r8, s16 -; CHECK-NEON-NEXT: vmov.32 d9[0], r6 +; CHECK-NEON-NEXT: mov r5, r0 +; CHECK-NEON-NEXT: vmov r0, s16 +; CHECK-NEON-NEXT: mov r6, r1 ; CHECK-NEON-NEXT: bl __aeabi_h2f +; CHECK-NEON-NEXT: vmov r8, s20 +; CHECK-NEON-NEXT: vmov.32 d13[0], r5 ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: vmov.32 d8[0], r0 +; CHECK-NEON-NEXT: vmov r2, s18 +; CHECK-NEON-NEXT: vmov.32 d12[0], r0 ; CHECK-NEON-NEXT: mvn r9, #0 ; CHECK-NEON-NEXT: subs r0, r0, r9 -; CHECK-NEON-NEXT: mov r4, #0 ; CHECK-NEON-NEXT: sbcs r0, r1, #0 -; CHECK-NEON-NEXT: vmov.32 d9[1], r7 +; CHECK-NEON-NEXT: vmov.32 d13[1], r6 ; CHECK-NEON-NEXT: mov r0, #0 +; CHECK-NEON-NEXT: mov r7, #0 ; CHECK-NEON-NEXT: movwlt r0, #1 ; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: vmov.32 d8[1], r1 +; CHECK-NEON-NEXT: vmov.32 d12[1], r1 ; CHECK-NEON-NEXT: mvnne r0, #0 -; CHECK-NEON-NEXT: subs r1, r6, r9 -; CHECK-NEON-NEXT: sbcs r1, r7, #0 +; CHECK-NEON-NEXT: subs r1, r5, r9 +; CHECK-NEON-NEXT: sbcs r1, r6, #0 ; CHECK-NEON-NEXT: mov r1, #0 ; CHECK-NEON-NEXT: movwlt r1, #1 ; CHECK-NEON-NEXT: cmp r1, #0 ; CHECK-NEON-NEXT: mvnne r1, #0 -; CHECK-NEON-NEXT: vdup.32 d13, r1 -; CHECK-NEON-NEXT: vdup.32 d12, r0 -; CHECK-NEON-NEXT: mov 
r0, r5 +; CHECK-NEON-NEXT: vdup.32 d9, r1 +; CHECK-NEON-NEXT: vdup.32 d8, r0 +; CHECK-NEON-NEXT: mov r0, r2 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov.i64 q5, #0xffffffff -; CHECK-NEON-NEXT: vbif q4, q5, q6 +; CHECK-NEON-NEXT: vbsl q4, q6, q5 ; CHECK-NEON-NEXT: bl __aeabi_f2lz ; CHECK-NEON-NEXT: mov r5, r0 -; CHECK-NEON-NEXT: vmov.32 d12[0], r0 +; CHECK-NEON-NEXT: vmov.32 d13[0], r0 ; CHECK-NEON-NEXT: mov r0, r8 ; CHECK-NEON-NEXT: mov r6, r1 -; CHECK-NEON-NEXT: vmov r7, r10, d8 +; CHECK-NEON-NEXT: vmov r4, r10, d8 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz ; CHECK-NEON-NEXT: subs r2, r5, r9 -; CHECK-NEON-NEXT: vmov.32 d13[0], r0 +; CHECK-NEON-NEXT: vmov.32 d12[0], r0 ; CHECK-NEON-NEXT: sbcs r2, r6, #0 ; CHECK-NEON-NEXT: mov r2, #0 +; CHECK-NEON-NEXT: vmov.32 d13[1], r6 ; CHECK-NEON-NEXT: movwlt r2, #1 +; CHECK-NEON-NEXT: cmp r2, #0 +; CHECK-NEON-NEXT: mvnne r2, #0 ; CHECK-NEON-NEXT: subs r0, r0, r9 ; CHECK-NEON-NEXT: sbcs r0, r1, #0 -; CHECK-NEON-NEXT: vmov.32 d13[1], r1 +; CHECK-NEON-NEXT: vdup.32 d17, r2 ; CHECK-NEON-NEXT: mov r0, #0 +; CHECK-NEON-NEXT: vmov.32 d12[1], r1 ; CHECK-NEON-NEXT: movwlt r0, #1 ; CHECK-NEON-NEXT: cmp r0, #0 ; CHECK-NEON-NEXT: mvnne r0, #0 -; CHECK-NEON-NEXT: cmp r2, #0 -; CHECK-NEON-NEXT: vmov.32 d12[1], r6 -; CHECK-NEON-NEXT: mvnne r2, #0 -; CHECK-NEON-NEXT: vdup.32 d17, r0 -; CHECK-NEON-NEXT: rsbs r0, r7, #0 -; CHECK-NEON-NEXT: vdup.32 d16, r2 -; CHECK-NEON-NEXT: vmov r7, r5, d9 +; CHECK-NEON-NEXT: vmov r2, r3, d9 +; CHECK-NEON-NEXT: vdup.32 d16, r0 +; CHECK-NEON-NEXT: rsbs r6, r4, #0 ; CHECK-NEON-NEXT: vbsl q8, q6, q5 -; CHECK-NEON-NEXT: rscs r0, r10, #0 +; CHECK-NEON-NEXT: rscs r6, r10, #0 +; CHECK-NEON-NEXT: mov r6, #0 +; CHECK-NEON-NEXT: movwlt r6, #1 +; CHECK-NEON-NEXT: cmp r6, #0 +; CHECK-NEON-NEXT: vmov r0, r1, d17 +; CHECK-NEON-NEXT: mvnne r6, #0 +; CHECK-NEON-NEXT: vmov r5, r4, d16 +; CHECK-NEON-NEXT: rsbs r0, r0, #0 +; CHECK-NEON-NEXT: rscs r0, r1, #0 ; CHECK-NEON-NEXT: mov r0, #0 ; 
CHECK-NEON-NEXT: movwlt r0, #1 -; CHECK-NEON-NEXT: vmov r1, r2, d16 -; CHECK-NEON-NEXT: vmov r3, r6, d17 -; CHECK-NEON-NEXT: rsbs r1, r1, #0 -; CHECK-NEON-NEXT: rscs r1, r2, #0 +; CHECK-NEON-NEXT: cmp r0, #0 +; CHECK-NEON-NEXT: mvnne r0, #0 +; CHECK-NEON-NEXT: rsbs r1, r2, #0 +; CHECK-NEON-NEXT: rscs r1, r3, #0 +; CHECK-NEON-NEXT: vmov.32 d19[0], r0 ; CHECK-NEON-NEXT: mov r1, #0 ; CHECK-NEON-NEXT: movwlt r1, #1 -; CHECK-NEON-NEXT: rsbs r2, r3, #0 -; CHECK-NEON-NEXT: rscs r2, r6, #0 -; CHECK-NEON-NEXT: mov r2, #0 -; CHECK-NEON-NEXT: movwlt r2, #1 -; CHECK-NEON-NEXT: rsbs r3, r7, #0 -; CHECK-NEON-NEXT: rscs r3, r5, #0 -; CHECK-NEON-NEXT: movwlt r4, #1 -; CHECK-NEON-NEXT: cmp r4, #0 -; CHECK-NEON-NEXT: mvnne r4, #0 -; CHECK-NEON-NEXT: cmp r2, #0 -; CHECK-NEON-NEXT: mvnne r2, #0 ; CHECK-NEON-NEXT: cmp r1, #0 ; CHECK-NEON-NEXT: mvnne r1, #0 -; CHECK-NEON-NEXT: vmov.32 d21[0], r2 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: vmov.32 d20[0], r1 -; CHECK-NEON-NEXT: mvnne r0, #0 -; CHECK-NEON-NEXT: vmov.32 d19[0], r4 -; CHECK-NEON-NEXT: vand q8, q8, q10 -; CHECK-NEON-NEXT: vmov.32 d18[0], r0 -; CHECK-NEON-NEXT: vmovn.i64 d1, q8 -; CHECK-NEON-NEXT: vand q9, q4, q9 -; CHECK-NEON-NEXT: vmovn.i64 d0, q9 +; CHECK-NEON-NEXT: rsbs r0, r5, #0 +; CHECK-NEON-NEXT: rscs r0, r4, #0 +; CHECK-NEON-NEXT: vmov.32 d21[0], r1 +; CHECK-NEON-NEXT: movwlt r7, #1 +; CHECK-NEON-NEXT: cmp r7, #0 +; CHECK-NEON-NEXT: vmov.32 d20[0], r6 +; CHECK-NEON-NEXT: mvnne r7, #0 +; CHECK-NEON-NEXT: vmov.32 d18[0], r7 +; CHECK-NEON-NEXT: vand q10, q4, q10 +; CHECK-NEON-NEXT: vand q8, q8, q9 +; CHECK-NEON-NEXT: vmovn.i64 d1, q10 +; CHECK-NEON-NEXT: vmovn.i64 d0, q8 ; CHECK-NEON-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} ; ; CHECK-FP16-LABEL: ustest_f16i32: ; CHECK-FP16: @ %bb.0: @ %entry -; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-FP16-NEXT: .vsave {d10, d11, d12, d13, 
d14, d15} -; CHECK-FP16-NEXT: vpush {d10, d11, d12, d13, d14, d15} -; CHECK-FP16-NEXT: .vsave {d8} -; CHECK-FP16-NEXT: vpush {d8} -; CHECK-FP16-NEXT: vmov.u16 r0, d0[1] +; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-FP16-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; CHECK-FP16-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-FP16-NEXT: vmov.u16 r0, d0[3] ; CHECK-FP16-NEXT: vorr d8, d0, d0 -; CHECK-FP16-NEXT: vmov.u16 r8, d0[2] -; CHECK-FP16-NEXT: vmov.u16 r9, d0[3] +; CHECK-FP16-NEXT: vmov.u16 r8, d0[0] +; CHECK-FP16-NEXT: vmov.u16 r9, d0[1] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfdi ; CHECK-FP16-NEXT: mov r4, r0 -; CHECK-FP16-NEXT: vmov.u16 r0, d8[0] +; CHECK-FP16-NEXT: vmov.u16 r0, d8[2] ; CHECK-FP16-NEXT: mov r5, r1 -; CHECK-FP16-NEXT: vmov.32 d11[0], r4 +; CHECK-FP16-NEXT: vmov.32 d9[0], r4 ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: vmov.32 d10[0], r0 -; CHECK-FP16-NEXT: mvn r7, #0 -; CHECK-FP16-NEXT: subs r0, r0, r7 -; CHECK-FP16-NEXT: vmov.i64 q6, #0xffffffff +; CHECK-FP16-NEXT: mvn r10, #0 +; CHECK-FP16-NEXT: subs r2, r4, r10 +; CHECK-FP16-NEXT: sbcs r2, r5, #0 +; CHECK-FP16-NEXT: vmov.32 d8[0], r0 +; CHECK-FP16-NEXT: mov r2, #0 +; CHECK-FP16-NEXT: vmov s0, r9 +; CHECK-FP16-NEXT: movwlt r2, #1 +; CHECK-FP16-NEXT: cmp r2, #0 +; CHECK-FP16-NEXT: mvnne r2, #0 +; CHECK-FP16-NEXT: subs r0, r0, r10 ; CHECK-FP16-NEXT: sbcs r0, r1, #0 -; CHECK-FP16-NEXT: vmov.32 d11[1], r5 +; CHECK-FP16-NEXT: vmov.32 d9[1], r5 ; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: vmov s0, r8 +; CHECK-FP16-NEXT: vmov.i64 q5, #0xffffffff ; CHECK-FP16-NEXT: movwlt r0, #1 ; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: vmov.32 d10[1], r1 +; CHECK-FP16-NEXT: vmov.32 d8[1], r1 ; CHECK-FP16-NEXT: mvnne r0, #0 -; CHECK-FP16-NEXT: subs r1, r4, r7 ; CHECK-FP16-NEXT: mov r6, #0 -; CHECK-FP16-NEXT: sbcs r1, r5, #0 -; CHECK-FP16-NEXT: vmov s16, r9 -; 
CHECK-FP16-NEXT: mov r1, #0 -; CHECK-FP16-NEXT: movwlt r1, #1 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: mvnne r1, #0 -; CHECK-FP16-NEXT: vdup.32 d17, r1 +; CHECK-FP16-NEXT: vdup.32 d17, r2 ; CHECK-FP16-NEXT: vdup.32 d16, r0 -; CHECK-FP16-NEXT: vbif q5, q6, q8 -; CHECK-FP16-NEXT: vmov r9, r8, d10 +; CHECK-FP16-NEXT: vbif q4, q5, q8 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: vmov.f32 s0, s16 +; CHECK-FP16-NEXT: vmov s0, r8 ; CHECK-FP16-NEXT: mov r4, r0 ; CHECK-FP16-NEXT: mov r5, r1 -; CHECK-FP16-NEXT: vmov.32 d14[0], r0 +; CHECK-FP16-NEXT: vmov.32 d13[0], r0 +; CHECK-FP16-NEXT: vmov r7, r8, d8 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: subs r2, r4, r7 -; CHECK-FP16-NEXT: vmov.32 d15[0], r0 +; CHECK-FP16-NEXT: subs r2, r4, r10 +; CHECK-FP16-NEXT: vmov.32 d12[0], r0 ; CHECK-FP16-NEXT: sbcs r2, r5, #0 ; CHECK-FP16-NEXT: mov r2, #0 +; CHECK-FP16-NEXT: vmov.32 d13[1], r5 ; CHECK-FP16-NEXT: movwlt r2, #1 -; CHECK-FP16-NEXT: subs r0, r0, r7 +; CHECK-FP16-NEXT: cmp r2, #0 +; CHECK-FP16-NEXT: mvnne r2, #0 +; CHECK-FP16-NEXT: subs r0, r0, r10 ; CHECK-FP16-NEXT: sbcs r0, r1, #0 -; CHECK-FP16-NEXT: vmov.32 d15[1], r1 +; CHECK-FP16-NEXT: vdup.32 d17, r2 ; CHECK-FP16-NEXT: mov r0, #0 +; CHECK-FP16-NEXT: vmov.32 d12[1], r1 ; CHECK-FP16-NEXT: movwlt r0, #1 ; CHECK-FP16-NEXT: cmp r0, #0 ; CHECK-FP16-NEXT: mvnne r0, #0 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: vmov.32 d14[1], r5 -; CHECK-FP16-NEXT: mvnne r2, #0 -; CHECK-FP16-NEXT: vmov r5, r4, d11 -; CHECK-FP16-NEXT: vdup.32 d17, r0 -; CHECK-FP16-NEXT: rsbs r0, r9, #0 -; CHECK-FP16-NEXT: vdup.32 d16, r2 -; CHECK-FP16-NEXT: rscs r0, r8, #0 -; CHECK-FP16-NEXT: vbsl q8, q7, q6 +; CHECK-FP16-NEXT: vmov r2, r3, d9 +; CHECK-FP16-NEXT: vdup.32 d16, r0 +; CHECK-FP16-NEXT: rsbs r7, r7, #0 +; CHECK-FP16-NEXT: vbsl q8, q6, q5 +; CHECK-FP16-NEXT: rscs r7, r8, #0 +; CHECK-FP16-NEXT: mov r7, #0 +; CHECK-FP16-NEXT: movwlt r7, #1 +; CHECK-FP16-NEXT: cmp r7, #0 +; CHECK-FP16-NEXT: vmov r0, r1, d17 +; 
CHECK-FP16-NEXT: mvnne r7, #0 +; CHECK-FP16-NEXT: vmov r5, r4, d16 +; CHECK-FP16-NEXT: rsbs r0, r0, #0 +; CHECK-FP16-NEXT: rscs r0, r1, #0 ; CHECK-FP16-NEXT: mov r0, #0 ; CHECK-FP16-NEXT: movwlt r0, #1 -; CHECK-FP16-NEXT: vmov r1, r2, d16 -; CHECK-FP16-NEXT: vmov r3, r7, d17 -; CHECK-FP16-NEXT: rsbs r1, r1, #0 -; CHECK-FP16-NEXT: rscs r1, r2, #0 +; CHECK-FP16-NEXT: cmp r0, #0 +; CHECK-FP16-NEXT: mvnne r0, #0 +; CHECK-FP16-NEXT: rsbs r1, r2, #0 +; CHECK-FP16-NEXT: rscs r1, r3, #0 +; CHECK-FP16-NEXT: vmov.32 d19[0], r0 ; CHECK-FP16-NEXT: mov r1, #0 ; CHECK-FP16-NEXT: movwlt r1, #1 -; CHECK-FP16-NEXT: rsbs r2, r3, #0 -; CHECK-FP16-NEXT: rscs r2, r7, #0 -; CHECK-FP16-NEXT: mov r2, #0 -; CHECK-FP16-NEXT: movwlt r2, #1 -; CHECK-FP16-NEXT: rsbs r3, r5, #0 -; CHECK-FP16-NEXT: rscs r3, r4, #0 +; CHECK-FP16-NEXT: cmp r1, #0 +; CHECK-FP16-NEXT: mvnne r1, #0 +; CHECK-FP16-NEXT: rsbs r0, r5, #0 +; CHECK-FP16-NEXT: rscs r0, r4, #0 +; CHECK-FP16-NEXT: vmov.32 d21[0], r1 ; CHECK-FP16-NEXT: movwlt r6, #1 ; CHECK-FP16-NEXT: cmp r6, #0 +; CHECK-FP16-NEXT: vmov.32 d20[0], r7 ; CHECK-FP16-NEXT: mvnne r6, #0 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: mvnne r2, #0 -; CHECK-FP16-NEXT: cmp r1, #0 -; CHECK-FP16-NEXT: mvnne r1, #0 -; CHECK-FP16-NEXT: vmov.32 d21[0], r2 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: vmov.32 d20[0], r1 -; CHECK-FP16-NEXT: mvnne r0, #0 -; CHECK-FP16-NEXT: vmov.32 d19[0], r6 -; CHECK-FP16-NEXT: vand q8, q8, q10 -; CHECK-FP16-NEXT: vmov.32 d18[0], r0 -; CHECK-FP16-NEXT: vmovn.i64 d1, q8 -; CHECK-FP16-NEXT: vand q9, q5, q9 -; CHECK-FP16-NEXT: vmovn.i64 d0, q9 -; CHECK-FP16-NEXT: vpop {d8} -; CHECK-FP16-NEXT: vpop {d10, d11, d12, d13, d14, d15} -; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-FP16-NEXT: vmov.32 d18[0], r6 +; CHECK-FP16-NEXT: vand q10, q4, q10 +; CHECK-FP16-NEXT: vand q8, q8, q9 +; CHECK-FP16-NEXT: vmovn.i64 d1, q10 +; CHECK-FP16-NEXT: vmovn.i64 d0, q8 +; CHECK-FP16-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; 
CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptosi <4 x half> %x to <4 x i64> %0 = icmp slt <4 x i64> %conv, @@ -1643,59 +1624,56 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-NEXT: vorr q4, q0, q0 ; CHECK-NEXT: vorr d0, d9, d9 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mvn r8, #0 -; CHECK-NEXT: subs r0, r0, r8 -; CHECK-NEXT: mvn r6, #-2147483648 -; CHECK-NEXT: sbcs r0, r1, r6 -; CHECK-NEXT: mov r10, r1 -; CHECK-NEXT: sbcs r0, r2, #0 +; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: mvn r9, #0 +; CHECK-NEXT: subs r1, r0, r9 +; CHECK-NEXT: mvn r5, #-2147483648 +; CHECK-NEXT: sbcs r1, r4, r5 ; CHECK-NEXT: vorr d0, d8, d8 -; CHECK-NEXT: sbcs r0, r3, #0 +; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: mov r7, #0 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: mov r9, #0 -; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: moveq r3, r0 -; CHECK-NEXT: movne r0, r2 -; CHECK-NEXT: moveq r10, r6 -; CHECK-NEXT: moveq r5, r8 -; CHECK-NEXT: rsbs r1, r5, #0 -; CHECK-NEXT: rscs r1, r10, #-2147483648 -; CHECK-NEXT: sbcs r0, r8, r0 -; CHECK-NEXT: sbcs r0, r8, r3 +; CHECK-NEXT: sbcs r1, r3, #0 +; CHECK-NEXT: mov r8, #-2147483648 +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: mov r10, #0 +; CHECK-NEXT: movwlt r1, #1 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: moveq r3, r1 +; CHECK-NEXT: movne r1, r2 +; CHECK-NEXT: moveq r4, r5 +; CHECK-NEXT: moveq r0, r9 +; CHECK-NEXT: rsbs r2, r0, #0 +; CHECK-NEXT: rscs r2, r4, #-2147483648 +; CHECK-NEXT: sbcs r1, r9, r1 +; CHECK-NEXT: sbcs r1, r9, r3 ; CHECK-NEXT: movwlt r7, #1 ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: moveq r5, r7 +; CHECK-NEXT: movne r7, r0 +; CHECK-NEXT: moveq r4, r8 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs r4, r0, r8 -; CHECK-NEXT: vmov.32 d1[0], r5 -; CHECK-NEXT: sbcs r4, r1, r6 -; CHECK-NEXT: sbcs r4, r2, #0 -; CHECK-NEXT: sbcs r4, r3, #0 -; CHECK-NEXT: mov r4, #0 -; CHECK-NEXT: movwlt r4, #1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: moveq r3, r4 -; CHECK-NEXT: movne 
r6, r1 -; CHECK-NEXT: movne r4, r2 -; CHECK-NEXT: moveq r0, r8 -; CHECK-NEXT: rsbs r1, r0, #0 -; CHECK-NEXT: rscs r1, r6, #-2147483648 -; CHECK-NEXT: sbcs r1, r8, r4 -; CHECK-NEXT: sbcs r1, r8, r3 -; CHECK-NEXT: movwlt r9, #1 -; CHECK-NEXT: cmp r9, #0 +; CHECK-NEXT: subs r6, r0, r9 +; CHECK-NEXT: vmov.32 d1[0], r7 +; CHECK-NEXT: sbcs r6, r1, r5 +; CHECK-NEXT: sbcs r6, r2, #0 +; CHECK-NEXT: sbcs r6, r3, #0 +; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: movwlt r6, #1 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: moveq r3, r6 +; CHECK-NEXT: movne r6, r2 +; CHECK-NEXT: movne r5, r1 ; CHECK-NEXT: moveq r0, r9 -; CHECK-NEXT: mov r1, #-2147483648 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: vmov.32 d0[0], r0 -; CHECK-NEXT: moveq r10, r1 -; CHECK-NEXT: cmp r9, #0 -; CHECK-NEXT: vmov.32 d1[1], r10 -; CHECK-NEXT: moveq r6, r1 -; CHECK-NEXT: vmov.32 d0[1], r6 +; CHECK-NEXT: rsbs r1, r0, #0 +; CHECK-NEXT: rscs r1, r5, #-2147483648 +; CHECK-NEXT: sbcs r1, r9, r6 +; CHECK-NEXT: sbcs r1, r9, r3 +; CHECK-NEXT: movwlt r10, #1 +; CHECK-NEXT: cmp r10, #0 +; CHECK-NEXT: movne r10, r0 +; CHECK-NEXT: moveq r5, r8 +; CHECK-NEXT: vmov.32 d0[0], r10 +; CHECK-NEXT: vmov.32 d1[1], r4 +; CHECK-NEXT: vmov.32 d0[1], r5 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} entry: @@ -1711,39 +1689,36 @@ entry: define <2 x i64> @utest_f64i64(<2 x double> %x) { ; CHECK-LABEL: utest_f64i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vorr q4, q0, q0 ; CHECK-NEXT: vorr d0, d9, d9 ; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: subs r0, r2, #1 -; CHECK-NEXT: vorr d0, d8, d8 -; CHECK-NEXT: sbcs r0, r3, #0 -; CHECK-NEXT: mov r7, #0 ; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: movwlo r7, #1 -; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: subs r1, r2, #1 +; 
CHECK-NEXT: vorr d0, d8, d8 +; CHECK-NEXT: sbcs r1, r3, #0 ; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: moveq r5, r7 +; CHECK-NEXT: mov r5, #0 +; CHECK-NEXT: movwlo r6, #1 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: moveq r4, r6 +; CHECK-NEXT: movne r6, r0 ; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: subs r2, r2, #1 -; CHECK-NEXT: vmov.32 d1[0], r5 +; CHECK-NEXT: vmov.32 d1[0], r6 ; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: movwlo r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r0, r6 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: movne r7, r4 +; CHECK-NEXT: movwlo r5, #1 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: moveq r0, r5 +; CHECK-NEXT: movne r5, r1 ; CHECK-NEXT: vmov.32 d0[0], r0 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: vmov.32 d1[1], r7 -; CHECK-NEXT: movne r6, r1 -; CHECK-NEXT: vmov.32 d0[1], r6 +; CHECK-NEXT: vmov.32 d1[1], r4 +; CHECK-NEXT: vmov.32 d0[1], r5 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc} +; CHECK-NEXT: pop {r4, r5, r6, pc} entry: %conv = fptoui <2 x double> %x to <2 x i128> %0 = icmp ult <2 x i128> %conv, @@ -1755,26 +1730,26 @@ entry: define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-LABEL: ustest_f64i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vorr q4, q0, q0 ; CHECK-NEXT: vorr d0, d9, d9 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: mov r8, r1 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: subs r1, r2, #1 ; CHECK-NEXT: sbcs r1, r3, #0 -; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: movwlt r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: mov r9, #1 -; CHECK-NEXT: moveq r3, r6 -; CHECK-NEXT: moveq r8, r6 -; CHECK-NEXT: moveq r2, r9 -; CHECK-NEXT: movne r6, r0 -; CHECK-NEXT: rsbs r0, r6, #0 -; CHECK-NEXT: rscs r0, r8, #0 +; CHECK-NEXT: mov r8, #1 +; CHECK-NEXT: mov r1, #0 +; 
CHECK-NEXT: movge r2, r8 +; CHECK-NEXT: movwlt r1, #1 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: moveq r3, r1 +; CHECK-NEXT: moveq r4, r1 +; CHECK-NEXT: movne r1, r0 +; CHECK-NEXT: rsbs r0, r1, #0 +; CHECK-NEXT: rscs r0, r4, #0 ; CHECK-NEXT: vorr d0, d8, d8 ; CHECK-NEXT: rscs r0, r2, #0 ; CHECK-NEXT: mov r7, #0 @@ -1782,34 +1757,32 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: movwlt r7, #1 ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: moveq r6, r7 +; CHECK-NEXT: moveq r4, r7 +; CHECK-NEXT: movne r7, r1 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs r4, r2, #1 -; CHECK-NEXT: vmov.32 d1[0], r6 -; CHECK-NEXT: sbcs r4, r3, #0 -; CHECK-NEXT: mov r4, #0 -; CHECK-NEXT: movwlt r4, #1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: movne r9, r2 -; CHECK-NEXT: moveq r3, r4 -; CHECK-NEXT: moveq r1, r4 -; CHECK-NEXT: movne r4, r0 -; CHECK-NEXT: rsbs r0, r4, #0 +; CHECK-NEXT: subs r6, r2, #1 +; CHECK-NEXT: vmov.32 d1[0], r7 +; CHECK-NEXT: sbcs r6, r3, #0 +; CHECK-NEXT: movlt r8, r2 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movwlt r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: moveq r3, r2 +; CHECK-NEXT: moveq r1, r2 +; CHECK-NEXT: movne r2, r0 +; CHECK-NEXT: rsbs r0, r2, #0 ; CHECK-NEXT: rscs r0, r1, #0 -; CHECK-NEXT: rscs r0, r9, #0 +; CHECK-NEXT: rscs r0, r8, #0 ; CHECK-NEXT: rscs r0, r3, #0 ; CHECK-NEXT: movwlt r5, #1 ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: moveq r4, r5 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: movne r7, r8 -; CHECK-NEXT: vmov.32 d0[0], r4 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: vmov.32 d1[1], r7 +; CHECK-NEXT: moveq r2, r5 ; CHECK-NEXT: movne r5, r1 +; CHECK-NEXT: vmov.32 d0[0], r2 +; CHECK-NEXT: vmov.32 d1[1], r4 ; CHECK-NEXT: vmov.32 d0[1], r5 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc} entry: %conv = fptosi <2 x double> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -1830,59 +1803,56 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; 
CHECK-NEXT: vmov.f64 d8, d0 ; CHECK-NEXT: vmov.f32 s0, s17 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mvn r8, #0 -; CHECK-NEXT: subs r0, r0, r8 -; CHECK-NEXT: mvn r6, #-2147483648 -; CHECK-NEXT: sbcs r0, r1, r6 +; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: mvn r9, #0 +; CHECK-NEXT: subs r1, r0, r9 +; CHECK-NEXT: mvn r5, #-2147483648 +; CHECK-NEXT: sbcs r1, r4, r5 ; CHECK-NEXT: vmov.f32 s0, s16 -; CHECK-NEXT: sbcs r0, r2, #0 -; CHECK-NEXT: mov r10, r1 -; CHECK-NEXT: sbcs r0, r3, #0 +; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: mov r7, #0 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: mov r9, #0 -; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: moveq r3, r0 -; CHECK-NEXT: movne r0, r2 -; CHECK-NEXT: moveq r10, r6 -; CHECK-NEXT: moveq r5, r8 -; CHECK-NEXT: rsbs r1, r5, #0 -; CHECK-NEXT: rscs r1, r10, #-2147483648 -; CHECK-NEXT: sbcs r0, r8, r0 -; CHECK-NEXT: sbcs r0, r8, r3 +; CHECK-NEXT: sbcs r1, r3, #0 +; CHECK-NEXT: mov r8, #-2147483648 +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: mov r10, #0 +; CHECK-NEXT: movwlt r1, #1 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: moveq r3, r1 +; CHECK-NEXT: movne r1, r2 +; CHECK-NEXT: moveq r4, r5 +; CHECK-NEXT: moveq r0, r9 +; CHECK-NEXT: rsbs r2, r0, #0 +; CHECK-NEXT: rscs r2, r4, #-2147483648 +; CHECK-NEXT: sbcs r1, r9, r1 +; CHECK-NEXT: sbcs r1, r9, r3 ; CHECK-NEXT: movwlt r7, #1 ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: moveq r5, r7 +; CHECK-NEXT: movne r7, r0 +; CHECK-NEXT: moveq r4, r8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs r4, r0, r8 -; CHECK-NEXT: vmov.32 d1[0], r5 -; CHECK-NEXT: sbcs r4, r1, r6 -; CHECK-NEXT: sbcs r4, r2, #0 -; CHECK-NEXT: sbcs r4, r3, #0 -; CHECK-NEXT: mov r4, #0 -; CHECK-NEXT: movwlt r4, #1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: moveq r3, r4 -; CHECK-NEXT: movne r6, r1 -; CHECK-NEXT: movne r4, r2 -; CHECK-NEXT: moveq r0, r8 -; CHECK-NEXT: rsbs r1, r0, #0 -; CHECK-NEXT: rscs r1, r6, #-2147483648 -; CHECK-NEXT: sbcs r1, r8, r4 -; CHECK-NEXT: sbcs r1, r8, r3 -; 
CHECK-NEXT: movwlt r9, #1 -; CHECK-NEXT: cmp r9, #0 +; CHECK-NEXT: subs r6, r0, r9 +; CHECK-NEXT: vmov.32 d1[0], r7 +; CHECK-NEXT: sbcs r6, r1, r5 +; CHECK-NEXT: sbcs r6, r2, #0 +; CHECK-NEXT: sbcs r6, r3, #0 +; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: movwlt r6, #1 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: moveq r3, r6 +; CHECK-NEXT: movne r6, r2 +; CHECK-NEXT: movne r5, r1 ; CHECK-NEXT: moveq r0, r9 -; CHECK-NEXT: mov r1, #-2147483648 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: vmov.32 d0[0], r0 -; CHECK-NEXT: moveq r10, r1 -; CHECK-NEXT: cmp r9, #0 -; CHECK-NEXT: vmov.32 d1[1], r10 -; CHECK-NEXT: moveq r6, r1 -; CHECK-NEXT: vmov.32 d0[1], r6 +; CHECK-NEXT: rsbs r1, r0, #0 +; CHECK-NEXT: rscs r1, r5, #-2147483648 +; CHECK-NEXT: sbcs r1, r9, r6 +; CHECK-NEXT: sbcs r1, r9, r3 +; CHECK-NEXT: movwlt r10, #1 +; CHECK-NEXT: cmp r10, #0 +; CHECK-NEXT: movne r10, r0 +; CHECK-NEXT: moveq r5, r8 +; CHECK-NEXT: vmov.32 d0[0], r10 +; CHECK-NEXT: vmov.32 d1[1], r4 +; CHECK-NEXT: vmov.32 d0[1], r5 ; CHECK-NEXT: vpop {d8} ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} entry: @@ -1898,39 +1868,36 @@ entry: define <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-LABEL: utest_f32i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: .vsave {d8} ; CHECK-NEXT: vpush {d8} ; CHECK-NEXT: vmov.f64 d8, d0 ; CHECK-NEXT: vmov.f32 s0, s17 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vmov.f32 s0, s16 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: subs r0, r2, #1 -; CHECK-NEXT: mov r7, #0 -; CHECK-NEXT: sbcs r0, r3, #0 ; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: movwlo r7, #1 -; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: subs r1, r2, #1 ; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: moveq r5, r7 +; CHECK-NEXT: sbcs r1, r3, #0 +; CHECK-NEXT: mov r5, #0 +; CHECK-NEXT: movwlo r6, #1 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: moveq r4, r6 +; CHECK-NEXT: movne r6, r0 
; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: subs r2, r2, #1 -; CHECK-NEXT: vmov.32 d1[0], r5 +; CHECK-NEXT: vmov.32 d1[0], r6 ; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: movwlo r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r0, r6 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: movne r7, r4 +; CHECK-NEXT: movwlo r5, #1 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: moveq r0, r5 +; CHECK-NEXT: movne r5, r1 ; CHECK-NEXT: vmov.32 d0[0], r0 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: vmov.32 d1[1], r7 -; CHECK-NEXT: movne r6, r1 -; CHECK-NEXT: vmov.32 d0[1], r6 +; CHECK-NEXT: vmov.32 d1[1], r4 +; CHECK-NEXT: vmov.32 d0[1], r5 ; CHECK-NEXT: vpop {d8} -; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc} +; CHECK-NEXT: pop {r4, r5, r6, pc} entry: %conv = fptoui <2 x float> %x to <2 x i128> %0 = icmp ult <2 x i128> %conv, @@ -1942,61 +1909,59 @@ entry: define <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-LABEL: ustest_f32i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: .vsave {d8} ; CHECK-NEXT: vpush {d8} ; CHECK-NEXT: vmov.f64 d8, d0 ; CHECK-NEXT: vmov.f32 s0, s17 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov r8, r1 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: subs r1, r2, #1 -; CHECK-NEXT: vmov.f32 s0, s16 ; CHECK-NEXT: sbcs r1, r3, #0 -; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: mov r9, #1 -; CHECK-NEXT: movwlt r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r3, r6 -; CHECK-NEXT: moveq r8, r6 -; CHECK-NEXT: moveq r2, r9 -; CHECK-NEXT: movne r6, r0 -; CHECK-NEXT: rsbs r0, r6, #0 +; CHECK-NEXT: mov r8, #1 +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: vmov.f32 s0, s16 +; CHECK-NEXT: movge r2, r8 +; CHECK-NEXT: movwlt r1, #1 +; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: mov r7, #0 -; CHECK-NEXT: rscs r0, r8, #0 +; CHECK-NEXT: moveq r3, r1 +; CHECK-NEXT: moveq r4, r1 +; CHECK-NEXT: movne r1, r0 +; 
CHECK-NEXT: rsbs r0, r1, #0 +; CHECK-NEXT: rscs r0, r4, #0 ; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: rscs r0, r2, #0 ; CHECK-NEXT: rscs r0, r3, #0 ; CHECK-NEXT: movwlt r7, #1 ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: moveq r6, r7 +; CHECK-NEXT: moveq r4, r7 +; CHECK-NEXT: movne r7, r1 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs r4, r2, #1 -; CHECK-NEXT: vmov.32 d1[0], r6 -; CHECK-NEXT: sbcs r4, r3, #0 -; CHECK-NEXT: mov r4, #0 -; CHECK-NEXT: movwlt r4, #1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: movne r9, r2 -; CHECK-NEXT: moveq r3, r4 -; CHECK-NEXT: moveq r1, r4 -; CHECK-NEXT: movne r4, r0 -; CHECK-NEXT: rsbs r0, r4, #0 +; CHECK-NEXT: subs r6, r2, #1 +; CHECK-NEXT: vmov.32 d1[0], r7 +; CHECK-NEXT: sbcs r6, r3, #0 +; CHECK-NEXT: movlt r8, r2 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movwlt r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: moveq r3, r2 +; CHECK-NEXT: moveq r1, r2 +; CHECK-NEXT: movne r2, r0 +; CHECK-NEXT: rsbs r0, r2, #0 ; CHECK-NEXT: rscs r0, r1, #0 -; CHECK-NEXT: rscs r0, r9, #0 +; CHECK-NEXT: rscs r0, r8, #0 ; CHECK-NEXT: rscs r0, r3, #0 ; CHECK-NEXT: movwlt r5, #1 ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: moveq r4, r5 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: movne r7, r8 -; CHECK-NEXT: vmov.32 d0[0], r4 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: vmov.32 d1[1], r7 +; CHECK-NEXT: moveq r2, r5 ; CHECK-NEXT: movne r5, r1 +; CHECK-NEXT: vmov.32 d0[0], r2 +; CHECK-NEXT: vmov.32 d1[1], r4 ; CHECK-NEXT: vmov.32 d0[1], r5 ; CHECK-NEXT: vpop {d8} -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -2010,11 +1975,9 @@ entry: define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-NEON-LABEL: stest_f16i64: ; CHECK-NEON: @ %bb.0: @ %entry -; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEON-NEXT: .pad #4 -; CHECK-NEON-NEXT: sub sp, sp, #4 -; 
CHECK-NEON-NEXT: .vsave {d8} +; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEON-NEXT: .vsave {d8} ; CHECK-NEON-NEXT: vpush {d8} ; CHECK-NEON-NEXT: vmov r0, s0 ; CHECK-NEON-NEXT: vmov.f32 s16, s1 @@ -2024,62 +1987,58 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) { ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 ; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: mov r5, r0 +; CHECK-NEON-NEXT: mov r4, r1 ; CHECK-NEON-NEXT: mvn r9, #0 -; CHECK-NEON-NEXT: subs r0, r0, r9 -; CHECK-NEON-NEXT: mvn r7, #-2147483648 -; CHECK-NEON-NEXT: sbcs r0, r1, r7 -; CHECK-NEON-NEXT: mov r11, r1 -; CHECK-NEON-NEXT: sbcs r0, r2, #0 +; CHECK-NEON-NEXT: subs r1, r0, r9 +; CHECK-NEON-NEXT: mvn r6, #-2147483648 +; CHECK-NEON-NEXT: sbcs r1, r4, r6 ; CHECK-NEON-NEXT: vmov s0, r8 -; CHECK-NEON-NEXT: sbcs r0, r3, #0 -; CHECK-NEON-NEXT: mov r6, #0 -; CHECK-NEON-NEXT: mov r0, #0 +; CHECK-NEON-NEXT: sbcs r1, r2, #0 +; CHECK-NEON-NEXT: mov r5, #0 +; CHECK-NEON-NEXT: sbcs r1, r3, #0 +; CHECK-NEON-NEXT: mov r8, #-2147483648 +; CHECK-NEON-NEXT: mov r1, #0 ; CHECK-NEON-NEXT: mov r10, #0 -; CHECK-NEON-NEXT: movwlt r0, #1 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: moveq r3, r0 -; CHECK-NEON-NEXT: movne r0, r2 -; CHECK-NEON-NEXT: moveq r11, r7 -; CHECK-NEON-NEXT: moveq r5, r9 -; CHECK-NEON-NEXT: rsbs r1, r5, #0 -; CHECK-NEON-NEXT: rscs r1, r11, #-2147483648 -; CHECK-NEON-NEXT: sbcs r0, r9, r0 -; CHECK-NEON-NEXT: sbcs r0, r9, r3 -; CHECK-NEON-NEXT: movwlt r6, #1 -; CHECK-NEON-NEXT: cmp r6, #0 -; CHECK-NEON-NEXT: moveq r5, r6 +; CHECK-NEON-NEXT: movwlt r1, #1 +; CHECK-NEON-NEXT: cmp r1, #0 +; CHECK-NEON-NEXT: moveq r3, r1 +; CHECK-NEON-NEXT: movne r1, r2 +; CHECK-NEON-NEXT: moveq r4, r6 +; CHECK-NEON-NEXT: moveq r0, r9 +; CHECK-NEON-NEXT: rsbs r2, r0, #0 +; CHECK-NEON-NEXT: rscs r2, r4, #-2147483648 +; CHECK-NEON-NEXT: sbcs r1, r9, r1 +; CHECK-NEON-NEXT: sbcs r1, r9, r3 +; CHECK-NEON-NEXT: movwlt r5, 
#1 +; CHECK-NEON-NEXT: cmp r5, #0 +; CHECK-NEON-NEXT: movne r5, r0 +; CHECK-NEON-NEXT: moveq r4, r8 ; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: subs r4, r0, r9 +; CHECK-NEON-NEXT: subs r7, r0, r9 ; CHECK-NEON-NEXT: vmov.32 d1[0], r5 -; CHECK-NEON-NEXT: sbcs r4, r1, r7 -; CHECK-NEON-NEXT: sbcs r4, r2, #0 -; CHECK-NEON-NEXT: sbcs r4, r3, #0 -; CHECK-NEON-NEXT: mov r4, #0 -; CHECK-NEON-NEXT: movwlt r4, #1 -; CHECK-NEON-NEXT: cmp r4, #0 -; CHECK-NEON-NEXT: moveq r3, r4 -; CHECK-NEON-NEXT: movne r7, r1 -; CHECK-NEON-NEXT: movne r4, r2 +; CHECK-NEON-NEXT: sbcs r7, r1, r6 +; CHECK-NEON-NEXT: sbcs r7, r2, #0 +; CHECK-NEON-NEXT: sbcs r7, r3, #0 +; CHECK-NEON-NEXT: mov r7, #0 +; CHECK-NEON-NEXT: movwlt r7, #1 +; CHECK-NEON-NEXT: cmp r7, #0 +; CHECK-NEON-NEXT: moveq r3, r7 +; CHECK-NEON-NEXT: movne r7, r2 +; CHECK-NEON-NEXT: movne r6, r1 ; CHECK-NEON-NEXT: moveq r0, r9 ; CHECK-NEON-NEXT: rsbs r1, r0, #0 -; CHECK-NEON-NEXT: rscs r1, r7, #-2147483648 -; CHECK-NEON-NEXT: sbcs r1, r9, r4 +; CHECK-NEON-NEXT: rscs r1, r6, #-2147483648 +; CHECK-NEON-NEXT: sbcs r1, r9, r7 ; CHECK-NEON-NEXT: sbcs r1, r9, r3 ; CHECK-NEON-NEXT: movwlt r10, #1 ; CHECK-NEON-NEXT: cmp r10, #0 -; CHECK-NEON-NEXT: moveq r0, r10 -; CHECK-NEON-NEXT: mov r1, #-2147483648 -; CHECK-NEON-NEXT: cmp r6, #0 -; CHECK-NEON-NEXT: vmov.32 d0[0], r0 -; CHECK-NEON-NEXT: moveq r11, r1 -; CHECK-NEON-NEXT: cmp r10, #0 -; CHECK-NEON-NEXT: vmov.32 d1[1], r11 -; CHECK-NEON-NEXT: moveq r7, r1 -; CHECK-NEON-NEXT: vmov.32 d0[1], r7 +; CHECK-NEON-NEXT: movne r10, r0 +; CHECK-NEON-NEXT: moveq r6, r8 +; CHECK-NEON-NEXT: vmov.32 d0[0], r10 +; CHECK-NEON-NEXT: vmov.32 d1[1], r4 +; CHECK-NEON-NEXT: vmov.32 d0[1], r6 ; CHECK-NEON-NEXT: vpop {d8} -; CHECK-NEON-NEXT: add sp, sp, #4 -; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} ; ; CHECK-FP16-LABEL: stest_f16i64: ; CHECK-FP16: @ %bb.0: @ %entry @@ -2089,59 +2048,56 @@ define <2 x i64> @stest_f16i64(<2 x half> 
%x) { ; CHECK-FP16-NEXT: vmov.u16 r7, d0[0] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: mov r5, r0 -; CHECK-FP16-NEXT: mvn r8, #0 -; CHECK-FP16-NEXT: subs r0, r0, r8 -; CHECK-FP16-NEXT: mvn r6, #-2147483648 -; CHECK-FP16-NEXT: sbcs r0, r1, r6 -; CHECK-FP16-NEXT: mov r10, r1 -; CHECK-FP16-NEXT: sbcs r0, r2, #0 +; CHECK-FP16-NEXT: mov r4, r1 +; CHECK-FP16-NEXT: mvn r9, #0 +; CHECK-FP16-NEXT: subs r1, r0, r9 +; CHECK-FP16-NEXT: mvn r5, #-2147483648 +; CHECK-FP16-NEXT: sbcs r1, r4, r5 ; CHECK-FP16-NEXT: vmov s0, r7 -; CHECK-FP16-NEXT: sbcs r0, r3, #0 +; CHECK-FP16-NEXT: sbcs r1, r2, #0 ; CHECK-FP16-NEXT: mov r7, #0 -; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: mov r9, #0 -; CHECK-FP16-NEXT: movwlt r0, #1 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: moveq r3, r0 -; CHECK-FP16-NEXT: movne r0, r2 -; CHECK-FP16-NEXT: moveq r10, r6 -; CHECK-FP16-NEXT: moveq r5, r8 -; CHECK-FP16-NEXT: rsbs r1, r5, #0 -; CHECK-FP16-NEXT: rscs r1, r10, #-2147483648 -; CHECK-FP16-NEXT: sbcs r0, r8, r0 -; CHECK-FP16-NEXT: sbcs r0, r8, r3 +; CHECK-FP16-NEXT: sbcs r1, r3, #0 +; CHECK-FP16-NEXT: mov r8, #-2147483648 +; CHECK-FP16-NEXT: mov r1, #0 +; CHECK-FP16-NEXT: mov r10, #0 +; CHECK-FP16-NEXT: movwlt r1, #1 +; CHECK-FP16-NEXT: cmp r1, #0 +; CHECK-FP16-NEXT: moveq r3, r1 +; CHECK-FP16-NEXT: movne r1, r2 +; CHECK-FP16-NEXT: moveq r4, r5 +; CHECK-FP16-NEXT: moveq r0, r9 +; CHECK-FP16-NEXT: rsbs r2, r0, #0 +; CHECK-FP16-NEXT: rscs r2, r4, #-2147483648 +; CHECK-FP16-NEXT: sbcs r1, r9, r1 +; CHECK-FP16-NEXT: sbcs r1, r9, r3 ; CHECK-FP16-NEXT: movwlt r7, #1 ; CHECK-FP16-NEXT: cmp r7, #0 -; CHECK-FP16-NEXT: moveq r5, r7 +; CHECK-FP16-NEXT: movne r7, r0 +; CHECK-FP16-NEXT: moveq r4, r8 ; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: subs r4, r0, r8 -; CHECK-FP16-NEXT: vmov.32 d1[0], r5 -; CHECK-FP16-NEXT: sbcs r4, r1, r6 -; CHECK-FP16-NEXT: sbcs r4, r2, #0 -; CHECK-FP16-NEXT: sbcs r4, r3, #0 -; CHECK-FP16-NEXT: mov r4, #0 -; CHECK-FP16-NEXT: movwlt 
r4, #1 -; CHECK-FP16-NEXT: cmp r4, #0 -; CHECK-FP16-NEXT: moveq r3, r4 -; CHECK-FP16-NEXT: movne r6, r1 -; CHECK-FP16-NEXT: movne r4, r2 -; CHECK-FP16-NEXT: moveq r0, r8 -; CHECK-FP16-NEXT: rsbs r1, r0, #0 -; CHECK-FP16-NEXT: rscs r1, r6, #-2147483648 -; CHECK-FP16-NEXT: sbcs r1, r8, r4 -; CHECK-FP16-NEXT: sbcs r1, r8, r3 -; CHECK-FP16-NEXT: movwlt r9, #1 -; CHECK-FP16-NEXT: cmp r9, #0 +; CHECK-FP16-NEXT: subs r6, r0, r9 +; CHECK-FP16-NEXT: vmov.32 d1[0], r7 +; CHECK-FP16-NEXT: sbcs r6, r1, r5 +; CHECK-FP16-NEXT: sbcs r6, r2, #0 +; CHECK-FP16-NEXT: sbcs r6, r3, #0 +; CHECK-FP16-NEXT: mov r6, #0 +; CHECK-FP16-NEXT: movwlt r6, #1 +; CHECK-FP16-NEXT: cmp r6, #0 +; CHECK-FP16-NEXT: moveq r3, r6 +; CHECK-FP16-NEXT: movne r6, r2 +; CHECK-FP16-NEXT: movne r5, r1 ; CHECK-FP16-NEXT: moveq r0, r9 -; CHECK-FP16-NEXT: mov r1, #-2147483648 -; CHECK-FP16-NEXT: cmp r7, #0 -; CHECK-FP16-NEXT: vmov.32 d0[0], r0 -; CHECK-FP16-NEXT: moveq r10, r1 -; CHECK-FP16-NEXT: cmp r9, #0 -; CHECK-FP16-NEXT: vmov.32 d1[1], r10 -; CHECK-FP16-NEXT: moveq r6, r1 -; CHECK-FP16-NEXT: vmov.32 d0[1], r6 +; CHECK-FP16-NEXT: rsbs r1, r0, #0 +; CHECK-FP16-NEXT: rscs r1, r5, #-2147483648 +; CHECK-FP16-NEXT: sbcs r1, r9, r6 +; CHECK-FP16-NEXT: sbcs r1, r9, r3 +; CHECK-FP16-NEXT: movwlt r10, #1 +; CHECK-FP16-NEXT: cmp r10, #0 +; CHECK-FP16-NEXT: movne r10, r0 +; CHECK-FP16-NEXT: moveq r5, r8 +; CHECK-FP16-NEXT: vmov.32 d0[0], r10 +; CHECK-FP16-NEXT: vmov.32 d1[1], r4 +; CHECK-FP16-NEXT: vmov.32 d0[1], r5 ; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptosi <2 x half> %x to <2 x i128> @@ -2156,8 +2112,8 @@ entry: define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-NEON-LABEL: utesth_f16i64: ; CHECK-NEON: @ %bb.0: @ %entry -; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr} -; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-NEON-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEON-NEXT: push {r4, r5, r6, lr} ; CHECK-NEON-NEXT: .vsave {d8} ; CHECK-NEON-NEXT: vpush {d8} ; 
CHECK-NEON-NEXT: vmov r0, s0 @@ -2168,66 +2124,60 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) { ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 ; CHECK-NEON-NEXT: bl __fixunssfti -; CHECK-NEON-NEXT: mov r6, r0 -; CHECK-NEON-NEXT: subs r0, r2, #1 +; CHECK-NEON-NEXT: mov r4, r1 +; CHECK-NEON-NEXT: subs r1, r2, #1 ; CHECK-NEON-NEXT: vmov s0, r5 -; CHECK-NEON-NEXT: sbcs r0, r3, #0 +; CHECK-NEON-NEXT: sbcs r1, r3, #0 ; CHECK-NEON-NEXT: mov r5, #0 -; CHECK-NEON-NEXT: mov r4, r1 +; CHECK-NEON-NEXT: mov r6, #0 ; CHECK-NEON-NEXT: movwlo r5, #1 ; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: mov r7, #0 -; CHECK-NEON-NEXT: moveq r6, r5 +; CHECK-NEON-NEXT: moveq r4, r5 +; CHECK-NEON-NEXT: movne r5, r0 ; CHECK-NEON-NEXT: bl __fixunssfti ; CHECK-NEON-NEXT: subs r2, r2, #1 -; CHECK-NEON-NEXT: vmov.32 d1[0], r6 +; CHECK-NEON-NEXT: vmov.32 d1[0], r5 ; CHECK-NEON-NEXT: sbcs r2, r3, #0 -; CHECK-NEON-NEXT: movwlo r7, #1 -; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: moveq r0, r7 -; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: movne r5, r4 +; CHECK-NEON-NEXT: movwlo r6, #1 +; CHECK-NEON-NEXT: cmp r6, #0 +; CHECK-NEON-NEXT: moveq r0, r6 +; CHECK-NEON-NEXT: movne r6, r1 ; CHECK-NEON-NEXT: vmov.32 d0[0], r0 -; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: vmov.32 d1[1], r5 -; CHECK-NEON-NEXT: movne r7, r1 -; CHECK-NEON-NEXT: vmov.32 d0[1], r7 +; CHECK-NEON-NEXT: vmov.32 d1[1], r4 +; CHECK-NEON-NEXT: vmov.32 d0[1], r6 ; CHECK-NEON-NEXT: vpop {d8} -; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} +; CHECK-NEON-NEXT: pop {r4, r5, r6, pc} ; ; CHECK-FP16-LABEL: utesth_f16i64: ; CHECK-FP16: @ %bb.0: @ %entry -; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r11, lr} -; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-FP16-NEXT: .save {r4, r5, r6, lr} +; CHECK-FP16-NEXT: push {r4, r5, r6, lr} ; CHECK-FP16-NEXT: vmov.u16 r0, d0[1] -; CHECK-FP16-NEXT: vmov.u16 r7, d0[0] +; CHECK-FP16-NEXT: vmov.u16 r6, d0[0] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl 
__fixunshfti -; CHECK-FP16-NEXT: mov r5, r0 -; CHECK-FP16-NEXT: subs r0, r2, #1 -; CHECK-FP16-NEXT: vmov s0, r7 -; CHECK-FP16-NEXT: sbcs r0, r3, #0 -; CHECK-FP16-NEXT: mov r7, #0 ; CHECK-FP16-NEXT: mov r4, r1 -; CHECK-FP16-NEXT: movwlo r7, #1 -; CHECK-FP16-NEXT: cmp r7, #0 +; CHECK-FP16-NEXT: subs r1, r2, #1 +; CHECK-FP16-NEXT: vmov s0, r6 +; CHECK-FP16-NEXT: sbcs r1, r3, #0 ; CHECK-FP16-NEXT: mov r6, #0 -; CHECK-FP16-NEXT: moveq r5, r7 +; CHECK-FP16-NEXT: mov r5, #0 +; CHECK-FP16-NEXT: movwlo r6, #1 +; CHECK-FP16-NEXT: cmp r6, #0 +; CHECK-FP16-NEXT: moveq r4, r6 +; CHECK-FP16-NEXT: movne r6, r0 ; CHECK-FP16-NEXT: bl __fixunshfti ; CHECK-FP16-NEXT: subs r2, r2, #1 -; CHECK-FP16-NEXT: vmov.32 d1[0], r5 +; CHECK-FP16-NEXT: vmov.32 d1[0], r6 ; CHECK-FP16-NEXT: sbcs r2, r3, #0 -; CHECK-FP16-NEXT: movwlo r6, #1 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: moveq r0, r6 -; CHECK-FP16-NEXT: cmp r7, #0 -; CHECK-FP16-NEXT: movne r7, r4 +; CHECK-FP16-NEXT: movwlo r5, #1 +; CHECK-FP16-NEXT: cmp r5, #0 +; CHECK-FP16-NEXT: moveq r0, r5 +; CHECK-FP16-NEXT: movne r5, r1 ; CHECK-FP16-NEXT: vmov.32 d0[0], r0 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: vmov.32 d1[1], r7 -; CHECK-FP16-NEXT: movne r6, r1 -; CHECK-FP16-NEXT: vmov.32 d0[1], r6 -; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r11, pc} +; CHECK-FP16-NEXT: vmov.32 d1[1], r4 +; CHECK-FP16-NEXT: vmov.32 d0[1], r5 +; CHECK-FP16-NEXT: pop {r4, r5, r6, pc} entry: %conv = fptoui <2 x half> %x to <2 x i128> %0 = icmp ult <2 x i128> %conv, @@ -2239,8 +2189,8 @@ entry: define <2 x i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-NEON-LABEL: ustest_f16i64: ; CHECK-NEON: @ %bb.0: @ %entry -; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, lr} ; CHECK-NEON-NEXT: .vsave {d8} ; CHECK-NEON-NEXT: vpush {d8} ; CHECK-NEON-NEXT: vmov r0, s0 @@ -2251,110 +2201,106 @@ define <2 x 
i64> @ustest_f16i64(<2 x half> %x) { ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 ; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: mov r8, r1 +; CHECK-NEON-NEXT: mov r4, r1 ; CHECK-NEON-NEXT: subs r1, r2, #1 -; CHECK-NEON-NEXT: vmov s0, r5 ; CHECK-NEON-NEXT: sbcs r1, r3, #0 -; CHECK-NEON-NEXT: mov r5, #0 -; CHECK-NEON-NEXT: mov r9, #1 -; CHECK-NEON-NEXT: movwlt r5, #1 -; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: moveq r3, r5 -; CHECK-NEON-NEXT: moveq r8, r5 -; CHECK-NEON-NEXT: moveq r2, r9 -; CHECK-NEON-NEXT: movne r5, r0 -; CHECK-NEON-NEXT: rsbs r0, r5, #0 -; CHECK-NEON-NEXT: mov r7, #0 -; CHECK-NEON-NEXT: rscs r0, r8, #0 -; CHECK-NEON-NEXT: mov r6, #0 +; CHECK-NEON-NEXT: mov r8, #1 +; CHECK-NEON-NEXT: mov r1, #0 +; CHECK-NEON-NEXT: movge r2, r8 +; CHECK-NEON-NEXT: movwlt r1, #1 +; CHECK-NEON-NEXT: cmp r1, #0 +; CHECK-NEON-NEXT: moveq r3, r1 +; CHECK-NEON-NEXT: moveq r4, r1 +; CHECK-NEON-NEXT: movne r1, r0 +; CHECK-NEON-NEXT: rsbs r0, r1, #0 +; CHECK-NEON-NEXT: rscs r0, r4, #0 +; CHECK-NEON-NEXT: vmov s0, r5 ; CHECK-NEON-NEXT: rscs r0, r2, #0 +; CHECK-NEON-NEXT: mov r7, #0 ; CHECK-NEON-NEXT: rscs r0, r3, #0 +; CHECK-NEON-NEXT: mov r5, #0 ; CHECK-NEON-NEXT: movwlt r7, #1 ; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: moveq r5, r7 +; CHECK-NEON-NEXT: moveq r4, r7 +; CHECK-NEON-NEXT: movne r7, r1 ; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: subs r4, r2, #1 -; CHECK-NEON-NEXT: vmov.32 d1[0], r5 -; CHECK-NEON-NEXT: sbcs r4, r3, #0 -; CHECK-NEON-NEXT: mov r4, #0 -; CHECK-NEON-NEXT: movwlt r4, #1 -; CHECK-NEON-NEXT: cmp r4, #0 -; CHECK-NEON-NEXT: movne r9, r2 -; CHECK-NEON-NEXT: moveq r3, r4 -; CHECK-NEON-NEXT: moveq r1, r4 -; CHECK-NEON-NEXT: movne r4, r0 -; CHECK-NEON-NEXT: rsbs r0, r4, #0 +; CHECK-NEON-NEXT: subs r6, r2, #1 +; CHECK-NEON-NEXT: vmov.32 d1[0], r7 +; CHECK-NEON-NEXT: sbcs r6, r3, #0 +; CHECK-NEON-NEXT: movlt r8, r2 +; CHECK-NEON-NEXT: mov r2, #0 +; CHECK-NEON-NEXT: movwlt r2, #1 +; CHECK-NEON-NEXT: cmp r2, #0 +; 
CHECK-NEON-NEXT: moveq r3, r2 +; CHECK-NEON-NEXT: moveq r1, r2 +; CHECK-NEON-NEXT: movne r2, r0 +; CHECK-NEON-NEXT: rsbs r0, r2, #0 ; CHECK-NEON-NEXT: rscs r0, r1, #0 -; CHECK-NEON-NEXT: rscs r0, r9, #0 +; CHECK-NEON-NEXT: rscs r0, r8, #0 ; CHECK-NEON-NEXT: rscs r0, r3, #0 -; CHECK-NEON-NEXT: movwlt r6, #1 -; CHECK-NEON-NEXT: cmp r6, #0 -; CHECK-NEON-NEXT: moveq r4, r6 -; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: movne r7, r8 -; CHECK-NEON-NEXT: vmov.32 d0[0], r4 -; CHECK-NEON-NEXT: cmp r6, #0 -; CHECK-NEON-NEXT: vmov.32 d1[1], r7 -; CHECK-NEON-NEXT: movne r6, r1 -; CHECK-NEON-NEXT: vmov.32 d0[1], r6 +; CHECK-NEON-NEXT: movwlt r5, #1 +; CHECK-NEON-NEXT: cmp r5, #0 +; CHECK-NEON-NEXT: moveq r2, r5 +; CHECK-NEON-NEXT: movne r5, r1 +; CHECK-NEON-NEXT: vmov.32 d0[0], r2 +; CHECK-NEON-NEXT: vmov.32 d1[1], r4 +; CHECK-NEON-NEXT: vmov.32 d0[1], r5 ; CHECK-NEON-NEXT: vpop {d8} -; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, pc} ; ; CHECK-FP16-LABEL: ustest_f16i64: ; CHECK-FP16: @ %bb.0: @ %entry -; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, lr} ; CHECK-FP16-NEXT: vmov.u16 r0, d0[1] -; CHECK-FP16-NEXT: vmov.u16 r4, d0[0] +; CHECK-FP16-NEXT: vmov.u16 r5, d0[0] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: mov r8, r1 +; CHECK-FP16-NEXT: mov r4, r1 ; CHECK-FP16-NEXT: subs r1, r2, #1 ; CHECK-FP16-NEXT: sbcs r1, r3, #0 -; CHECK-FP16-NEXT: mov r6, #0 -; CHECK-FP16-NEXT: movwlt r6, #1 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: mov r9, #1 -; CHECK-FP16-NEXT: moveq r3, r6 -; CHECK-FP16-NEXT: moveq r8, r6 -; CHECK-FP16-NEXT: moveq r2, r9 -; CHECK-FP16-NEXT: movne r6, r0 -; CHECK-FP16-NEXT: rsbs r0, r6, #0 -; CHECK-FP16-NEXT: rscs r0, r8, #0 -; CHECK-FP16-NEXT: vmov s0, r4 +; CHECK-FP16-NEXT: mov r8, #1 +; 
CHECK-FP16-NEXT: mov r1, #0 +; CHECK-FP16-NEXT: movge r2, r8 +; CHECK-FP16-NEXT: movwlt r1, #1 +; CHECK-FP16-NEXT: cmp r1, #0 +; CHECK-FP16-NEXT: moveq r3, r1 +; CHECK-FP16-NEXT: moveq r4, r1 +; CHECK-FP16-NEXT: movne r1, r0 +; CHECK-FP16-NEXT: rsbs r0, r1, #0 +; CHECK-FP16-NEXT: rscs r0, r4, #0 +; CHECK-FP16-NEXT: vmov s0, r5 ; CHECK-FP16-NEXT: rscs r0, r2, #0 ; CHECK-FP16-NEXT: mov r7, #0 ; CHECK-FP16-NEXT: rscs r0, r3, #0 ; CHECK-FP16-NEXT: mov r5, #0 ; CHECK-FP16-NEXT: movwlt r7, #1 ; CHECK-FP16-NEXT: cmp r7, #0 -; CHECK-FP16-NEXT: moveq r6, r7 +; CHECK-FP16-NEXT: moveq r4, r7 +; CHECK-FP16-NEXT: movne r7, r1 ; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: subs r4, r2, #1 -; CHECK-FP16-NEXT: vmov.32 d1[0], r6 -; CHECK-FP16-NEXT: sbcs r4, r3, #0 -; CHECK-FP16-NEXT: mov r4, #0 -; CHECK-FP16-NEXT: movwlt r4, #1 -; CHECK-FP16-NEXT: cmp r4, #0 -; CHECK-FP16-NEXT: movne r9, r2 -; CHECK-FP16-NEXT: moveq r3, r4 -; CHECK-FP16-NEXT: moveq r1, r4 -; CHECK-FP16-NEXT: movne r4, r0 -; CHECK-FP16-NEXT: rsbs r0, r4, #0 +; CHECK-FP16-NEXT: subs r6, r2, #1 +; CHECK-FP16-NEXT: vmov.32 d1[0], r7 +; CHECK-FP16-NEXT: sbcs r6, r3, #0 +; CHECK-FP16-NEXT: movlt r8, r2 +; CHECK-FP16-NEXT: mov r2, #0 +; CHECK-FP16-NEXT: movwlt r2, #1 +; CHECK-FP16-NEXT: cmp r2, #0 +; CHECK-FP16-NEXT: moveq r3, r2 +; CHECK-FP16-NEXT: moveq r1, r2 +; CHECK-FP16-NEXT: movne r2, r0 +; CHECK-FP16-NEXT: rsbs r0, r2, #0 ; CHECK-FP16-NEXT: rscs r0, r1, #0 -; CHECK-FP16-NEXT: rscs r0, r9, #0 +; CHECK-FP16-NEXT: rscs r0, r8, #0 ; CHECK-FP16-NEXT: rscs r0, r3, #0 ; CHECK-FP16-NEXT: movwlt r5, #1 ; CHECK-FP16-NEXT: cmp r5, #0 -; CHECK-FP16-NEXT: moveq r4, r5 -; CHECK-FP16-NEXT: cmp r7, #0 -; CHECK-FP16-NEXT: movne r7, r8 -; CHECK-FP16-NEXT: vmov.32 d0[0], r4 -; CHECK-FP16-NEXT: cmp r5, #0 -; CHECK-FP16-NEXT: vmov.32 d1[1], r7 +; CHECK-FP16-NEXT: moveq r2, r5 ; CHECK-FP16-NEXT: movne r5, r1 +; CHECK-FP16-NEXT: vmov.32 d0[0], r2 +; CHECK-FP16-NEXT: vmov.32 d1[1], r4 ; CHECK-FP16-NEXT: vmov.32 d0[1], r5 -; 
CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, pc} entry: %conv = fptosi <2 x half> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -2380,34 +2326,34 @@ define <2 x i32> @stest_f64i32_mm(<2 x double> %x) { ; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov r0, r2, d9 -; CHECK-NEXT: mvn r5, #-2147483648 -; CHECK-NEXT: subs r3, r4, r5 -; CHECK-NEXT: sbcs r3, r1, #0 -; CHECK-NEXT: mvn r7, #0 -; CHECK-NEXT: mov r3, #0 -; CHECK-NEXT: mov r8, #-2147483648 -; CHECK-NEXT: movwlt r3, #1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: movne r3, r1 -; CHECK-NEXT: moveq r4, r5 -; CHECK-NEXT: rsbs r1, r4, #-2147483648 -; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: sbcs r1, r7, r3 -; CHECK-NEXT: movge r4, r8 -; CHECK-NEXT: mov r1, r2 +; CHECK-NEXT: mov r8, r1 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: mvn r6, #-2147483648 +; CHECK-NEXT: subs r2, r4, r6 +; CHECK-NEXT: mov r5, #0 +; CHECK-NEXT: sbcs r2, r8, #0 +; CHECK-NEXT: mov r7, #0 +; CHECK-NEXT: movge r4, r6 +; CHECK-NEXT: movwlt r5, #1 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: subs r2, r0, r5 -; CHECK-NEXT: vmov.32 d0[0], r4 +; CHECK-NEXT: subs r2, r0, r6 ; CHECK-NEXT: sbcs r2, r1, #0 -; CHECK-NEXT: movwlt r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: movne r6, r1 -; CHECK-NEXT: movne r5, r0 -; CHECK-NEXT: rsbs r0, r5, #-2147483648 -; CHECK-NEXT: sbcs r0, r7, r6 -; CHECK-NEXT: movge r5, r8 -; CHECK-NEXT: vmov.32 d0[1], r5 +; CHECK-NEXT: movlt r6, r0 +; CHECK-NEXT: movwlt r7, #1 +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: mov r0, #-2147483648 +; CHECK-NEXT: movne r7, r1 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: movne r5, r8 +; CHECK-NEXT: rsbs r2, r4, #-2147483648 +; CHECK-NEXT: mvn r1, #0 +; CHECK-NEXT: sbcs r2, r1, r5 +; CHECK-NEXT: movge r4, r0 +; CHECK-NEXT: rsbs r2, r6, #-2147483648 +; CHECK-NEXT: vmov.32 d0[0], r4 +; CHECK-NEXT: sbcs r1, r1, r7 +; CHECK-NEXT: movge r6, r0 +; CHECK-NEXT: vmov.32 d0[1], r6 ; CHECK-NEXT: 
vpop {d8, d9} ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc} entry: @@ -2460,16 +2406,16 @@ define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) { ; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: vmov r2, r12, d9 -; CHECK-NEXT: mvn r5, #0 -; CHECK-NEXT: subs r3, r0, r5 -; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: sbcs r3, r1, #0 -; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: mvn r4, #0 +; CHECK-NEXT: subs r5, r0, r4 ; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: sbcs r5, r1, #0 +; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: movge r0, r4 ; CHECK-NEXT: movwlt r3, #1 ; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: movne r3, r1 -; CHECK-NEXT: moveq r0, r5 ; CHECK-NEXT: rsbs r1, r0, #0 ; CHECK-NEXT: rscs r1, r3, #0 ; CHECK-NEXT: movwlt r6, #1 @@ -2478,21 +2424,21 @@ define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) { ; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: mov r1, r12 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: subs r2, r0, r5 +; CHECK-NEXT: subs r2, r0, r4 ; CHECK-NEXT: vmov.32 d0[0], r6 ; CHECK-NEXT: sbcs r2, r1, #0 -; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: movwlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: movne r2, r1 -; CHECK-NEXT: movne r5, r0 -; CHECK-NEXT: rsbs r0, r5, #0 -; CHECK-NEXT: rscs r0, r2, #0 -; CHECK-NEXT: movwlt r4, #1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: movne r4, r5 -; CHECK-NEXT: vmov.32 d0[1], r4 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: movlt r4, r0 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: movwlt r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: movne r0, r1 +; CHECK-NEXT: rsbs r1, r4, #0 +; CHECK-NEXT: rscs r0, r0, #0 +; CHECK-NEXT: movwlt r5, #1 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: movne r5, r4 +; CHECK-NEXT: vmov.32 d0[1], r5 +; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r4, r5, r6, pc} entry: %conv = fptosi <2 x double> %x to <2 x i64> @@ -2505,72 +2451,81 @@ entry: define <4 x i32> @stest_f32i32_mm(<4 x float> %x) { ; CHECK-LABEL: stest_f32i32_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, 
r8, r9, r10, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, sp, #8 ; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: mov r8, #-2147483648 -; CHECK-NEXT: mvn r9, #0 -; CHECK-NEXT: mov r10, #0 ; CHECK-NEXT: vmov r0, s19 -; CHECK-NEXT: vmov r5, s16 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mvn r7, #-2147483648 -; CHECK-NEXT: subs r0, r0, r7 -; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: movne r0, r1 -; CHECK-NEXT: moveq r4, r7 -; CHECK-NEXT: rsbs r1, r4, #-2147483648 -; CHECK-NEXT: sbcs r0, r9, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: movge r4, r8 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: subs r0, r0, r7 -; CHECK-NEXT: sbcs r0, r1, #0 +; CHECK-NEXT: vmov r2, s18 +; CHECK-NEXT: mov r11, r0 +; CHECK-NEXT: vmov r0, s17 +; CHECK-NEXT: mvn r6, #-2147483648 +; CHECK-NEXT: mov r3, #-2147483648 +; CHECK-NEXT: mvn r10, #0 +; CHECK-NEXT: vmov r7, s16 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: subs r2, r11, r6 +; CHECK-NEXT: sbcs r2, r1, #0 ; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: movge r11, r6 ; CHECK-NEXT: movwlt r2, #1 ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: movne r2, r1 -; CHECK-NEXT: moveq r5, r7 -; CHECK-NEXT: rsbs r1, r5, #-2147483648 -; CHECK-NEXT: sbcs r1, r9, r2 -; CHECK-NEXT: movge r5, r8 +; CHECK-NEXT: rsbs r1, r11, #-2147483648 +; CHECK-NEXT: sbcs r1, r10, r2 +; CHECK-NEXT: movge r11, r3 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: subs r0, r0, r7 +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: subs r0, r0, r6 ; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: mov 
r0, #0 -; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: movne r0, r1 -; CHECK-NEXT: moveq r6, r7 -; CHECK-NEXT: rsbs r1, r6, #-2147483648 -; CHECK-NEXT: sbcs r0, r9, r0 -; CHECK-NEXT: vmov r0, s17 -; CHECK-NEXT: movge r6, r8 +; CHECK-NEXT: mov r9, #0 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: movge r5, r6 +; CHECK-NEXT: movwlt r9, #1 +; CHECK-NEXT: cmp r9, #0 +; CHECK-NEXT: movne r9, r1 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: subs r2, r0, r7 -; CHECK-NEXT: vmov.32 d1[0], r6 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: subs r0, r0, r6 +; CHECK-NEXT: sbcs r0, r1, #0 +; CHECK-NEXT: mov r8, #0 +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: movge r7, r6 +; CHECK-NEXT: movwlt r8, #1 +; CHECK-NEXT: cmp r8, #0 +; CHECK-NEXT: movne r8, r1 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: subs r2, r0, r6 ; CHECK-NEXT: sbcs r2, r1, #0 -; CHECK-NEXT: movwlt r10, #1 -; CHECK-NEXT: cmp r10, #0 -; CHECK-NEXT: movne r10, r1 -; CHECK-NEXT: movne r7, r0 +; CHECK-NEXT: movlt r6, r0 +; CHECK-NEXT: movwlt r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: movne r4, r1 +; CHECK-NEXT: rsbs r0, r6, #-2147483648 +; CHECK-NEXT: sbcs r0, r10, r4 +; CHECK-NEXT: mov r1, #-2147483648 +; CHECK-NEXT: movge r6, r1 ; CHECK-NEXT: rsbs r0, r7, #-2147483648 -; CHECK-NEXT: vmov.32 d0[0], r5 -; CHECK-NEXT: sbcs r0, r9, r10 -; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: movge r7, r8 -; CHECK-NEXT: vmov.32 d0[1], r7 +; CHECK-NEXT: sbcs r0, r10, r8 +; CHECK-NEXT: vmov.32 d1[0], r6 +; CHECK-NEXT: movge r7, r1 +; CHECK-NEXT: rsbs r0, r5, #-2147483648 +; CHECK-NEXT: vmov.32 d0[0], r7 +; CHECK-NEXT: sbcs r0, r10, r9 +; CHECK-NEXT: movge r5, r1 +; CHECK-NEXT: vmov.32 d1[1], r11 +; CHECK-NEXT: vmov.32 d0[1], r5 +; CHECK-NEXT: add sp, sp, #8 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEXT: add sp, sp, #4 +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi <4 x float> %x to <4 x i64> %spec.store.select = call 
<4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> ) @@ -2636,33 +2591,33 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) { ; CHECK-NEXT: vmov r0, s19 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vmov r2, s16 -; CHECK-NEXT: mvn r7, #0 -; CHECK-NEXT: subs r3, r0, r7 +; CHECK-NEXT: mvn r6, #0 +; CHECK-NEXT: subs r3, r0, r6 ; CHECK-NEXT: mov r4, #0 ; CHECK-NEXT: sbcs r3, r1, #0 -; CHECK-NEXT: mov r10, #0 +; CHECK-NEXT: vmov r8, s17 ; CHECK-NEXT: mov r3, #0 -; CHECK-NEXT: vmov r9, s18 +; CHECK-NEXT: movge r0, r6 ; CHECK-NEXT: movwlt r3, #1 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: movne r3, r1 -; CHECK-NEXT: moveq r0, r7 ; CHECK-NEXT: rsbs r1, r0, #0 -; CHECK-NEXT: vmov r8, s17 ; CHECK-NEXT: rscs r1, r3, #0 +; CHECK-NEXT: vmov r9, s18 ; CHECK-NEXT: movwlt r4, #1 ; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: movne r4, r0 +; CHECK-NEXT: mov r10, #0 ; CHECK-NEXT: mov r0, r2 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: subs r2, r0, r7 +; CHECK-NEXT: subs r2, r0, r6 ; CHECK-NEXT: mov r5, #0 ; CHECK-NEXT: sbcs r2, r1, #0 ; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movge r0, r6 ; CHECK-NEXT: movwlt r2, #1 ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: movne r2, r1 -; CHECK-NEXT: moveq r0, r7 ; CHECK-NEXT: rsbs r1, r0, #0 ; CHECK-NEXT: rscs r1, r2, #0 ; CHECK-NEXT: movwlt r5, #1 @@ -2670,36 +2625,36 @@ define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) { ; CHECK-NEXT: movne r5, r0 ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: subs r2, r0, r7 -; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: subs r2, r0, r6 +; CHECK-NEXT: mov r7, #0 ; CHECK-NEXT: sbcs r2, r1, #0 ; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movge r0, r6 ; CHECK-NEXT: movwlt r2, #1 ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: movne r2, r1 -; CHECK-NEXT: moveq r0, r7 ; CHECK-NEXT: rsbs r1, r0, #0 ; CHECK-NEXT: rscs r1, r2, #0 -; CHECK-NEXT: movwlt r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: movne r6, r0 +; CHECK-NEXT: movwlt r7, #1 +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: movne r7, r0 ; CHECK-NEXT: mov r0, r8 ; 
CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: subs r2, r0, r7 -; CHECK-NEXT: vmov.32 d1[0], r6 +; CHECK-NEXT: subs r2, r0, r6 +; CHECK-NEXT: vmov.32 d1[0], r7 ; CHECK-NEXT: sbcs r2, r1, #0 -; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movlt r6, r0 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: movwlt r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: movne r0, r1 +; CHECK-NEXT: rsbs r1, r6, #0 +; CHECK-NEXT: rscs r0, r0, #0 ; CHECK-NEXT: vmov.32 d0[0], r5 -; CHECK-NEXT: movwlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: movne r2, r1 -; CHECK-NEXT: movne r7, r0 -; CHECK-NEXT: rsbs r0, r7, #0 -; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: rscs r0, r2, #0 ; CHECK-NEXT: movwlt r10, #1 ; CHECK-NEXT: cmp r10, #0 -; CHECK-NEXT: movne r10, r7 +; CHECK-NEXT: vmov.32 d1[1], r4 +; CHECK-NEXT: movne r10, r6 ; CHECK-NEXT: vmov.32 d0[1], r10 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} @@ -2714,150 +2669,164 @@ entry: define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-NEON-LABEL: stest_f16i32_mm: ; CHECK-NEON: @ %bb.0: @ %entry -; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEON-NEXT: .pad #4 +; CHECK-NEON-NEXT: sub sp, sp, #4 ; CHECK-NEON-NEXT: .vsave {d8, d9, d10} ; CHECK-NEON-NEXT: vpush {d8, d9, d10} +; CHECK-NEON-NEXT: .pad #8 +; CHECK-NEON-NEXT: sub sp, sp, #8 ; CHECK-NEON-NEXT: vmov r0, s3 -; CHECK-NEON-NEXT: vmov.f32 s18, s2 -; CHECK-NEON-NEXT: vmov.f32 s16, s1 +; CHECK-NEON-NEXT: vmov.f32 s16, s2 +; CHECK-NEON-NEXT: vmov.f32 s18, s1 ; CHECK-NEON-NEXT: vmov.f32 s20, s0 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: mov r4, r0 -; CHECK-NEON-NEXT: vmov r0, s20 -; CHECK-NEON-NEXT: mvn r7, #-2147483648 -; CHECK-NEON-NEXT: subs r2, r4, r7 +; CHECK-NEON-NEXT: vmov r2, s16 +; CHECK-NEON-NEXT: mov 
r11, r0 +; CHECK-NEON-NEXT: vmov r0, s18 +; CHECK-NEON-NEXT: mvn r6, #-2147483648 +; CHECK-NEON-NEXT: mov r3, #-2147483648 +; CHECK-NEON-NEXT: mvn r10, #0 +; CHECK-NEON-NEXT: vmov r7, s20 +; CHECK-NEON-NEXT: mov r4, #0 +; CHECK-NEON-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-NEON-NEXT: subs r2, r11, r6 ; CHECK-NEON-NEXT: sbcs r2, r1, #0 -; CHECK-NEON-NEXT: mov r8, #-2147483648 ; CHECK-NEON-NEXT: mov r2, #0 -; CHECK-NEON-NEXT: mvn r9, #0 +; CHECK-NEON-NEXT: movge r11, r6 ; CHECK-NEON-NEXT: movwlt r2, #1 ; CHECK-NEON-NEXT: cmp r2, #0 ; CHECK-NEON-NEXT: movne r2, r1 -; CHECK-NEON-NEXT: moveq r4, r7 -; CHECK-NEON-NEXT: rsbs r1, r4, #-2147483648 -; CHECK-NEON-NEXT: mov r10, #0 -; CHECK-NEON-NEXT: sbcs r1, r9, r2 -; CHECK-NEON-NEXT: movge r4, r8 +; CHECK-NEON-NEXT: rsbs r1, r11, #-2147483648 +; CHECK-NEON-NEXT: sbcs r1, r10, r2 +; CHECK-NEON-NEXT: movge r11, r3 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz ; CHECK-NEON-NEXT: mov r5, r0 -; CHECK-NEON-NEXT: subs r0, r0, r7 +; CHECK-NEON-NEXT: subs r0, r0, r6 ; CHECK-NEON-NEXT: sbcs r0, r1, #0 -; CHECK-NEON-NEXT: mov r2, #0 -; CHECK-NEON-NEXT: vmov r0, s18 -; CHECK-NEON-NEXT: movwlt r2, #1 -; CHECK-NEON-NEXT: cmp r2, #0 -; CHECK-NEON-NEXT: movne r2, r1 -; CHECK-NEON-NEXT: moveq r5, r7 -; CHECK-NEON-NEXT: rsbs r1, r5, #-2147483648 -; CHECK-NEON-NEXT: sbcs r1, r9, r2 -; CHECK-NEON-NEXT: movge r5, r8 +; CHECK-NEON-NEXT: mov r8, #0 +; CHECK-NEON-NEXT: mov r0, r7 +; CHECK-NEON-NEXT: movge r5, r6 +; CHECK-NEON-NEXT: movwlt r8, #1 +; CHECK-NEON-NEXT: cmp r8, #0 +; CHECK-NEON-NEXT: movne r8, r1 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: mov r6, r0 -; CHECK-NEON-NEXT: subs r0, r0, r7 +; CHECK-NEON-NEXT: mov r7, r0 +; CHECK-NEON-NEXT: subs r0, r0, r6 ; CHECK-NEON-NEXT: sbcs r0, r1, #0 -; CHECK-NEON-NEXT: mov r0, #0 -; CHECK-NEON-NEXT: movwlt r0, #1 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: movne r0, r1 -; CHECK-NEON-NEXT: moveq r6, r7 -; 
CHECK-NEON-NEXT: rsbs r1, r6, #-2147483648 -; CHECK-NEON-NEXT: sbcs r0, r9, r0 -; CHECK-NEON-NEXT: vmov r0, s16 -; CHECK-NEON-NEXT: movge r6, r8 +; CHECK-NEON-NEXT: mov r9, #0 +; CHECK-NEON-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEON-NEXT: movge r7, r6 +; CHECK-NEON-NEXT: movwlt r9, #1 +; CHECK-NEON-NEXT: cmp r9, #0 +; CHECK-NEON-NEXT: movne r9, r1 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: subs r2, r0, r7 -; CHECK-NEON-NEXT: vmov.32 d1[0], r6 +; CHECK-NEON-NEXT: subs r2, r0, r6 ; CHECK-NEON-NEXT: sbcs r2, r1, #0 -; CHECK-NEON-NEXT: movwlt r10, #1 -; CHECK-NEON-NEXT: cmp r10, #0 -; CHECK-NEON-NEXT: movne r10, r1 -; CHECK-NEON-NEXT: movne r7, r0 +; CHECK-NEON-NEXT: movlt r6, r0 +; CHECK-NEON-NEXT: movwlt r4, #1 +; CHECK-NEON-NEXT: cmp r4, #0 +; CHECK-NEON-NEXT: movne r4, r1 +; CHECK-NEON-NEXT: rsbs r0, r6, #-2147483648 +; CHECK-NEON-NEXT: sbcs r0, r10, r4 +; CHECK-NEON-NEXT: mov r1, #-2147483648 +; CHECK-NEON-NEXT: movge r6, r1 ; CHECK-NEON-NEXT: rsbs r0, r7, #-2147483648 -; CHECK-NEON-NEXT: vmov.32 d0[0], r5 -; CHECK-NEON-NEXT: sbcs r0, r9, r10 -; CHECK-NEON-NEXT: vmov.32 d1[1], r4 -; CHECK-NEON-NEXT: movge r7, r8 -; CHECK-NEON-NEXT: vmov.32 d0[1], r7 +; CHECK-NEON-NEXT: sbcs r0, r10, r9 +; CHECK-NEON-NEXT: vmov.32 d1[0], r6 +; CHECK-NEON-NEXT: movge r7, r1 +; CHECK-NEON-NEXT: rsbs r0, r5, #-2147483648 +; CHECK-NEON-NEXT: vmov.32 d0[0], r7 +; CHECK-NEON-NEXT: sbcs r0, r10, r8 +; CHECK-NEON-NEXT: movge r5, r1 +; CHECK-NEON-NEXT: vmov.32 d1[1], r11 +; CHECK-NEON-NEXT: vmov.32 d0[1], r5 +; CHECK-NEON-NEXT: add sp, sp, #8 ; CHECK-NEON-NEXT: vpop {d8, d9, d10} -; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEON-NEXT: add sp, sp, #4 +; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; CHECK-FP16-LABEL: stest_f16i32_mm: ; CHECK-FP16: @ %bb.0: @ %entry -; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} +; 
CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-FP16-NEXT: .pad #4 +; CHECK-FP16-NEXT: sub sp, sp, #4 ; CHECK-FP16-NEXT: .vsave {d8, d9} ; CHECK-FP16-NEXT: vpush {d8, d9} ; CHECK-FP16-NEXT: vmov.u16 r0, d0[3] -; CHECK-FP16-NEXT: vorr d8, d0, d0 -; CHECK-FP16-NEXT: vmov.u16 r5, d0[2] -; CHECK-FP16-NEXT: vmov.u16 r6, d0[0] +; CHECK-FP16-NEXT: vmov.u16 r4, d0[2] +; CHECK-FP16-NEXT: vmov.u16 r5, d0[0] +; CHECK-FP16-NEXT: vmov.u16 r6, d0[1] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: mov r4, r0 +; CHECK-FP16-NEXT: mov r10, r0 ; CHECK-FP16-NEXT: mvn r7, #-2147483648 ; CHECK-FP16-NEXT: subs r0, r0, r7 ; CHECK-FP16-NEXT: vmov s0, r6 ; CHECK-FP16-NEXT: sbcs r0, r1, #0 -; CHECK-FP16-NEXT: mov r8, #-2147483648 +; CHECK-FP16-NEXT: mov r2, #-2147483648 ; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: mvn r9, #0 +; CHECK-FP16-NEXT: movge r10, r7 ; CHECK-FP16-NEXT: movwlt r0, #1 ; CHECK-FP16-NEXT: cmp r0, #0 ; CHECK-FP16-NEXT: movne r0, r1 -; CHECK-FP16-NEXT: moveq r4, r7 -; CHECK-FP16-NEXT: rsbs r1, r4, #-2147483648 -; CHECK-FP16-NEXT: mov r10, #0 +; CHECK-FP16-NEXT: rsbs r1, r10, #-2147483648 +; CHECK-FP16-NEXT: mvn r9, #0 ; CHECK-FP16-NEXT: sbcs r0, r9, r0 +; CHECK-FP16-NEXT: vmov s16, r4 +; CHECK-FP16-NEXT: mov r11, #0 ; CHECK-FP16-NEXT: vmov s18, r5 -; CHECK-FP16-NEXT: movge r4, r8 +; CHECK-FP16-NEXT: movge r10, r2 ; CHECK-FP16-NEXT: bl __fixhfdi ; CHECK-FP16-NEXT: vmov.f32 s0, s18 ; CHECK-FP16-NEXT: mov r5, r0 ; CHECK-FP16-NEXT: subs r0, r0, r7 +; CHECK-FP16-NEXT: mov r4, #0 ; CHECK-FP16-NEXT: sbcs r0, r1, #0 -; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: movwlt r0, #1 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: movne r0, r1 -; CHECK-FP16-NEXT: moveq r5, r7 -; CHECK-FP16-NEXT: rsbs r1, r5, #-2147483648 -; CHECK-FP16-NEXT: sbcs r0, r9, r0 -; CHECK-FP16-NEXT: movge r5, r8 +; CHECK-FP16-NEXT: movge r5, r7 +; CHECK-FP16-NEXT: movwlt r4, #1 +; 
CHECK-FP16-NEXT: cmp r4, #0 +; CHECK-FP16-NEXT: movne r4, r1 ; CHECK-FP16-NEXT: bl __fixhfdi +; CHECK-FP16-NEXT: vmov.f32 s0, s16 ; CHECK-FP16-NEXT: mov r6, r0 ; CHECK-FP16-NEXT: subs r0, r0, r7 +; CHECK-FP16-NEXT: mov r8, #0 ; CHECK-FP16-NEXT: sbcs r0, r1, #0 -; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: movwlt r0, #1 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: movne r0, r1 -; CHECK-FP16-NEXT: vmov.u16 r1, d8[1] -; CHECK-FP16-NEXT: moveq r6, r7 -; CHECK-FP16-NEXT: vmov s0, r1 -; CHECK-FP16-NEXT: rsbs r1, r6, #-2147483648 -; CHECK-FP16-NEXT: sbcs r0, r9, r0 -; CHECK-FP16-NEXT: movge r6, r8 +; CHECK-FP16-NEXT: movge r6, r7 +; CHECK-FP16-NEXT: movwlt r8, #1 +; CHECK-FP16-NEXT: cmp r8, #0 +; CHECK-FP16-NEXT: movne r8, r1 ; CHECK-FP16-NEXT: bl __fixhfdi ; CHECK-FP16-NEXT: subs r2, r0, r7 -; CHECK-FP16-NEXT: vmov.32 d1[0], r6 ; CHECK-FP16-NEXT: sbcs r2, r1, #0 -; CHECK-FP16-NEXT: movwlt r10, #1 -; CHECK-FP16-NEXT: cmp r10, #0 -; CHECK-FP16-NEXT: movne r10, r1 -; CHECK-FP16-NEXT: movne r7, r0 +; CHECK-FP16-NEXT: movlt r7, r0 +; CHECK-FP16-NEXT: movwlt r11, #1 +; CHECK-FP16-NEXT: cmp r11, #0 +; CHECK-FP16-NEXT: movne r11, r1 ; CHECK-FP16-NEXT: rsbs r0, r7, #-2147483648 -; CHECK-FP16-NEXT: vmov.32 d0[0], r5 -; CHECK-FP16-NEXT: sbcs r0, r9, r10 -; CHECK-FP16-NEXT: vmov.32 d1[1], r4 -; CHECK-FP16-NEXT: movge r7, r8 -; CHECK-FP16-NEXT: vmov.32 d0[1], r7 +; CHECK-FP16-NEXT: sbcs r0, r9, r11 +; CHECK-FP16-NEXT: mov r1, #-2147483648 +; CHECK-FP16-NEXT: movge r7, r1 +; CHECK-FP16-NEXT: rsbs r0, r6, #-2147483648 +; CHECK-FP16-NEXT: sbcs r0, r9, r8 +; CHECK-FP16-NEXT: vmov.32 d1[0], r7 +; CHECK-FP16-NEXT: movge r6, r1 +; CHECK-FP16-NEXT: rsbs r0, r5, #-2147483648 +; CHECK-FP16-NEXT: vmov.32 d0[0], r6 +; CHECK-FP16-NEXT: sbcs r0, r9, r4 +; CHECK-FP16-NEXT: movge r5, r1 +; CHECK-FP16-NEXT: vmov.32 d1[1], r10 +; CHECK-FP16-NEXT: vmov.32 d0[1], r5 ; CHECK-FP16-NEXT: vpop {d8, d9} -; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-FP16-NEXT: add sp, sp, #4 +; 
CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %conv = fptosi <4 x half> %x to <4 x i64> %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> ) @@ -2975,34 +2944,34 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz ; CHECK-NEON-NEXT: vmov r2, s20 -; CHECK-NEON-NEXT: mvn r7, #0 -; CHECK-NEON-NEXT: subs r3, r0, r7 +; CHECK-NEON-NEXT: mvn r6, #0 +; CHECK-NEON-NEXT: subs r3, r0, r6 ; CHECK-NEON-NEXT: mov r4, #0 ; CHECK-NEON-NEXT: sbcs r3, r1, #0 -; CHECK-NEON-NEXT: mov r10, #0 -; CHECK-NEON-NEXT: mov r3, #0 ; CHECK-NEON-NEXT: vmov r8, s18 +; CHECK-NEON-NEXT: mov r3, #0 +; CHECK-NEON-NEXT: movge r0, r6 ; CHECK-NEON-NEXT: movwlt r3, #1 ; CHECK-NEON-NEXT: cmp r3, #0 ; CHECK-NEON-NEXT: movne r3, r1 -; CHECK-NEON-NEXT: moveq r0, r7 ; CHECK-NEON-NEXT: rsbs r1, r0, #0 -; CHECK-NEON-NEXT: vmov r9, s16 ; CHECK-NEON-NEXT: rscs r1, r3, #0 +; CHECK-NEON-NEXT: vmov r9, s16 ; CHECK-NEON-NEXT: movwlt r4, #1 ; CHECK-NEON-NEXT: cmp r4, #0 ; CHECK-NEON-NEXT: movne r4, r0 +; CHECK-NEON-NEXT: mov r10, #0 ; CHECK-NEON-NEXT: mov r0, r2 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: subs r2, r0, r7 +; CHECK-NEON-NEXT: subs r2, r0, r6 ; CHECK-NEON-NEXT: mov r5, #0 ; CHECK-NEON-NEXT: sbcs r2, r1, #0 ; CHECK-NEON-NEXT: mov r2, #0 +; CHECK-NEON-NEXT: movge r0, r6 ; CHECK-NEON-NEXT: movwlt r2, #1 ; CHECK-NEON-NEXT: cmp r2, #0 ; CHECK-NEON-NEXT: movne r2, r1 -; CHECK-NEON-NEXT: moveq r0, r7 ; CHECK-NEON-NEXT: rsbs r1, r0, #0 ; CHECK-NEON-NEXT: rscs r1, r2, #0 ; CHECK-NEON-NEXT: movwlt r5, #1 @@ -3011,37 +2980,37 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-NEON-NEXT: mov r0, r9 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: subs r2, r0, r7 -; CHECK-NEON-NEXT: mov r6, #0 +; CHECK-NEON-NEXT: subs r2, r0, r6 +; CHECK-NEON-NEXT: mov r7, #0 ; CHECK-NEON-NEXT: sbcs r2, r1, #0 ; 
CHECK-NEON-NEXT: mov r2, #0 +; CHECK-NEON-NEXT: movge r0, r6 ; CHECK-NEON-NEXT: movwlt r2, #1 ; CHECK-NEON-NEXT: cmp r2, #0 ; CHECK-NEON-NEXT: movne r2, r1 -; CHECK-NEON-NEXT: moveq r0, r7 ; CHECK-NEON-NEXT: rsbs r1, r0, #0 ; CHECK-NEON-NEXT: rscs r1, r2, #0 -; CHECK-NEON-NEXT: movwlt r6, #1 -; CHECK-NEON-NEXT: cmp r6, #0 -; CHECK-NEON-NEXT: movne r6, r0 +; CHECK-NEON-NEXT: movwlt r7, #1 +; CHECK-NEON-NEXT: cmp r7, #0 +; CHECK-NEON-NEXT: movne r7, r0 ; CHECK-NEON-NEXT: mov r0, r8 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: bl __aeabi_f2lz -; CHECK-NEON-NEXT: subs r2, r0, r7 -; CHECK-NEON-NEXT: vmov.32 d1[0], r6 +; CHECK-NEON-NEXT: subs r2, r0, r6 +; CHECK-NEON-NEXT: vmov.32 d1[0], r7 ; CHECK-NEON-NEXT: sbcs r2, r1, #0 -; CHECK-NEON-NEXT: mov r2, #0 +; CHECK-NEON-NEXT: movlt r6, r0 +; CHECK-NEON-NEXT: mov r0, #0 +; CHECK-NEON-NEXT: movwlt r0, #1 +; CHECK-NEON-NEXT: cmp r0, #0 +; CHECK-NEON-NEXT: movne r0, r1 +; CHECK-NEON-NEXT: rsbs r1, r6, #0 +; CHECK-NEON-NEXT: rscs r0, r0, #0 ; CHECK-NEON-NEXT: vmov.32 d0[0], r5 -; CHECK-NEON-NEXT: movwlt r2, #1 -; CHECK-NEON-NEXT: cmp r2, #0 -; CHECK-NEON-NEXT: movne r2, r1 -; CHECK-NEON-NEXT: movne r7, r0 -; CHECK-NEON-NEXT: rsbs r0, r7, #0 -; CHECK-NEON-NEXT: vmov.32 d1[1], r4 -; CHECK-NEON-NEXT: rscs r0, r2, #0 ; CHECK-NEON-NEXT: movwlt r10, #1 ; CHECK-NEON-NEXT: cmp r10, #0 -; CHECK-NEON-NEXT: movne r10, r7 +; CHECK-NEON-NEXT: vmov.32 d1[1], r4 +; CHECK-NEON-NEXT: movne r10, r6 ; CHECK-NEON-NEXT: vmov.32 d0[1], r10 ; CHECK-NEON-NEXT: vpop {d8, d9, d10} ; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} @@ -3054,75 +3023,75 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-FP16-NEXT: vpush {d8, d9} ; CHECK-FP16-NEXT: vmov.u16 r0, d0[3] ; CHECK-FP16-NEXT: vorr d8, d0, d0 -; CHECK-FP16-NEXT: vmov.u16 r6, d0[0] -; CHECK-FP16-NEXT: vmov.u16 r7, d0[2] +; CHECK-FP16-NEXT: vmov.u16 r5, d0[0] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfdi ; CHECK-FP16-NEXT: vmov.u16 r2, d8[1] -; 
CHECK-FP16-NEXT: mvn r5, #0 -; CHECK-FP16-NEXT: vmov s0, r6 +; CHECK-FP16-NEXT: mvn r4, #0 +; CHECK-FP16-NEXT: vmov.u16 r3, d8[2] +; CHECK-FP16-NEXT: vmov s0, r5 ; CHECK-FP16-NEXT: mov r6, #0 ; CHECK-FP16-NEXT: mov r8, #0 -; CHECK-FP16-NEXT: vmov s18, r7 ; CHECK-FP16-NEXT: vmov s16, r2 -; CHECK-FP16-NEXT: subs r2, r0, r5 +; CHECK-FP16-NEXT: subs r2, r0, r4 ; CHECK-FP16-NEXT: sbcs r2, r1, #0 +; CHECK-FP16-NEXT: vmov s18, r3 ; CHECK-FP16-NEXT: mov r2, #0 +; CHECK-FP16-NEXT: movge r0, r4 ; CHECK-FP16-NEXT: movwlt r2, #1 ; CHECK-FP16-NEXT: cmp r2, #0 ; CHECK-FP16-NEXT: movne r2, r1 -; CHECK-FP16-NEXT: moveq r0, r5 ; CHECK-FP16-NEXT: rsbs r1, r0, #0 ; CHECK-FP16-NEXT: rscs r1, r2, #0 ; CHECK-FP16-NEXT: movwlt r6, #1 ; CHECK-FP16-NEXT: cmp r6, #0 ; CHECK-FP16-NEXT: movne r6, r0 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: subs r2, r0, r5 +; CHECK-FP16-NEXT: subs r2, r0, r4 ; CHECK-FP16-NEXT: vmov.f32 s0, s18 ; CHECK-FP16-NEXT: sbcs r2, r1, #0 ; CHECK-FP16-NEXT: mov r7, #0 ; CHECK-FP16-NEXT: mov r2, #0 +; CHECK-FP16-NEXT: movge r0, r4 ; CHECK-FP16-NEXT: movwlt r2, #1 ; CHECK-FP16-NEXT: cmp r2, #0 ; CHECK-FP16-NEXT: movne r2, r1 -; CHECK-FP16-NEXT: moveq r0, r5 ; CHECK-FP16-NEXT: rsbs r1, r0, #0 ; CHECK-FP16-NEXT: rscs r1, r2, #0 ; CHECK-FP16-NEXT: movwlt r7, #1 ; CHECK-FP16-NEXT: cmp r7, #0 ; CHECK-FP16-NEXT: movne r7, r0 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: subs r2, r0, r5 +; CHECK-FP16-NEXT: subs r2, r0, r4 ; CHECK-FP16-NEXT: vmov.f32 s0, s16 ; CHECK-FP16-NEXT: sbcs r2, r1, #0 -; CHECK-FP16-NEXT: mov r4, #0 +; CHECK-FP16-NEXT: mov r5, #0 ; CHECK-FP16-NEXT: mov r2, #0 +; CHECK-FP16-NEXT: movge r0, r4 ; CHECK-FP16-NEXT: movwlt r2, #1 ; CHECK-FP16-NEXT: cmp r2, #0 ; CHECK-FP16-NEXT: movne r2, r1 -; CHECK-FP16-NEXT: moveq r0, r5 ; CHECK-FP16-NEXT: rsbs r1, r0, #0 ; CHECK-FP16-NEXT: rscs r1, r2, #0 -; CHECK-FP16-NEXT: movwlt r4, #1 -; CHECK-FP16-NEXT: cmp r4, #0 -; CHECK-FP16-NEXT: movne r4, r0 +; CHECK-FP16-NEXT: movwlt r5, #1 +; CHECK-FP16-NEXT: cmp 
r5, #0 +; CHECK-FP16-NEXT: movne r5, r0 ; CHECK-FP16-NEXT: bl __fixhfdi -; CHECK-FP16-NEXT: subs r2, r0, r5 -; CHECK-FP16-NEXT: vmov.32 d1[0], r4 +; CHECK-FP16-NEXT: subs r2, r0, r4 +; CHECK-FP16-NEXT: vmov.32 d1[0], r5 ; CHECK-FP16-NEXT: sbcs r2, r1, #0 -; CHECK-FP16-NEXT: mov r2, #0 +; CHECK-FP16-NEXT: movlt r4, r0 +; CHECK-FP16-NEXT: mov r0, #0 +; CHECK-FP16-NEXT: movwlt r0, #1 +; CHECK-FP16-NEXT: cmp r0, #0 +; CHECK-FP16-NEXT: movne r0, r1 +; CHECK-FP16-NEXT: rsbs r1, r4, #0 +; CHECK-FP16-NEXT: rscs r0, r0, #0 ; CHECK-FP16-NEXT: vmov.32 d0[0], r7 -; CHECK-FP16-NEXT: movwlt r2, #1 -; CHECK-FP16-NEXT: cmp r2, #0 -; CHECK-FP16-NEXT: movne r2, r1 -; CHECK-FP16-NEXT: movne r5, r0 -; CHECK-FP16-NEXT: rsbs r0, r5, #0 -; CHECK-FP16-NEXT: vmov.32 d1[1], r6 -; CHECK-FP16-NEXT: rscs r0, r2, #0 ; CHECK-FP16-NEXT: movwlt r8, #1 ; CHECK-FP16-NEXT: cmp r8, #0 -; CHECK-FP16-NEXT: movne r8, r5 +; CHECK-FP16-NEXT: vmov.32 d1[1], r6 +; CHECK-FP16-NEXT: movne r8, r4 ; CHECK-FP16-NEXT: vmov.32 d0[1], r8 ; CHECK-FP16-NEXT: vpop {d8, d9} ; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, pc} @@ -3630,59 +3599,56 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-NEXT: vorr q4, q0, q0 ; CHECK-NEXT: vorr d0, d9, d9 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mvn r8, #0 -; CHECK-NEXT: subs r0, r0, r8 -; CHECK-NEXT: mvn r6, #-2147483648 -; CHECK-NEXT: sbcs r0, r1, r6 -; CHECK-NEXT: mov r10, r1 -; CHECK-NEXT: sbcs r0, r2, #0 +; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: mvn r9, #0 +; CHECK-NEXT: subs r1, r0, r9 +; CHECK-NEXT: mvn r5, #-2147483648 +; CHECK-NEXT: sbcs r1, r4, r5 ; CHECK-NEXT: vorr d0, d8, d8 -; CHECK-NEXT: sbcs r0, r3, #0 +; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: mov r7, #0 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: mov r9, #0 -; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: moveq r3, r0 -; CHECK-NEXT: movne r0, r2 -; CHECK-NEXT: moveq r10, r6 -; CHECK-NEXT: moveq r5, r8 -; CHECK-NEXT: rsbs r1, r5, #0 -; CHECK-NEXT: rscs r1, 
r10, #-2147483648 -; CHECK-NEXT: sbcs r0, r8, r0 -; CHECK-NEXT: sbcs r0, r8, r3 -; CHECK-NEXT: movwlt r7, #1 +; CHECK-NEXT: sbcs r1, r3, #0 +; CHECK-NEXT: mov r8, #-2147483648 +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: mov r10, #0 +; CHECK-NEXT: movwlt r1, #1 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: moveq r3, r1 +; CHECK-NEXT: movne r1, r2 +; CHECK-NEXT: moveq r4, r5 +; CHECK-NEXT: moveq r0, r9 +; CHECK-NEXT: rsbs r2, r0, #0 +; CHECK-NEXT: rscs r2, r4, #-2147483648 +; CHECK-NEXT: sbcs r1, r9, r1 +; CHECK-NEXT: sbcs r1, r9, r3 +; CHECK-NEXT: movwlt r7, #1 ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: moveq r5, r7 +; CHECK-NEXT: movne r7, r0 +; CHECK-NEXT: moveq r4, r8 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs r4, r0, r8 -; CHECK-NEXT: vmov.32 d1[0], r5 -; CHECK-NEXT: sbcs r4, r1, r6 -; CHECK-NEXT: sbcs r4, r2, #0 -; CHECK-NEXT: sbcs r4, r3, #0 -; CHECK-NEXT: mov r4, #0 -; CHECK-NEXT: movwlt r4, #1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: moveq r3, r4 -; CHECK-NEXT: movne r6, r1 -; CHECK-NEXT: movne r4, r2 -; CHECK-NEXT: moveq r0, r8 -; CHECK-NEXT: rsbs r1, r0, #0 -; CHECK-NEXT: rscs r1, r6, #-2147483648 -; CHECK-NEXT: sbcs r1, r8, r4 -; CHECK-NEXT: sbcs r1, r8, r3 -; CHECK-NEXT: movwlt r9, #1 -; CHECK-NEXT: cmp r9, #0 +; CHECK-NEXT: subs r6, r0, r9 +; CHECK-NEXT: vmov.32 d1[0], r7 +; CHECK-NEXT: sbcs r6, r1, r5 +; CHECK-NEXT: sbcs r6, r2, #0 +; CHECK-NEXT: sbcs r6, r3, #0 +; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: movwlt r6, #1 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: moveq r3, r6 +; CHECK-NEXT: movne r6, r2 +; CHECK-NEXT: movne r5, r1 ; CHECK-NEXT: moveq r0, r9 -; CHECK-NEXT: mov r1, #-2147483648 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: vmov.32 d0[0], r0 -; CHECK-NEXT: moveq r10, r1 -; CHECK-NEXT: cmp r9, #0 -; CHECK-NEXT: vmov.32 d1[1], r10 -; CHECK-NEXT: moveq r6, r1 -; CHECK-NEXT: vmov.32 d0[1], r6 +; CHECK-NEXT: rsbs r1, r0, #0 +; CHECK-NEXT: rscs r1, r5, #-2147483648 +; CHECK-NEXT: sbcs r1, r9, r6 +; CHECK-NEXT: sbcs r1, r9, r3 +; CHECK-NEXT: movwlt r10, #1 +; 
CHECK-NEXT: cmp r10, #0 +; CHECK-NEXT: movne r10, r0 +; CHECK-NEXT: moveq r5, r8 +; CHECK-NEXT: vmov.32 d0[0], r10 +; CHECK-NEXT: vmov.32 d1[1], r4 +; CHECK-NEXT: vmov.32 d0[1], r5 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} entry: @@ -3696,39 +3662,36 @@ entry: define <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-LABEL: utest_f64i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vorr q4, q0, q0 ; CHECK-NEXT: vorr d0, d9, d9 ; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: subs r0, r2, #1 -; CHECK-NEXT: vorr d0, d8, d8 -; CHECK-NEXT: sbcs r0, r3, #0 -; CHECK-NEXT: mov r7, #0 ; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: movwlo r7, #1 -; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: subs r1, r2, #1 +; CHECK-NEXT: vorr d0, d8, d8 +; CHECK-NEXT: sbcs r1, r3, #0 ; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: moveq r5, r7 +; CHECK-NEXT: mov r5, #0 +; CHECK-NEXT: movwlo r6, #1 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: moveq r4, r6 +; CHECK-NEXT: movne r6, r0 ; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: subs r2, r2, #1 -; CHECK-NEXT: vmov.32 d1[0], r5 +; CHECK-NEXT: vmov.32 d1[0], r6 ; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: movwlo r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r0, r6 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: movne r7, r4 +; CHECK-NEXT: movwlo r5, #1 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: moveq r0, r5 +; CHECK-NEXT: movne r5, r1 ; CHECK-NEXT: vmov.32 d0[0], r0 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: vmov.32 d1[1], r7 -; CHECK-NEXT: movne r6, r1 -; CHECK-NEXT: vmov.32 d0[1], r6 +; CHECK-NEXT: vmov.32 d1[1], r4 +; CHECK-NEXT: vmov.32 d0[1], r5 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc} +; CHECK-NEXT: pop {r4, r5, r6, pc} entry: %conv = fptoui <2 x 
double> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -3739,49 +3702,45 @@ entry: define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-LABEL: ustest_f64i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vorr q4, q0, q0 +; CHECK-NEXT: vorr d0, d9, d9 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: subs r0, r2, #1 -; CHECK-NEXT: vorr d0, d9, d9 ; CHECK-NEXT: sbcs r0, r3, #0 -; CHECK-NEXT: mov r7, #0 -; CHECK-NEXT: mov r5, r3 -; CHECK-NEXT: movwlt r7, #1 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: moveq r6, r7 -; CHECK-NEXT: moveq r5, r7 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: mov r8, r1 -; CHECK-NEXT: mov r4, #0 -; CHECK-NEXT: movwmi r6, #0 +; CHECK-NEXT: vorr d0, d8, d8 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: movwlt r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: moveq r5, r0 +; CHECK-NEXT: moveq r4, r0 +; CHECK-NEXT: movne r0, r3 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: movwmi r4, #0 +; CHECK-NEXT: movwmi r5, #0 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: subs r2, r2, #1 -; CHECK-NEXT: vmov.32 d0[0], r6 +; CHECK-NEXT: vmov.32 d1[0], r5 ; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: movwlt r4, #1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: moveq r0, r4 -; CHECK-NEXT: moveq r3, r4 -; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: movwlt r6, #1 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: moveq r1, r6 +; CHECK-NEXT: moveq r0, r6 +; CHECK-NEXT: movne r6, r3 +; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: movwmi r0, #0 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: movne r4, r1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: vmov.32 d1[0], r0 -; CHECK-NEXT: movwmi r4, #0 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: movne r7, r8 -; 
CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: movwmi r1, #0 +; CHECK-NEXT: vmov.32 d0[0], r0 ; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: movwmi r7, #0 -; CHECK-NEXT: vmov.32 d0[1], r7 +; CHECK-NEXT: vmov.32 d0[1], r1 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: pop {r4, r5, r6, pc} entry: %conv = fptosi <2 x double> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -3800,59 +3759,56 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-NEXT: vmov.f64 d8, d0 ; CHECK-NEXT: vmov.f32 s0, s17 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mvn r8, #0 -; CHECK-NEXT: subs r0, r0, r8 -; CHECK-NEXT: mvn r6, #-2147483648 -; CHECK-NEXT: sbcs r0, r1, r6 +; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: mvn r9, #0 +; CHECK-NEXT: subs r1, r0, r9 +; CHECK-NEXT: mvn r5, #-2147483648 +; CHECK-NEXT: sbcs r1, r4, r5 ; CHECK-NEXT: vmov.f32 s0, s16 -; CHECK-NEXT: sbcs r0, r2, #0 -; CHECK-NEXT: mov r10, r1 -; CHECK-NEXT: sbcs r0, r3, #0 +; CHECK-NEXT: sbcs r1, r2, #0 ; CHECK-NEXT: mov r7, #0 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: mov r9, #0 -; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: moveq r3, r0 -; CHECK-NEXT: movne r0, r2 -; CHECK-NEXT: moveq r10, r6 -; CHECK-NEXT: moveq r5, r8 -; CHECK-NEXT: rsbs r1, r5, #0 -; CHECK-NEXT: rscs r1, r10, #-2147483648 -; CHECK-NEXT: sbcs r0, r8, r0 -; CHECK-NEXT: sbcs r0, r8, r3 +; CHECK-NEXT: sbcs r1, r3, #0 +; CHECK-NEXT: mov r8, #-2147483648 +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: mov r10, #0 +; CHECK-NEXT: movwlt r1, #1 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: moveq r3, r1 +; CHECK-NEXT: movne r1, r2 +; CHECK-NEXT: moveq r4, r5 +; CHECK-NEXT: moveq r0, r9 +; CHECK-NEXT: rsbs r2, r0, #0 +; CHECK-NEXT: rscs r2, r4, #-2147483648 +; CHECK-NEXT: sbcs r1, r9, r1 +; CHECK-NEXT: sbcs r1, r9, r3 ; CHECK-NEXT: movwlt r7, #1 ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: moveq r5, r7 +; CHECK-NEXT: movne r7, r0 +; CHECK-NEXT: 
moveq r4, r8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs r4, r0, r8 -; CHECK-NEXT: vmov.32 d1[0], r5 -; CHECK-NEXT: sbcs r4, r1, r6 -; CHECK-NEXT: sbcs r4, r2, #0 -; CHECK-NEXT: sbcs r4, r3, #0 -; CHECK-NEXT: mov r4, #0 -; CHECK-NEXT: movwlt r4, #1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: moveq r3, r4 -; CHECK-NEXT: movne r6, r1 -; CHECK-NEXT: movne r4, r2 -; CHECK-NEXT: moveq r0, r8 -; CHECK-NEXT: rsbs r1, r0, #0 -; CHECK-NEXT: rscs r1, r6, #-2147483648 -; CHECK-NEXT: sbcs r1, r8, r4 -; CHECK-NEXT: sbcs r1, r8, r3 -; CHECK-NEXT: movwlt r9, #1 -; CHECK-NEXT: cmp r9, #0 +; CHECK-NEXT: subs r6, r0, r9 +; CHECK-NEXT: vmov.32 d1[0], r7 +; CHECK-NEXT: sbcs r6, r1, r5 +; CHECK-NEXT: sbcs r6, r2, #0 +; CHECK-NEXT: sbcs r6, r3, #0 +; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: movwlt r6, #1 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: moveq r3, r6 +; CHECK-NEXT: movne r6, r2 +; CHECK-NEXT: movne r5, r1 ; CHECK-NEXT: moveq r0, r9 -; CHECK-NEXT: mov r1, #-2147483648 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: vmov.32 d0[0], r0 -; CHECK-NEXT: moveq r10, r1 -; CHECK-NEXT: cmp r9, #0 -; CHECK-NEXT: vmov.32 d1[1], r10 -; CHECK-NEXT: moveq r6, r1 -; CHECK-NEXT: vmov.32 d0[1], r6 +; CHECK-NEXT: rsbs r1, r0, #0 +; CHECK-NEXT: rscs r1, r5, #-2147483648 +; CHECK-NEXT: sbcs r1, r9, r6 +; CHECK-NEXT: sbcs r1, r9, r3 +; CHECK-NEXT: movwlt r10, #1 +; CHECK-NEXT: cmp r10, #0 +; CHECK-NEXT: movne r10, r0 +; CHECK-NEXT: moveq r5, r8 +; CHECK-NEXT: vmov.32 d0[0], r10 +; CHECK-NEXT: vmov.32 d1[1], r4 +; CHECK-NEXT: vmov.32 d0[1], r5 ; CHECK-NEXT: vpop {d8} ; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} entry: @@ -3866,39 +3822,36 @@ entry: define <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-LABEL: utest_f32i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: .vsave {d8} ; CHECK-NEXT: vpush {d8} ; CHECK-NEXT: vmov.f64 d8, d0 ; 
CHECK-NEXT: vmov.f32 s0, s17 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vmov.f32 s0, s16 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: subs r0, r2, #1 -; CHECK-NEXT: mov r7, #0 -; CHECK-NEXT: sbcs r0, r3, #0 ; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: movwlo r7, #1 -; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: subs r1, r2, #1 ; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: moveq r5, r7 +; CHECK-NEXT: sbcs r1, r3, #0 +; CHECK-NEXT: mov r5, #0 +; CHECK-NEXT: movwlo r6, #1 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: moveq r4, r6 +; CHECK-NEXT: movne r6, r0 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: subs r2, r2, #1 -; CHECK-NEXT: vmov.32 d1[0], r5 +; CHECK-NEXT: vmov.32 d1[0], r6 ; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: movwlo r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: moveq r0, r6 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: movne r7, r4 +; CHECK-NEXT: movwlo r5, #1 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: moveq r0, r5 +; CHECK-NEXT: movne r5, r1 ; CHECK-NEXT: vmov.32 d0[0], r0 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: vmov.32 d1[1], r7 -; CHECK-NEXT: movne r6, r1 -; CHECK-NEXT: vmov.32 d0[1], r6 +; CHECK-NEXT: vmov.32 d1[1], r4 +; CHECK-NEXT: vmov.32 d0[1], r5 ; CHECK-NEXT: vpop {d8} -; CHECK-NEXT: pop {r4, r5, r6, r7, r11, pc} +; CHECK-NEXT: pop {r4, r5, r6, pc} entry: %conv = fptoui <2 x float> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -3909,49 +3862,45 @@ entry: define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-LABEL: ustest_f32i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: .vsave {d8} ; CHECK-NEXT: vpush {d8} ; CHECK-NEXT: vmov.f64 d8, d0 -; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vmov.f32 s0, s17 -; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: vmov.f32 s0, s16 +; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: subs r0, r2, #1 -; 
CHECK-NEXT: mov r7, #0 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: sbcs r0, r3, #0 -; CHECK-NEXT: mov r5, r3 -; CHECK-NEXT: movwlt r7, #1 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: moveq r6, r7 -; CHECK-NEXT: moveq r5, r7 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: mov r8, r1 -; CHECK-NEXT: mov r4, #0 -; CHECK-NEXT: movwmi r6, #0 +; CHECK-NEXT: mov r6, #0 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: movwlt r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: moveq r5, r0 +; CHECK-NEXT: moveq r4, r0 +; CHECK-NEXT: movne r0, r3 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: movwmi r4, #0 +; CHECK-NEXT: movwmi r5, #0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: subs r2, r2, #1 -; CHECK-NEXT: vmov.32 d0[0], r6 +; CHECK-NEXT: vmov.32 d1[0], r5 ; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: movwlt r4, #1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: moveq r0, r4 -; CHECK-NEXT: moveq r3, r4 -; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: movwlt r6, #1 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: moveq r1, r6 +; CHECK-NEXT: moveq r0, r6 +; CHECK-NEXT: movne r6, r3 +; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: movwmi r0, #0 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: movne r4, r1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: vmov.32 d1[0], r0 -; CHECK-NEXT: movwmi r4, #0 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: movne r7, r8 -; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: movwmi r1, #0 +; CHECK-NEXT: vmov.32 d0[0], r0 ; CHECK-NEXT: vmov.32 d1[1], r4 -; CHECK-NEXT: movwmi r7, #0 -; CHECK-NEXT: vmov.32 d0[1], r7 +; CHECK-NEXT: vmov.32 d0[1], r1 ; CHECK-NEXT: vpop {d8} -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: pop {r4, r5, r6, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -3963,10 +3912,8 @@ entry: define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-LABEL: stest_f16i64_mm: ; CHECK-NEON: @ %bb.0: @ %entry -; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, 
r10, r11, lr} -; CHECK-NEON-NEXT: .pad #4 -; CHECK-NEON-NEXT: sub sp, sp, #4 +; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEON-NEXT: .vsave {d8} ; CHECK-NEON-NEXT: vpush {d8} ; CHECK-NEON-NEXT: vmov r0, s0 @@ -3977,62 +3924,58 @@ define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 ; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: mov r5, r0 +; CHECK-NEON-NEXT: mov r4, r1 ; CHECK-NEON-NEXT: mvn r9, #0 -; CHECK-NEON-NEXT: subs r0, r0, r9 -; CHECK-NEON-NEXT: mvn r7, #-2147483648 -; CHECK-NEON-NEXT: sbcs r0, r1, r7 -; CHECK-NEON-NEXT: mov r11, r1 -; CHECK-NEON-NEXT: sbcs r0, r2, #0 +; CHECK-NEON-NEXT: subs r1, r0, r9 +; CHECK-NEON-NEXT: mvn r6, #-2147483648 +; CHECK-NEON-NEXT: sbcs r1, r4, r6 ; CHECK-NEON-NEXT: vmov s0, r8 -; CHECK-NEON-NEXT: sbcs r0, r3, #0 -; CHECK-NEON-NEXT: mov r6, #0 -; CHECK-NEON-NEXT: mov r0, #0 +; CHECK-NEON-NEXT: sbcs r1, r2, #0 +; CHECK-NEON-NEXT: mov r5, #0 +; CHECK-NEON-NEXT: sbcs r1, r3, #0 +; CHECK-NEON-NEXT: mov r8, #-2147483648 +; CHECK-NEON-NEXT: mov r1, #0 ; CHECK-NEON-NEXT: mov r10, #0 -; CHECK-NEON-NEXT: movwlt r0, #1 -; CHECK-NEON-NEXT: cmp r0, #0 -; CHECK-NEON-NEXT: moveq r3, r0 -; CHECK-NEON-NEXT: movne r0, r2 -; CHECK-NEON-NEXT: moveq r11, r7 -; CHECK-NEON-NEXT: moveq r5, r9 -; CHECK-NEON-NEXT: rsbs r1, r5, #0 -; CHECK-NEON-NEXT: rscs r1, r11, #-2147483648 -; CHECK-NEON-NEXT: sbcs r0, r9, r0 -; CHECK-NEON-NEXT: sbcs r0, r9, r3 -; CHECK-NEON-NEXT: movwlt r6, #1 -; CHECK-NEON-NEXT: cmp r6, #0 -; CHECK-NEON-NEXT: moveq r5, r6 +; CHECK-NEON-NEXT: movwlt r1, #1 +; CHECK-NEON-NEXT: cmp r1, #0 +; CHECK-NEON-NEXT: moveq r3, r1 +; CHECK-NEON-NEXT: movne r1, r2 +; CHECK-NEON-NEXT: moveq r4, r6 +; CHECK-NEON-NEXT: moveq r0, r9 +; CHECK-NEON-NEXT: rsbs r2, r0, #0 +; CHECK-NEON-NEXT: rscs r2, r4, #-2147483648 +; CHECK-NEON-NEXT: sbcs r1, r9, r1 +; CHECK-NEON-NEXT: sbcs r1, r9, r3 +; 
CHECK-NEON-NEXT: movwlt r5, #1 +; CHECK-NEON-NEXT: cmp r5, #0 +; CHECK-NEON-NEXT: movne r5, r0 +; CHECK-NEON-NEXT: moveq r4, r8 ; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: subs r4, r0, r9 +; CHECK-NEON-NEXT: subs r7, r0, r9 ; CHECK-NEON-NEXT: vmov.32 d1[0], r5 -; CHECK-NEON-NEXT: sbcs r4, r1, r7 -; CHECK-NEON-NEXT: sbcs r4, r2, #0 -; CHECK-NEON-NEXT: sbcs r4, r3, #0 -; CHECK-NEON-NEXT: mov r4, #0 -; CHECK-NEON-NEXT: movwlt r4, #1 -; CHECK-NEON-NEXT: cmp r4, #0 -; CHECK-NEON-NEXT: moveq r3, r4 -; CHECK-NEON-NEXT: movne r7, r1 -; CHECK-NEON-NEXT: movne r4, r2 +; CHECK-NEON-NEXT: sbcs r7, r1, r6 +; CHECK-NEON-NEXT: sbcs r7, r2, #0 +; CHECK-NEON-NEXT: sbcs r7, r3, #0 +; CHECK-NEON-NEXT: mov r7, #0 +; CHECK-NEON-NEXT: movwlt r7, #1 +; CHECK-NEON-NEXT: cmp r7, #0 +; CHECK-NEON-NEXT: moveq r3, r7 +; CHECK-NEON-NEXT: movne r7, r2 +; CHECK-NEON-NEXT: movne r6, r1 ; CHECK-NEON-NEXT: moveq r0, r9 ; CHECK-NEON-NEXT: rsbs r1, r0, #0 -; CHECK-NEON-NEXT: rscs r1, r7, #-2147483648 -; CHECK-NEON-NEXT: sbcs r1, r9, r4 +; CHECK-NEON-NEXT: rscs r1, r6, #-2147483648 +; CHECK-NEON-NEXT: sbcs r1, r9, r7 ; CHECK-NEON-NEXT: sbcs r1, r9, r3 ; CHECK-NEON-NEXT: movwlt r10, #1 ; CHECK-NEON-NEXT: cmp r10, #0 -; CHECK-NEON-NEXT: moveq r0, r10 -; CHECK-NEON-NEXT: mov r1, #-2147483648 -; CHECK-NEON-NEXT: cmp r6, #0 -; CHECK-NEON-NEXT: vmov.32 d0[0], r0 -; CHECK-NEON-NEXT: moveq r11, r1 -; CHECK-NEON-NEXT: cmp r10, #0 -; CHECK-NEON-NEXT: vmov.32 d1[1], r11 -; CHECK-NEON-NEXT: moveq r7, r1 -; CHECK-NEON-NEXT: vmov.32 d0[1], r7 +; CHECK-NEON-NEXT: movne r10, r0 +; CHECK-NEON-NEXT: moveq r6, r8 +; CHECK-NEON-NEXT: vmov.32 d0[0], r10 +; CHECK-NEON-NEXT: vmov.32 d1[1], r4 +; CHECK-NEON-NEXT: vmov.32 d0[1], r6 ; CHECK-NEON-NEXT: vpop {d8} -; CHECK-NEON-NEXT: add sp, sp, #4 -; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} ; ; CHECK-FP16-LABEL: stest_f16i64_mm: ; CHECK-FP16: @ %bb.0: @ %entry @@ -4042,59 +3985,56 @@ define <2 x 
i64> @stest_f16i64_mm(<2 x half> %x) { ; CHECK-FP16-NEXT: vmov.u16 r7, d0[0] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: mov r5, r0 -; CHECK-FP16-NEXT: mvn r8, #0 -; CHECK-FP16-NEXT: subs r0, r0, r8 -; CHECK-FP16-NEXT: mvn r6, #-2147483648 -; CHECK-FP16-NEXT: sbcs r0, r1, r6 -; CHECK-FP16-NEXT: mov r10, r1 -; CHECK-FP16-NEXT: sbcs r0, r2, #0 +; CHECK-FP16-NEXT: mov r4, r1 +; CHECK-FP16-NEXT: mvn r9, #0 +; CHECK-FP16-NEXT: subs r1, r0, r9 +; CHECK-FP16-NEXT: mvn r5, #-2147483648 +; CHECK-FP16-NEXT: sbcs r1, r4, r5 ; CHECK-FP16-NEXT: vmov s0, r7 -; CHECK-FP16-NEXT: sbcs r0, r3, #0 +; CHECK-FP16-NEXT: sbcs r1, r2, #0 ; CHECK-FP16-NEXT: mov r7, #0 -; CHECK-FP16-NEXT: mov r0, #0 -; CHECK-FP16-NEXT: mov r9, #0 -; CHECK-FP16-NEXT: movwlt r0, #1 -; CHECK-FP16-NEXT: cmp r0, #0 -; CHECK-FP16-NEXT: moveq r3, r0 -; CHECK-FP16-NEXT: movne r0, r2 -; CHECK-FP16-NEXT: moveq r10, r6 -; CHECK-FP16-NEXT: moveq r5, r8 -; CHECK-FP16-NEXT: rsbs r1, r5, #0 -; CHECK-FP16-NEXT: rscs r1, r10, #-2147483648 -; CHECK-FP16-NEXT: sbcs r0, r8, r0 -; CHECK-FP16-NEXT: sbcs r0, r8, r3 +; CHECK-FP16-NEXT: sbcs r1, r3, #0 +; CHECK-FP16-NEXT: mov r8, #-2147483648 +; CHECK-FP16-NEXT: mov r1, #0 +; CHECK-FP16-NEXT: mov r10, #0 +; CHECK-FP16-NEXT: movwlt r1, #1 +; CHECK-FP16-NEXT: cmp r1, #0 +; CHECK-FP16-NEXT: moveq r3, r1 +; CHECK-FP16-NEXT: movne r1, r2 +; CHECK-FP16-NEXT: moveq r4, r5 +; CHECK-FP16-NEXT: moveq r0, r9 +; CHECK-FP16-NEXT: rsbs r2, r0, #0 +; CHECK-FP16-NEXT: rscs r2, r4, #-2147483648 +; CHECK-FP16-NEXT: sbcs r1, r9, r1 +; CHECK-FP16-NEXT: sbcs r1, r9, r3 ; CHECK-FP16-NEXT: movwlt r7, #1 ; CHECK-FP16-NEXT: cmp r7, #0 -; CHECK-FP16-NEXT: moveq r5, r7 +; CHECK-FP16-NEXT: movne r7, r0 +; CHECK-FP16-NEXT: moveq r4, r8 ; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: subs r4, r0, r8 -; CHECK-FP16-NEXT: vmov.32 d1[0], r5 -; CHECK-FP16-NEXT: sbcs r4, r1, r6 -; CHECK-FP16-NEXT: sbcs r4, r2, #0 -; CHECK-FP16-NEXT: sbcs r4, r3, #0 -; CHECK-FP16-NEXT: mov 
r4, #0 -; CHECK-FP16-NEXT: movwlt r4, #1 -; CHECK-FP16-NEXT: cmp r4, #0 -; CHECK-FP16-NEXT: moveq r3, r4 -; CHECK-FP16-NEXT: movne r6, r1 -; CHECK-FP16-NEXT: movne r4, r2 -; CHECK-FP16-NEXT: moveq r0, r8 -; CHECK-FP16-NEXT: rsbs r1, r0, #0 -; CHECK-FP16-NEXT: rscs r1, r6, #-2147483648 -; CHECK-FP16-NEXT: sbcs r1, r8, r4 -; CHECK-FP16-NEXT: sbcs r1, r8, r3 -; CHECK-FP16-NEXT: movwlt r9, #1 -; CHECK-FP16-NEXT: cmp r9, #0 +; CHECK-FP16-NEXT: subs r6, r0, r9 +; CHECK-FP16-NEXT: vmov.32 d1[0], r7 +; CHECK-FP16-NEXT: sbcs r6, r1, r5 +; CHECK-FP16-NEXT: sbcs r6, r2, #0 +; CHECK-FP16-NEXT: sbcs r6, r3, #0 +; CHECK-FP16-NEXT: mov r6, #0 +; CHECK-FP16-NEXT: movwlt r6, #1 +; CHECK-FP16-NEXT: cmp r6, #0 +; CHECK-FP16-NEXT: moveq r3, r6 +; CHECK-FP16-NEXT: movne r6, r2 +; CHECK-FP16-NEXT: movne r5, r1 ; CHECK-FP16-NEXT: moveq r0, r9 -; CHECK-FP16-NEXT: mov r1, #-2147483648 -; CHECK-FP16-NEXT: cmp r7, #0 -; CHECK-FP16-NEXT: vmov.32 d0[0], r0 -; CHECK-FP16-NEXT: moveq r10, r1 -; CHECK-FP16-NEXT: cmp r9, #0 -; CHECK-FP16-NEXT: vmov.32 d1[1], r10 -; CHECK-FP16-NEXT: moveq r6, r1 -; CHECK-FP16-NEXT: vmov.32 d0[1], r6 +; CHECK-FP16-NEXT: rsbs r1, r0, #0 +; CHECK-FP16-NEXT: rscs r1, r5, #-2147483648 +; CHECK-FP16-NEXT: sbcs r1, r9, r6 +; CHECK-FP16-NEXT: sbcs r1, r9, r3 +; CHECK-FP16-NEXT: movwlt r10, #1 +; CHECK-FP16-NEXT: cmp r10, #0 +; CHECK-FP16-NEXT: movne r10, r0 +; CHECK-FP16-NEXT: moveq r5, r8 +; CHECK-FP16-NEXT: vmov.32 d0[0], r10 +; CHECK-FP16-NEXT: vmov.32 d1[1], r4 +; CHECK-FP16-NEXT: vmov.32 d0[1], r5 ; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptosi <2 x half> %x to <2 x i128> @@ -4107,8 +4047,8 @@ entry: define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-LABEL: utesth_f16i64_mm: ; CHECK-NEON: @ %bb.0: @ %entry -; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr} -; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-NEON-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEON-NEXT: push {r4, r5, r6, lr} ; CHECK-NEON-NEXT: .vsave {d8} ; 
CHECK-NEON-NEXT: vpush {d8} ; CHECK-NEON-NEXT: vmov r0, s0 @@ -4119,66 +4059,60 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 ; CHECK-NEON-NEXT: bl __fixunssfti -; CHECK-NEON-NEXT: mov r6, r0 -; CHECK-NEON-NEXT: subs r0, r2, #1 +; CHECK-NEON-NEXT: mov r4, r1 +; CHECK-NEON-NEXT: subs r1, r2, #1 ; CHECK-NEON-NEXT: vmov s0, r5 -; CHECK-NEON-NEXT: sbcs r0, r3, #0 +; CHECK-NEON-NEXT: sbcs r1, r3, #0 ; CHECK-NEON-NEXT: mov r5, #0 -; CHECK-NEON-NEXT: mov r4, r1 +; CHECK-NEON-NEXT: mov r6, #0 ; CHECK-NEON-NEXT: movwlo r5, #1 ; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: mov r7, #0 -; CHECK-NEON-NEXT: moveq r6, r5 +; CHECK-NEON-NEXT: moveq r4, r5 +; CHECK-NEON-NEXT: movne r5, r0 ; CHECK-NEON-NEXT: bl __fixunssfti ; CHECK-NEON-NEXT: subs r2, r2, #1 -; CHECK-NEON-NEXT: vmov.32 d1[0], r6 +; CHECK-NEON-NEXT: vmov.32 d1[0], r5 ; CHECK-NEON-NEXT: sbcs r2, r3, #0 -; CHECK-NEON-NEXT: movwlo r7, #1 -; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: moveq r0, r7 -; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: movne r5, r4 +; CHECK-NEON-NEXT: movwlo r6, #1 +; CHECK-NEON-NEXT: cmp r6, #0 +; CHECK-NEON-NEXT: moveq r0, r6 +; CHECK-NEON-NEXT: movne r6, r1 ; CHECK-NEON-NEXT: vmov.32 d0[0], r0 -; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: vmov.32 d1[1], r5 -; CHECK-NEON-NEXT: movne r7, r1 -; CHECK-NEON-NEXT: vmov.32 d0[1], r7 +; CHECK-NEON-NEXT: vmov.32 d1[1], r4 +; CHECK-NEON-NEXT: vmov.32 d0[1], r6 ; CHECK-NEON-NEXT: vpop {d8} -; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} +; CHECK-NEON-NEXT: pop {r4, r5, r6, pc} ; ; CHECK-FP16-LABEL: utesth_f16i64_mm: ; CHECK-FP16: @ %bb.0: @ %entry -; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r11, lr} -; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-FP16-NEXT: .save {r4, r5, r6, lr} +; CHECK-FP16-NEXT: push {r4, r5, r6, lr} ; CHECK-FP16-NEXT: vmov.u16 r0, d0[1] -; CHECK-FP16-NEXT: vmov.u16 r7, d0[0] +; CHECK-FP16-NEXT: vmov.u16 r6, d0[0] ; CHECK-FP16-NEXT: 
vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixunshfti -; CHECK-FP16-NEXT: mov r5, r0 -; CHECK-FP16-NEXT: subs r0, r2, #1 -; CHECK-FP16-NEXT: vmov s0, r7 -; CHECK-FP16-NEXT: sbcs r0, r3, #0 -; CHECK-FP16-NEXT: mov r7, #0 ; CHECK-FP16-NEXT: mov r4, r1 -; CHECK-FP16-NEXT: movwlo r7, #1 -; CHECK-FP16-NEXT: cmp r7, #0 +; CHECK-FP16-NEXT: subs r1, r2, #1 +; CHECK-FP16-NEXT: vmov s0, r6 +; CHECK-FP16-NEXT: sbcs r1, r3, #0 ; CHECK-FP16-NEXT: mov r6, #0 -; CHECK-FP16-NEXT: moveq r5, r7 +; CHECK-FP16-NEXT: mov r5, #0 +; CHECK-FP16-NEXT: movwlo r6, #1 +; CHECK-FP16-NEXT: cmp r6, #0 +; CHECK-FP16-NEXT: moveq r4, r6 +; CHECK-FP16-NEXT: movne r6, r0 ; CHECK-FP16-NEXT: bl __fixunshfti ; CHECK-FP16-NEXT: subs r2, r2, #1 -; CHECK-FP16-NEXT: vmov.32 d1[0], r5 +; CHECK-FP16-NEXT: vmov.32 d1[0], r6 ; CHECK-FP16-NEXT: sbcs r2, r3, #0 -; CHECK-FP16-NEXT: movwlo r6, #1 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: moveq r0, r6 -; CHECK-FP16-NEXT: cmp r7, #0 -; CHECK-FP16-NEXT: movne r7, r4 +; CHECK-FP16-NEXT: movwlo r5, #1 +; CHECK-FP16-NEXT: cmp r5, #0 +; CHECK-FP16-NEXT: moveq r0, r5 +; CHECK-FP16-NEXT: movne r5, r1 ; CHECK-FP16-NEXT: vmov.32 d0[0], r0 -; CHECK-FP16-NEXT: cmp r6, #0 -; CHECK-FP16-NEXT: vmov.32 d1[1], r7 -; CHECK-FP16-NEXT: movne r6, r1 -; CHECK-FP16-NEXT: vmov.32 d0[1], r6 -; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r11, pc} +; CHECK-FP16-NEXT: vmov.32 d1[1], r4 +; CHECK-FP16-NEXT: vmov.32 d0[1], r5 +; CHECK-FP16-NEXT: pop {r4, r5, r6, pc} entry: %conv = fptoui <2 x half> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -4189,100 +4123,90 @@ entry: define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEON-LABEL: ustest_f16i64_mm: ; CHECK-NEON: @ %bb.0: @ %entry -; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r8, lr} +; CHECK-NEON-NEXT: .save {r4, r5, r6, r7, r11, lr} +; CHECK-NEON-NEXT: push {r4, r5, r6, r7, r11, lr} ; CHECK-NEON-NEXT: .vsave {d8} ; CHECK-NEON-NEXT: 
vpush {d8} -; CHECK-NEON-NEXT: vmov r0, s1 -; CHECK-NEON-NEXT: vmov.f32 s16, s0 +; CHECK-NEON-NEXT: vmov r0, s0 +; CHECK-NEON-NEXT: vmov.f32 s16, s1 ; CHECK-NEON-NEXT: bl __aeabi_h2f -; CHECK-NEON-NEXT: mov r7, r0 +; CHECK-NEON-NEXT: mov r6, r0 ; CHECK-NEON-NEXT: vmov r0, s16 ; CHECK-NEON-NEXT: bl __aeabi_h2f ; CHECK-NEON-NEXT: vmov s0, r0 ; CHECK-NEON-NEXT: bl __fixsfti -; CHECK-NEON-NEXT: mov r6, r0 +; CHECK-NEON-NEXT: mov r5, r0 ; CHECK-NEON-NEXT: subs r0, r2, #1 -; CHECK-NEON-NEXT: vmov s0, r7 ; CHECK-NEON-NEXT: sbcs r0, r3, #0 +; CHECK-NEON-NEXT: vmov s0, r6 +; CHECK-NEON-NEXT: mov r0, #0 +; CHECK-NEON-NEXT: mov r4, r1 +; CHECK-NEON-NEXT: movwlt r0, #1 +; CHECK-NEON-NEXT: cmp r0, #0 +; CHECK-NEON-NEXT: moveq r5, r0 +; CHECK-NEON-NEXT: moveq r4, r0 +; CHECK-NEON-NEXT: movne r0, r3 +; CHECK-NEON-NEXT: cmp r0, #0 ; CHECK-NEON-NEXT: mov r7, #0 -; CHECK-NEON-NEXT: mov r5, r3 -; CHECK-NEON-NEXT: movwlt r7, #1 -; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: moveq r6, r7 -; CHECK-NEON-NEXT: moveq r5, r7 -; CHECK-NEON-NEXT: cmp r5, #0 -; CHECK-NEON-NEXT: mov r8, r1 -; CHECK-NEON-NEXT: mov r4, #0 -; CHECK-NEON-NEXT: movwmi r6, #0 +; CHECK-NEON-NEXT: movwmi r4, #0 +; CHECK-NEON-NEXT: movwmi r5, #0 ; CHECK-NEON-NEXT: bl __fixsfti ; CHECK-NEON-NEXT: subs r2, r2, #1 -; CHECK-NEON-NEXT: vmov.32 d0[0], r6 +; CHECK-NEON-NEXT: vmov.32 d1[0], r5 ; CHECK-NEON-NEXT: sbcs r2, r3, #0 -; CHECK-NEON-NEXT: movwlt r4, #1 -; CHECK-NEON-NEXT: cmp r4, #0 -; CHECK-NEON-NEXT: moveq r0, r4 -; CHECK-NEON-NEXT: moveq r3, r4 -; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: movwmi r0, #0 -; CHECK-NEON-NEXT: cmp r4, #0 -; CHECK-NEON-NEXT: movne r4, r1 -; CHECK-NEON-NEXT: cmp r3, #0 -; CHECK-NEON-NEXT: vmov.32 d1[0], r0 -; CHECK-NEON-NEXT: movwmi r4, #0 +; CHECK-NEON-NEXT: movwlt r7, #1 ; CHECK-NEON-NEXT: cmp r7, #0 -; CHECK-NEON-NEXT: movne r7, r8 -; CHECK-NEON-NEXT: cmp r5, #0 +; CHECK-NEON-NEXT: moveq r1, r7 +; CHECK-NEON-NEXT: moveq r0, r7 +; CHECK-NEON-NEXT: movne r7, r3 +; 
CHECK-NEON-NEXT: cmp r7, #0 +; CHECK-NEON-NEXT: movwmi r0, #0 +; CHECK-NEON-NEXT: movwmi r1, #0 +; CHECK-NEON-NEXT: vmov.32 d0[0], r0 ; CHECK-NEON-NEXT: vmov.32 d1[1], r4 -; CHECK-NEON-NEXT: movwmi r7, #0 -; CHECK-NEON-NEXT: vmov.32 d0[1], r7 +; CHECK-NEON-NEXT: vmov.32 d0[1], r1 ; CHECK-NEON-NEXT: vpop {d8} -; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r8, pc} +; CHECK-NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} ; ; CHECK-FP16-LABEL: ustest_f16i64_mm: ; CHECK-FP16: @ %bb.0: @ %entry -; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r8, lr} -; CHECK-FP16-NEXT: vmov.u16 r0, d0[0] -; CHECK-FP16-NEXT: vmov.u16 r7, d0[1] +; CHECK-FP16-NEXT: .save {r4, r5, r6, r7, r11, lr} +; CHECK-FP16-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-FP16-NEXT: vmov.u16 r0, d0[1] +; CHECK-FP16-NEXT: vmov.u16 r7, d0[0] ; CHECK-FP16-NEXT: vmov s0, r0 ; CHECK-FP16-NEXT: bl __fixhfti -; CHECK-FP16-NEXT: mov r6, r0 +; CHECK-FP16-NEXT: mov r5, r0 ; CHECK-FP16-NEXT: subs r0, r2, #1 -; CHECK-FP16-NEXT: vmov s0, r7 ; CHECK-FP16-NEXT: sbcs r0, r3, #0 -; CHECK-FP16-NEXT: mov r7, #0 -; CHECK-FP16-NEXT: mov r5, r3 -; CHECK-FP16-NEXT: movwlt r7, #1 -; CHECK-FP16-NEXT: cmp r7, #0 -; CHECK-FP16-NEXT: moveq r6, r7 -; CHECK-FP16-NEXT: moveq r5, r7 -; CHECK-FP16-NEXT: cmp r5, #0 -; CHECK-FP16-NEXT: mov r8, r1 -; CHECK-FP16-NEXT: mov r4, #0 -; CHECK-FP16-NEXT: movwmi r6, #0 +; CHECK-FP16-NEXT: vmov s0, r7 +; CHECK-FP16-NEXT: mov r0, #0 +; CHECK-FP16-NEXT: mov r4, r1 +; CHECK-FP16-NEXT: movwlt r0, #1 +; CHECK-FP16-NEXT: cmp r0, #0 +; CHECK-FP16-NEXT: moveq r5, r0 +; CHECK-FP16-NEXT: moveq r4, r0 +; CHECK-FP16-NEXT: movne r0, r3 +; CHECK-FP16-NEXT: cmp r0, #0 +; CHECK-FP16-NEXT: mov r6, #0 +; CHECK-FP16-NEXT: movwmi r4, #0 +; CHECK-FP16-NEXT: movwmi r5, #0 ; CHECK-FP16-NEXT: bl __fixhfti ; CHECK-FP16-NEXT: subs r2, r2, #1 -; CHECK-FP16-NEXT: vmov.32 d0[0], r6 +; CHECK-FP16-NEXT: vmov.32 d1[0], r5 ; CHECK-FP16-NEXT: sbcs r2, r3, #0 -; CHECK-FP16-NEXT: movwlt r4, #1 -; 
CHECK-FP16-NEXT: cmp r4, #0 -; CHECK-FP16-NEXT: moveq r0, r4 -; CHECK-FP16-NEXT: moveq r3, r4 -; CHECK-FP16-NEXT: cmp r3, #0 +; CHECK-FP16-NEXT: movwlt r6, #1 +; CHECK-FP16-NEXT: cmp r6, #0 +; CHECK-FP16-NEXT: moveq r1, r6 +; CHECK-FP16-NEXT: moveq r0, r6 +; CHECK-FP16-NEXT: movne r6, r3 +; CHECK-FP16-NEXT: cmp r6, #0 ; CHECK-FP16-NEXT: movwmi r0, #0 -; CHECK-FP16-NEXT: cmp r4, #0 -; CHECK-FP16-NEXT: movne r4, r1 -; CHECK-FP16-NEXT: cmp r3, #0 -; CHECK-FP16-NEXT: vmov.32 d1[0], r0 -; CHECK-FP16-NEXT: movwmi r4, #0 -; CHECK-FP16-NEXT: cmp r7, #0 -; CHECK-FP16-NEXT: movne r7, r8 -; CHECK-FP16-NEXT: cmp r5, #0 +; CHECK-FP16-NEXT: movwmi r1, #0 +; CHECK-FP16-NEXT: vmov.32 d0[0], r0 ; CHECK-FP16-NEXT: vmov.32 d1[1], r4 -; CHECK-FP16-NEXT: movwmi r7, #0 -; CHECK-FP16-NEXT: vmov.32 d0[1], r7 -; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r8, pc} +; CHECK-FP16-NEXT: vmov.32 d0[1], r1 +; CHECK-FP16-NEXT: pop {r4, r5, r6, r7, r11, pc} entry: %conv = fptosi <2 x half> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) diff --git a/llvm/test/CodeGen/ARM/fpscr-multi-use.ll b/llvm/test/CodeGen/ARM/fpscr-multi-use.ll index 3e77ad65df992..c59c9824a902c 100644 --- a/llvm/test/CodeGen/ARM/fpscr-multi-use.ll +++ b/llvm/test/CodeGen/ARM/fpscr-multi-use.ll @@ -1,31 +1,51 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=armv7 %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv7 %s -o - | FileCheck --check-prefix=THUMB %s declare double @fn() define void @test(ptr %p, ptr %res) nounwind { ; CHECK-LABEL: test: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: vpush {d8} -; CHECK-NEXT: vldr d8, [r0] +; CHECK-NEXT: push {r4, r5, r11, lr} +; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: vcmp.f64 d8, #0 +; CHECK-NEXT: vcmp.f64 d16, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vneg.f64 d16, d8 -; CHECK-NEXT: vmov.f64 d17, d8 
-; CHECK-NEXT: vmovne.f64 d17, d16 -; CHECK-NEXT: vstr d17, [r1] +; CHECK-NEXT: mrs r5, apsr +; CHECK-NEXT: vneg.f64 d17, d16 +; CHECK-NEXT: vmovne.f64 d16, d17 +; CHECK-NEXT: vstr d16, [r1] ; CHECK-NEXT: bl fn -; CHECK-NEXT: vcmp.f64 d8, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vmov d16, r0, r1 ; CHECK-NEXT: eor r1, r1, #-2147483648 +; CHECK-NEXT: msr APSR_nzcvq, r5 ; CHECK-NEXT: vmov d17, r0, r1 ; CHECK-NEXT: vmovne.f64 d16, d17 ; CHECK-NEXT: vstr d16, [r4] -; CHECK-NEXT: vpop {d8} -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: pop {r4, r5, r11, pc} +; +; THUMB-LABEL: test: +; THUMB: @ %bb.0: @ %entry +; THUMB-NEXT: push {r4, r5, r7, lr} +; THUMB-NEXT: vldr d16, [r0] +; THUMB-NEXT: mov r4, r1 +; THUMB-NEXT: vcmp.f64 d16, #0 +; THUMB-NEXT: vmrs APSR_nzcv, fpscr +; THUMB-NEXT: mrs r5, apsr +; THUMB-NEXT: vneg.f64 d17, d16 +; THUMB-NEXT: it ne +; THUMB-NEXT: vmovne.f64 d16, d17 +; THUMB-NEXT: vstr d16, [r1] +; THUMB-NEXT: bl fn +; THUMB-NEXT: vmov d16, r0, r1 +; THUMB-NEXT: eor r1, r1, #-2147483648 +; THUMB-NEXT: msr APSR_nzcvq, r5 +; THUMB-NEXT: vmov d17, r0, r1 +; THUMB-NEXT: it ne +; THUMB-NEXT: vmovne.f64 d16, d17 +; THUMB-NEXT: vstr d16, [r4] +; THUMB-NEXT: pop {r4, r5, r7, pc} entry: %x = load double, ptr %p %cmp = fcmp une double %x, 0.000000e+00 diff --git a/llvm/test/CodeGen/ARM/fptoi-sat-store.ll b/llvm/test/CodeGen/ARM/fptoi-sat-store.ll index 67edf9855f372..d895fe89a2cdc 100644 --- a/llvm/test/CodeGen/ARM/fptoi-sat-store.ll +++ b/llvm/test/CodeGen/ARM/fptoi-sat-store.ll @@ -75,64 +75,62 @@ define void @test_signed_i32_f64(ptr %d, double %f) nounwind { ; SOFT: @ %bb.0: ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #12 -; SOFT-NEXT: sub sp, #12 -; SOFT-NEXT: mov r5, r3 -; SOFT-NEXT: mov r6, r2 -; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: ldr r2, .LCPI1_0 -; SOFT-NEXT: ldr r3, .LCPI1_1 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: bl __aeabi_dcmpgt -; SOFT-NEXT: 
str r0, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: movs r7, #0 -; SOFT-NEXT: ldr r3, .LCPI1_2 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: .pad #4 +; SOFT-NEXT: sub sp, #4 +; SOFT-NEXT: mov r6, r3 +; SOFT-NEXT: mov r7, r2 +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: movs r5, #0 +; SOFT-NEXT: ldr r3, .LCPI1_0 +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: mov r2, r5 ; SOFT-NEXT: bl __aeabi_dcmpge ; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r6 ; SOFT-NEXT: bl __aeabi_d2iz ; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB1_2 +; SOFT-NEXT: beq .LBB1_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: movs r0, #1 -; SOFT-NEXT: lsls r0, r0, #31 -; SOFT-NEXT: .LBB1_2: -; SOFT-NEXT: ldr r1, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bne .LBB1_4 -; SOFT-NEXT: @ %bb.3: ; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: b .LBB1_5 -; SOFT-NEXT: .LBB1_4: +; SOFT-NEXT: b .LBB1_3 +; SOFT-NEXT: .LBB1_2: +; SOFT-NEXT: movs r0, #1 +; SOFT-NEXT: lsls r4, r0, #31 +; SOFT-NEXT: .LBB1_3: +; SOFT-NEXT: ldr r2, .LCPI1_1 +; SOFT-NEXT: ldr r3, .LCPI1_2 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: bl __aeabi_dcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: beq .LBB1_5 +; SOFT-NEXT: @ %bb.4: ; SOFT-NEXT: ldr r4, .LCPI1_3 ; SOFT-NEXT: .LBB1_5: -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: mov r3, r5 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: mov r3, r6 ; SOFT-NEXT: bl __aeabi_dcmpun ; SOFT-NEXT: cmp r0, #0 ; SOFT-NEXT: bne .LBB1_7 ; SOFT-NEXT: @ %bb.6: -; SOFT-NEXT: mov r7, r4 +; SOFT-NEXT: mov r5, r4 ; SOFT-NEXT: .LBB1_7: -; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: str r7, [r0] -; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload +; SOFT-NEXT: str r5, [r0] +; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; 
SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.8: ; SOFT-NEXT: .LCPI1_0: -; SOFT-NEXT: .long 4290772992 @ 0xffc00000 +; SOFT-NEXT: .long 3252682752 @ 0xc1e00000 ; SOFT-NEXT: .LCPI1_1: -; SOFT-NEXT: .long 1105199103 @ 0x41dfffff +; SOFT-NEXT: .long 4290772992 @ 0xffc00000 ; SOFT-NEXT: .LCPI1_2: -; SOFT-NEXT: .long 3252682752 @ 0xc1e00000 +; SOFT-NEXT: .long 1105199103 @ 0x41dfffff ; SOFT-NEXT: .LCPI1_3: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; @@ -161,31 +159,31 @@ define void @test_unsigned_i32_f32(ptr %d, float %f) nounwind { ; SOFT-NEXT: push {r4, r5, r6, r7, lr} ; SOFT-NEXT: .pad #4 ; SOFT-NEXT: sub sp, #4 -; SOFT-NEXT: mov r7, r1 +; SOFT-NEXT: mov r6, r1 ; SOFT-NEXT: str r0, [sp] @ 4-byte Spill -; SOFT-NEXT: ldr r1, .LCPI2_0 -; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: mov r6, r0 ; SOFT-NEXT: movs r5, #0 -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r0, r1 ; SOFT-NEXT: mov r1, r5 ; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: bl __aeabi_f2uiz -; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: cmp r7, #0 ; SOFT-NEXT: bne .LBB2_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r4, r7 ; SOFT-NEXT: .LBB2_2: -; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: ldr r1, .LCPI2_0 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: cmp r0, #0 ; SOFT-NEXT: beq .LBB2_4 ; SOFT-NEXT: @ %bb.3: -; SOFT-NEXT: mvns r0, r5 +; SOFT-NEXT: mvns r4, r5 ; SOFT-NEXT: .LBB2_4: -; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload -; SOFT-NEXT: str r0, [r1] +; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload +; SOFT-NEXT: str r4, [r0] ; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 @@ -209,41 +207,40 @@ define void @test_unsigned_i32_f64(ptr %d, double %f) nounwind { ; SOFT: @ %bb.0: ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #12 -; SOFT-NEXT: sub sp, #12 -; 
SOFT-NEXT: mov r5, r3 -; SOFT-NEXT: mov r4, r2 -; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: ldr r2, .LCPI3_0 -; SOFT-NEXT: ldr r3, .LCPI3_1 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: bl __aeabi_dcmpgt -; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: .pad #4 +; SOFT-NEXT: sub sp, #4 +; SOFT-NEXT: mov r6, r3 +; SOFT-NEXT: mov r7, r2 +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: movs r5, #0 +; SOFT-NEXT: mov r0, r2 +; SOFT-NEXT: mov r1, r3 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r5 ; SOFT-NEXT: bl __aeabi_dcmpge -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r6 ; SOFT-NEXT: bl __aeabi_d2uiz -; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: cmp r4, #0 ; SOFT-NEXT: bne .LBB3_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: .LBB3_2: -; SOFT-NEXT: ldr r1, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: cmp r1, #0 +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: ldr r2, .LCPI3_0 +; SOFT-NEXT: ldr r3, .LCPI3_1 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: bl __aeabi_dcmpgt +; SOFT-NEXT: cmp r0, #0 ; SOFT-NEXT: beq .LBB3_4 ; SOFT-NEXT: @ %bb.3: -; SOFT-NEXT: mvns r0, r6 +; SOFT-NEXT: mvns r4, r5 ; SOFT-NEXT: .LBB3_4: -; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: str r0, [r1] -; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload +; SOFT-NEXT: str r4, [r0] +; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.5: diff --git a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll index 84f6ee276ba5f..5179f97624489 100644 --- a/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/ARM/fptosi-sat-scalar.ll @@ -468,10 +468,10 @@ define i19 
@test_signed_i19_f32(float %f) nounwind { ; VFP2-NEXT: movlt r0, #0 ; VFP2-NEXT: movtlt r0, #65532 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: vcmp.f32 s0, s0 ; VFP2-NEXT: itt gt ; VFP2-NEXT: movwgt r0, #65535 ; VFP2-NEXT: movtgt r0, #3 -; VFP2-NEXT: vcmp.f32 s0, s0 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr ; VFP2-NEXT: it vs ; VFP2-NEXT: movvs r0, #0 @@ -569,71 +569,67 @@ define i50 @test_signed_i50_f32(float %f) nounwind { ; SOFT: @ %bb.0: ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #12 -; SOFT-NEXT: sub sp, #12 -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: ldr r1, .LCPI6_0 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: .pad #4 +; SOFT-NEXT: sub sp, #4 +; SOFT-NEXT: mov r4, r0 ; SOFT-NEXT: movs r0, #27 ; SOFT-NEXT: lsls r1, r0, #27 -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: bl __aeabi_f2lz -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: cmp r7, #0 ; SOFT-NEXT: bne .LBB6_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: mov r5, r7 ; SOFT-NEXT: .LBB6_2: -; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: cmp r0, #0 ; SOFT-NEXT: beq .LBB6_4 ; SOFT-NEXT: @ %bb.3: -; SOFT-NEXT: mvns r4, r6 +; SOFT-NEXT: str r1, [sp] @ 4-byte Spill +; SOFT-NEXT: b .LBB6_5 ; SOFT-NEXT: .LBB6_4: -; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: bl __aeabi_fcmpun -; SOFT-NEXT: mov r1, r0 -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: bne .LBB6_6 -; SOFT-NEXT: @ %bb.5: +; SOFT-NEXT: ldr r0, .LCPI6_0 +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: .LBB6_5: +; SOFT-NEXT: ldr r1, .LCPI6_1 ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: .LBB6_6: -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB6_8 
-; SOFT-NEXT: @ %bb.7: -; SOFT-NEXT: ldr r3, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: b .LBB6_9 -; SOFT-NEXT: .LBB6_8: -; SOFT-NEXT: ldr r3, .LCPI6_1 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: movs r7, #0 +; SOFT-NEXT: mvns r6, r7 +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: beq .LBB6_7 +; SOFT-NEXT: @ %bb.6: +; SOFT-NEXT: ldr r0, .LCPI6_2 +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: .LBB6_7: +; SOFT-NEXT: bne .LBB6_9 +; SOFT-NEXT: @ %bb.8: +; SOFT-NEXT: mov r6, r5 ; SOFT-NEXT: .LBB6_9: -; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB6_11 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: bl __aeabi_fcmpun +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: bne .LBB6_11 ; SOFT-NEXT: @ %bb.10: -; SOFT-NEXT: ldr r3, .LCPI6_2 +; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: .LBB6_11: -; SOFT-NEXT: cmp r1, #0 ; SOFT-NEXT: bne .LBB6_13 ; SOFT-NEXT: @ %bb.12: -; SOFT-NEXT: mov r6, r3 +; SOFT-NEXT: ldr r7, [sp] @ 4-byte Reload ; SOFT-NEXT: .LBB6_13: -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.14: ; SOFT-NEXT: .LCPI6_0: -; SOFT-NEXT: .long 1476395007 @ 0x57ffffff -; SOFT-NEXT: .LCPI6_1: ; SOFT-NEXT: .long 4294836224 @ 0xfffe0000 +; SOFT-NEXT: .LCPI6_1: +; SOFT-NEXT: .long 1476395007 @ 0x57ffffff ; SOFT-NEXT: .LCPI6_2: ; SOFT-NEXT: .long 131071 @ 0x1ffff ; @@ -654,11 +650,12 @@ define i50 @test_signed_i50_f32(float %f) nounwind { ; VFP-NEXT: movlt r0, #0 ; VFP-NEXT: vcmp.f32 s2, s4 ; VFP-NEXT: vmrs APSR_nzcv, fpscr -; VFP-NEXT: ittt gt -; VFP-NEXT: movwgt r1, #65535 -; VFP-NEXT: movtgt r1, #1 +; VFP-NEXT: it gt ; VFP-NEXT: movgt.w r0, #-1 ; VFP-NEXT: vcmp.f32 s2, s2 +; VFP-NEXT: itt gt +; VFP-NEXT: movwgt r1, #65535 +; VFP-NEXT: movtgt r1, #1 ; VFP-NEXT: vmrs APSR_nzcv, fpscr ; VFP-NEXT: itt vs ; VFP-NEXT: movvs r0, #0 @@ -679,68 +676,60 @@ define i64 
@test_signed_i64_f32(float %f) nounwind { ; SOFT: @ %bb.0: ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #12 -; SOFT-NEXT: sub sp, #12 -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: ldr r1, .LCPI7_0 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: .pad #4 +; SOFT-NEXT: sub sp, #4 +; SOFT-NEXT: mov r4, r0 ; SOFT-NEXT: movs r0, #223 ; SOFT-NEXT: lsls r1, r0, #24 -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: bl __aeabi_f2lz -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: movs r2, #1 +; SOFT-NEXT: lsls r7, r2, #31 +; SOFT-NEXT: cmp r6, #0 ; SOFT-NEXT: bne .LBB7_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: .LBB7_2: -; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill ; SOFT-NEXT: beq .LBB7_4 ; SOFT-NEXT: @ %bb.3: -; SOFT-NEXT: mvns r4, r6 +; SOFT-NEXT: mov r7, r1 ; SOFT-NEXT: .LBB7_4: -; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: bl __aeabi_fcmpun -; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: ldr r1, .LCPI7_0 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: movs r6, #0 +; SOFT-NEXT: mvns r5, r6 ; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: bne .LBB7_6 +; SOFT-NEXT: beq .LBB7_6 ; SOFT-NEXT: @ %bb.5: -; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: ldr r7, .LCPI7_1 ; SOFT-NEXT: .LBB7_6: -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB7_8 +; SOFT-NEXT: bne .LBB7_8 ; SOFT-NEXT: @ %bb.7: -; SOFT-NEXT: ldr r3, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: b .LBB7_9 +; SOFT-NEXT: ldr r5, [sp] @ 4-byte Reload ; SOFT-NEXT: .LBB7_8: -; SOFT-NEXT: movs r2, #1 -; SOFT-NEXT: lsls r3, r2, #31 -; SOFT-NEXT: .LBB7_9: -; SOFT-NEXT: ldr 
r2, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB7_11 -; SOFT-NEXT: @ %bb.10: -; SOFT-NEXT: ldr r3, .LCPI7_1 -; SOFT-NEXT: .LBB7_11: -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bne .LBB7_13 -; SOFT-NEXT: @ %bb.12: -; SOFT-NEXT: mov r6, r3 -; SOFT-NEXT: .LBB7_13: +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: bl __aeabi_fcmpun +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: bne .LBB7_10 +; SOFT-NEXT: @ %bb.9: +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: .LBB7_10: +; SOFT-NEXT: bne .LBB7_12 +; SOFT-NEXT: @ %bb.11: +; SOFT-NEXT: mov r6, r7 +; SOFT-NEXT: .LBB7_12: ; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.14: +; SOFT-NEXT: @ %bb.13: ; SOFT-NEXT: .LCPI7_0: ; SOFT-NEXT: .long 1593835519 @ 0x5effffff ; SOFT-NEXT: .LCPI7_1: @@ -788,110 +777,106 @@ define i100 @test_signed_i100_f32(float %f) nounwind { ; SOFT-NEXT: push {r4, r5, r6, r7, lr} ; SOFT-NEXT: .pad #20 ; SOFT-NEXT: sub sp, #20 -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: ldr r1, .LCPI8_0 -; SOFT-NEXT: bl __aeabi_fcmpgt ; SOFT-NEXT: mov r4, r0 ; SOFT-NEXT: movs r0, #241 ; SOFT-NEXT: lsls r1, r0, #24 -; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: str r2, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: str r3, [sp] @ 4-byte Spill -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB8_2 +; SOFT-NEXT: movs r5, #7 +; SOFT-NEXT: str r5, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: mvns r7, r5 +; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: beq .LBB8_17 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: beq .LBB8_18 ; SOFT-NEXT: .LBB8_2: -; SOFT-NEXT: movs r5, #0 -; SOFT-NEXT: mvns r1, r5 -; SOFT-NEXT: str r4, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: cmp r4, #0 -; 
SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: mov r4, r1 ; SOFT-NEXT: bne .LBB8_4 -; SOFT-NEXT: @ %bb.3: -; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: .LBB8_3: +; SOFT-NEXT: mov r2, r6 ; SOFT-NEXT: .LBB8_4: -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: bl __aeabi_fcmpun -; SOFT-NEXT: mov r3, r0 -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: bne .LBB8_6 +; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: beq .LBB8_6 ; SOFT-NEXT: @ %bb.5: -; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r7, r3 ; SOFT-NEXT: .LBB8_6: -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: ldr r6, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: bne .LBB8_8 +; SOFT-NEXT: ldr r1, .LCPI8_0 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: movs r6, #0 +; SOFT-NEXT: mvns r5, r6 +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: beq .LBB8_19 ; SOFT-NEXT: @ %bb.7: -; SOFT-NEXT: str r7, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: beq .LBB8_20 ; SOFT-NEXT: .LBB8_8: -; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: bne .LBB8_10 -; SOFT-NEXT: @ %bb.9: -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: mov r7, r5 +; SOFT-NEXT: beq .LBB8_21 +; SOFT-NEXT: .LBB8_9: +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: bne .LBB8_11 ; SOFT-NEXT: .LBB8_10: -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: beq .LBB8_18 -; SOFT-NEXT: @ %bb.11: -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB8_19 -; SOFT-NEXT: .LBB8_12: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB8_14 +; SOFT-NEXT: ldr r5, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: .LBB8_11: +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: bl __aeabi_fcmpun +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: beq .LBB8_22 +; SOFT-NEXT: @ %bb.12: +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: beq .LBB8_23 ; SOFT-NEXT: 
.LBB8_13: -; SOFT-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: beq .LBB8_24 ; SOFT-NEXT: .LBB8_14: -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r2, r5 ; SOFT-NEXT: bne .LBB8_16 -; SOFT-NEXT: @ %bb.15: -; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: .LBB8_15: +; SOFT-NEXT: ldr r6, [sp, #16] @ 4-byte Reload ; SOFT-NEXT: .LBB8_16: -; SOFT-NEXT: movs r4, #7 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB8_20 -; SOFT-NEXT: @ %bb.17: -; SOFT-NEXT: ldr r7, [sp] @ 4-byte Reload -; SOFT-NEXT: b .LBB8_21 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: add sp, #20 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .LBB8_17: +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: bne .LBB8_2 ; SOFT-NEXT: .LBB8_18: -; SOFT-NEXT: mov r1, r2 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB8_12 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: beq .LBB8_3 +; SOFT-NEXT: b .LBB8_4 ; SOFT-NEXT: .LBB8_19: -; SOFT-NEXT: str r7, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB8_13 -; SOFT-NEXT: b .LBB8_14 +; SOFT-NEXT: str r7, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: bne .LBB8_8 ; SOFT-NEXT: .LBB8_20: -; SOFT-NEXT: mvns r7, r4 +; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: mov r7, r5 +; SOFT-NEXT: bne .LBB8_9 ; SOFT-NEXT: .LBB8_21: -; SOFT-NEXT: ldr r6, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB8_23 -; SOFT-NEXT: @ %bb.22: -; SOFT-NEXT: mov r4, r7 +; SOFT-NEXT: ldr r7, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: beq .LBB8_10 +; SOFT-NEXT: b .LBB8_11 +; SOFT-NEXT: .LBB8_22: +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: bne .LBB8_13 ; SOFT-NEXT: .LBB8_23: -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bne .LBB8_25 -; SOFT-NEXT: @ %bb.24: -; SOFT-NEXT: mov r5, r4 -; SOFT-NEXT: .LBB8_25: -; SOFT-NEXT: mov r3, r5 -; SOFT-NEXT: add sp, #20 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: bne .LBB8_14 +; 
SOFT-NEXT: .LBB8_24: +; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: beq .LBB8_15 +; SOFT-NEXT: b .LBB8_16 ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.26: +; SOFT-NEXT: @ %bb.25: ; SOFT-NEXT: .LCPI8_0: ; SOFT-NEXT: .long 1895825407 @ 0x70ffffff ; @@ -943,109 +928,104 @@ define i128 @test_signed_i128_f32(float %f) nounwind { ; SOFT-NEXT: push {r4, r5, r6, r7, lr} ; SOFT-NEXT: .pad #20 ; SOFT-NEXT: sub sp, #20 -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: ldr r1, .LCPI9_0 -; SOFT-NEXT: bl __aeabi_fcmpgt ; SOFT-NEXT: mov r4, r0 ; SOFT-NEXT: movs r0, #255 ; SOFT-NEXT: lsls r1, r0, #24 -; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: str r2, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: str r3, [sp] @ 4-byte Spill -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB9_2 +; SOFT-NEXT: movs r5, #1 +; SOFT-NEXT: lsls r7, r5, #31 +; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: beq .LBB9_18 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: beq .LBB9_19 ; SOFT-NEXT: .LBB9_2: -; SOFT-NEXT: movs r5, #0 -; SOFT-NEXT: mvns r1, r5 -; SOFT-NEXT: str r4, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: mov r4, r1 ; SOFT-NEXT: bne .LBB9_4 -; SOFT-NEXT: @ %bb.3: -; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: .LBB9_3: +; SOFT-NEXT: mov r2, r6 ; SOFT-NEXT: .LBB9_4: -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: bl __aeabi_fcmpun -; SOFT-NEXT: mov r3, r0 -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: bne .LBB9_6 +; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: beq .LBB9_6 ; SOFT-NEXT: @ %bb.5: -; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r7, r3 ; SOFT-NEXT: .LBB9_6: -; SOFT-NEXT: cmp r7, #0 -; 
SOFT-NEXT: ldr r6, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: bne .LBB9_8 +; SOFT-NEXT: ldr r1, .LCPI9_0 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: movs r6, #0 +; SOFT-NEXT: mvns r5, r6 +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: beq .LBB9_8 ; SOFT-NEXT: @ %bb.7: -; SOFT-NEXT: str r7, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: ldr r7, .LCPI9_1 ; SOFT-NEXT: .LBB9_8: -; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: bne .LBB9_10 +; SOFT-NEXT: str r7, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: beq .LBB9_20 ; SOFT-NEXT: @ %bb.9: -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: mov r7, r5 +; SOFT-NEXT: beq .LBB9_21 ; SOFT-NEXT: .LBB9_10: -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: beq .LBB9_18 -; SOFT-NEXT: @ %bb.11: -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB9_19 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: bne .LBB9_12 +; SOFT-NEXT: .LBB9_11: +; SOFT-NEXT: ldr r5, [sp, #12] @ 4-byte Reload ; SOFT-NEXT: .LBB9_12: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB9_14 -; SOFT-NEXT: .LBB9_13: -; SOFT-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: bl __aeabi_fcmpun +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: beq .LBB9_22 +; SOFT-NEXT: @ %bb.13: +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: beq .LBB9_23 ; SOFT-NEXT: .LBB9_14: -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r2, r5 -; SOFT-NEXT: bne .LBB9_16 -; SOFT-NEXT: @ %bb.15: ; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: beq .LBB9_24 +; SOFT-NEXT: .LBB9_15: +; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: bne .LBB9_17 ; SOFT-NEXT: .LBB9_16: -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB9_20 -; SOFT-NEXT: @ %bb.17: -; SOFT-NEXT: ldr r6, [sp] @ 4-byte Reload -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB9_21 -; SOFT-NEXT: b .LBB9_22 +; SOFT-NEXT: mov r6, r3 +; SOFT-NEXT: .LBB9_17: +; SOFT-NEXT: mov r3, r6 +; 
SOFT-NEXT: add sp, #20 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .LBB9_18: -; SOFT-NEXT: mov r1, r2 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB9_12 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: bne .LBB9_2 ; SOFT-NEXT: .LBB9_19: -; SOFT-NEXT: str r7, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB9_13 -; SOFT-NEXT: b .LBB9_14 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: beq .LBB9_3 +; SOFT-NEXT: b .LBB9_4 ; SOFT-NEXT: .LBB9_20: -; SOFT-NEXT: movs r4, #1 -; SOFT-NEXT: lsls r6, r4, #31 -; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB9_22 +; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: mov r7, r5 +; SOFT-NEXT: bne .LBB9_10 ; SOFT-NEXT: .LBB9_21: -; SOFT-NEXT: ldr r6, .LCPI9_1 +; SOFT-NEXT: ldr r7, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: beq .LBB9_11 +; SOFT-NEXT: b .LBB9_12 ; SOFT-NEXT: .LBB9_22: -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bne .LBB9_24 -; SOFT-NEXT: @ %bb.23: -; SOFT-NEXT: mov r5, r6 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: bne .LBB9_14 +; SOFT-NEXT: .LBB9_23: +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: bne .LBB9_15 ; SOFT-NEXT: .LBB9_24: -; SOFT-NEXT: mov r3, r5 -; SOFT-NEXT: add sp, #20 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: beq .LBB9_16 +; SOFT-NEXT: b .LBB9_17 ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.25: ; SOFT-NEXT: .LCPI9_0: @@ -1216,12 +1196,6 @@ define i8 @test_signed_i8_f64(double %f) nounwind { ; SOFT-NEXT: movs r4, #0 ; SOFT-NEXT: ldr r3, .LCPI11_0 ; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: bl __aeabi_dcmpgt -; SOFT-NEXT: str r0, [sp] @ 4-byte Spill -; SOFT-NEXT: ldr r3, .LCPI11_1 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: mov r2, r4 ; SOFT-NEXT: bl __aeabi_dcmpge ; SOFT-NEXT: mov r7, r0 ; SOFT-NEXT: mov r0, r6 @@ -1229,36 +1203,45 @@ define i8 
@test_signed_i8_f64(double %f) nounwind { ; SOFT-NEXT: bl __aeabi_d2iz ; SOFT-NEXT: movs r1, #127 ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB11_2 +; SOFT-NEXT: beq .LBB11_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: mvns r0, r1 +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: mov r7, r1 +; SOFT-NEXT: b .LBB11_3 ; SOFT-NEXT: .LBB11_2: -; SOFT-NEXT: ldr r2, [sp] @ 4-byte Reload -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB11_4 -; SOFT-NEXT: @ %bb.3: -; SOFT-NEXT: mov r1, r0 -; SOFT-NEXT: .LBB11_4: ; SOFT-NEXT: mov r7, r1 +; SOFT-NEXT: mvns r0, r1 +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: .LBB11_3: +; SOFT-NEXT: ldr r3, .LCPI11_1 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: bl __aeabi_dcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: bne .LBB11_5 +; SOFT-NEXT: @ %bb.4: +; SOFT-NEXT: ldr r7, [sp] @ 4-byte Reload +; SOFT-NEXT: .LBB11_5: ; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: mov r1, r5 ; SOFT-NEXT: mov r2, r6 ; SOFT-NEXT: mov r3, r5 ; SOFT-NEXT: bl __aeabi_dcmpun ; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: bne .LBB11_6 -; SOFT-NEXT: @ %bb.5: +; SOFT-NEXT: bne .LBB11_7 +; SOFT-NEXT: @ %bb.6: ; SOFT-NEXT: mov r4, r7 -; SOFT-NEXT: .LBB11_6: +; SOFT-NEXT: .LBB11_7: ; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.7: +; SOFT-NEXT: @ %bb.8: ; SOFT-NEXT: .LCPI11_0: -; SOFT-NEXT: .long 1080016896 @ 0x405fc000 -; SOFT-NEXT: .LCPI11_1: ; SOFT-NEXT: .long 3227516928 @ 0xc0600000 +; SOFT-NEXT: .LCPI11_1: +; SOFT-NEXT: .long 1080016896 @ 0x405fc000 ; ; VFP2-LABEL: test_signed_i8_f64: ; VFP2: @ %bb.0: @@ -1327,29 +1310,27 @@ define i13 @test_signed_i13_f64(double %f) nounwind { ; SOFT-NEXT: movs r4, #0 ; SOFT-NEXT: ldr r3, .LCPI12_0 ; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: bl __aeabi_dcmpgt -; SOFT-NEXT: str r0, [sp] @ 4-byte Spill -; SOFT-NEXT: ldr r3, .LCPI12_1 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: mov r2, r4 ; SOFT-NEXT: bl 
__aeabi_dcmpge ; SOFT-NEXT: mov r7, r0 ; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: mov r1, r5 ; SOFT-NEXT: bl __aeabi_d2iz ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB12_2 +; SOFT-NEXT: beq .LBB12_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: ldr r0, .LCPI12_2 -; SOFT-NEXT: .LBB12_2: -; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bne .LBB12_4 -; SOFT-NEXT: @ %bb.3: ; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: b .LBB12_5 -; SOFT-NEXT: .LBB12_4: +; SOFT-NEXT: b .LBB12_3 +; SOFT-NEXT: .LBB12_2: +; SOFT-NEXT: ldr r7, .LCPI12_1 +; SOFT-NEXT: .LBB12_3: +; SOFT-NEXT: ldr r3, .LCPI12_2 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: bl __aeabi_dcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: beq .LBB12_5 +; SOFT-NEXT: @ %bb.4: ; SOFT-NEXT: ldr r7, .LCPI12_3 ; SOFT-NEXT: .LBB12_5: ; SOFT-NEXT: mov r0, r6 @@ -1368,11 +1349,11 @@ define i13 @test_signed_i13_f64(double %f) nounwind { ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.8: ; SOFT-NEXT: .LCPI12_0: -; SOFT-NEXT: .long 1085275648 @ 0x40affe00 -; SOFT-NEXT: .LCPI12_1: ; SOFT-NEXT: .long 3232759808 @ 0xc0b00000 -; SOFT-NEXT: .LCPI12_2: +; SOFT-NEXT: .LCPI12_1: ; SOFT-NEXT: .long 4294963200 @ 0xfffff000 +; SOFT-NEXT: .LCPI12_2: +; SOFT-NEXT: .long 1085275648 @ 0x40affe00 ; SOFT-NEXT: .LCPI12_3: ; SOFT-NEXT: .long 4095 @ 0xfff ; @@ -1444,29 +1425,27 @@ define i16 @test_signed_i16_f64(double %f) nounwind { ; SOFT-NEXT: movs r4, #0 ; SOFT-NEXT: ldr r3, .LCPI13_0 ; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: bl __aeabi_dcmpgt -; SOFT-NEXT: str r0, [sp] @ 4-byte Spill -; SOFT-NEXT: ldr r3, .LCPI13_1 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: mov r2, r4 ; SOFT-NEXT: bl __aeabi_dcmpge ; SOFT-NEXT: mov r7, r0 ; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: mov r1, r5 ; SOFT-NEXT: bl __aeabi_d2iz ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB13_2 +; SOFT-NEXT: beq .LBB13_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: ldr r0, .LCPI13_2 -; SOFT-NEXT: .LBB13_2: -; SOFT-NEXT: ldr r1, [sp] @ 
4-byte Reload -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bne .LBB13_4 -; SOFT-NEXT: @ %bb.3: ; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: b .LBB13_5 -; SOFT-NEXT: .LBB13_4: +; SOFT-NEXT: b .LBB13_3 +; SOFT-NEXT: .LBB13_2: +; SOFT-NEXT: ldr r7, .LCPI13_1 +; SOFT-NEXT: .LBB13_3: +; SOFT-NEXT: ldr r3, .LCPI13_2 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: bl __aeabi_dcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: beq .LBB13_5 +; SOFT-NEXT: @ %bb.4: ; SOFT-NEXT: ldr r7, .LCPI13_3 ; SOFT-NEXT: .LBB13_5: ; SOFT-NEXT: mov r0, r6 @@ -1485,11 +1464,11 @@ define i16 @test_signed_i16_f64(double %f) nounwind { ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.8: ; SOFT-NEXT: .LCPI13_0: -; SOFT-NEXT: .long 1088421824 @ 0x40dfffc0 -; SOFT-NEXT: .LCPI13_1: ; SOFT-NEXT: .long 3235905536 @ 0xc0e00000 -; SOFT-NEXT: .LCPI13_2: +; SOFT-NEXT: .LCPI13_1: ; SOFT-NEXT: .long 4294934528 @ 0xffff8000 +; SOFT-NEXT: .LCPI13_2: +; SOFT-NEXT: .long 1088421824 @ 0x40dfffc0 ; SOFT-NEXT: .LCPI13_3: ; SOFT-NEXT: .long 32767 @ 0x7fff ; @@ -1561,29 +1540,27 @@ define i19 @test_signed_i19_f64(double %f) nounwind { ; SOFT-NEXT: movs r4, #0 ; SOFT-NEXT: ldr r3, .LCPI14_0 ; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: bl __aeabi_dcmpgt -; SOFT-NEXT: str r0, [sp] @ 4-byte Spill -; SOFT-NEXT: ldr r3, .LCPI14_1 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: mov r2, r4 ; SOFT-NEXT: bl __aeabi_dcmpge ; SOFT-NEXT: mov r7, r0 ; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: mov r1, r5 ; SOFT-NEXT: bl __aeabi_d2iz ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB14_2 +; SOFT-NEXT: beq .LBB14_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: ldr r0, .LCPI14_2 -; SOFT-NEXT: .LBB14_2: -; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bne .LBB14_4 -; SOFT-NEXT: @ %bb.3: ; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: b .LBB14_5 -; SOFT-NEXT: .LBB14_4: +; SOFT-NEXT: b .LBB14_3 +; SOFT-NEXT: .LBB14_2: +; SOFT-NEXT: ldr r7, .LCPI14_1 +; SOFT-NEXT: .LBB14_3: +; SOFT-NEXT: ldr r3, .LCPI14_2 +; 
SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: bl __aeabi_dcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: beq .LBB14_5 +; SOFT-NEXT: @ %bb.4: ; SOFT-NEXT: ldr r7, .LCPI14_3 ; SOFT-NEXT: .LBB14_5: ; SOFT-NEXT: mov r0, r6 @@ -1602,11 +1579,11 @@ define i19 @test_signed_i19_f64(double %f) nounwind { ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.8: ; SOFT-NEXT: .LCPI14_0: -; SOFT-NEXT: .long 1091567608 @ 0x410ffff8 -; SOFT-NEXT: .LCPI14_1: ; SOFT-NEXT: .long 3239051264 @ 0xc1100000 -; SOFT-NEXT: .LCPI14_2: +; SOFT-NEXT: .LCPI14_1: ; SOFT-NEXT: .long 4294705152 @ 0xfffc0000 +; SOFT-NEXT: .LCPI14_2: +; SOFT-NEXT: .long 1091567608 @ 0x410ffff8 ; SOFT-NEXT: .LCPI14_3: ; SOFT-NEXT: .long 262143 @ 0x3ffff ; @@ -1674,58 +1651,56 @@ define i32 @test_signed_i32_f64(double %f) nounwind { ; SOFT-NEXT: push {r4, r5, r6, r7, lr} ; SOFT-NEXT: .pad #4 ; SOFT-NEXT: sub sp, #4 -; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: ldr r2, .LCPI15_0 -; SOFT-NEXT: ldr r3, .LCPI15_1 -; SOFT-NEXT: bl __aeabi_dcmpgt -; SOFT-NEXT: str r0, [sp] @ 4-byte Spill -; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: ldr r3, .LCPI15_2 -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r5, r1 +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: movs r4, #0 +; SOFT-NEXT: ldr r3, .LCPI15_0 +; SOFT-NEXT: mov r2, r4 ; SOFT-NEXT: bl __aeabi_dcmpge ; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r5 ; SOFT-NEXT: bl __aeabi_d2iz ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB15_2 +; SOFT-NEXT: beq .LBB15_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: movs r0, #1 -; SOFT-NEXT: lsls r0, r0, #31 -; SOFT-NEXT: .LBB15_2: -; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bne .LBB15_4 -; SOFT-NEXT: @ %bb.3: ; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: b .LBB15_5 -; SOFT-NEXT: .LBB15_4: +; SOFT-NEXT: b .LBB15_3 +; SOFT-NEXT: .LBB15_2: +; SOFT-NEXT: movs r0, #1 +; 
SOFT-NEXT: lsls r7, r0, #31 +; SOFT-NEXT: .LBB15_3: +; SOFT-NEXT: ldr r2, .LCPI15_1 +; SOFT-NEXT: ldr r3, .LCPI15_2 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: bl __aeabi_dcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: beq .LBB15_5 +; SOFT-NEXT: @ %bb.4: ; SOFT-NEXT: ldr r7, .LCPI15_3 ; SOFT-NEXT: .LBB15_5: -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: mov r2, r5 -; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r5 ; SOFT-NEXT: bl __aeabi_dcmpun ; SOFT-NEXT: cmp r0, #0 ; SOFT-NEXT: bne .LBB15_7 ; SOFT-NEXT: @ %bb.6: -; SOFT-NEXT: mov r6, r7 +; SOFT-NEXT: mov r4, r7 ; SOFT-NEXT: .LBB15_7: -; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.8: ; SOFT-NEXT: .LCPI15_0: -; SOFT-NEXT: .long 4290772992 @ 0xffc00000 +; SOFT-NEXT: .long 3252682752 @ 0xc1e00000 ; SOFT-NEXT: .LCPI15_1: -; SOFT-NEXT: .long 1105199103 @ 0x41dfffff +; SOFT-NEXT: .long 4290772992 @ 0xffc00000 ; SOFT-NEXT: .LCPI15_2: -; SOFT-NEXT: .long 3252682752 @ 0xc1e00000 +; SOFT-NEXT: .long 1105199103 @ 0x41dfffff ; SOFT-NEXT: .LCPI15_3: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; @@ -1753,82 +1728,73 @@ define i50 @test_signed_i50_f64(double %f) nounwind { ; SOFT-NEXT: push {r4, r5, r6, r7, lr} ; SOFT-NEXT: .pad #12 ; SOFT-NEXT: sub sp, #12 -; SOFT-NEXT: mov r7, r1 +; SOFT-NEXT: mov r5, r1 ; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: movs r0, #15 -; SOFT-NEXT: mvns r2, r0 -; SOFT-NEXT: ldr r3, .LCPI16_0 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: bl __aeabi_dcmpgt -; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill ; SOFT-NEXT: movs r0, #195 ; SOFT-NEXT: lsls r3, r0, #24 ; SOFT-NEXT: movs r4, #0 ; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r7 ; SOFT-NEXT: mov r2, r4 ; SOFT-NEXT: bl __aeabi_dcmpge -; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: mov r7, r0 ; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: mov r1, 
r5 ; SOFT-NEXT: bl __aeabi_d2lz -; SOFT-NEXT: mov r2, r0 -; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: cmp r7, #0 ; SOFT-NEXT: bne .LBB16_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: str r7, [sp, #4] @ 4-byte Spill ; SOFT-NEXT: .LBB16_2: -; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: bne .LBB16_4 +; SOFT-NEXT: beq .LBB16_4 ; SOFT-NEXT: @ %bb.3: -; SOFT-NEXT: str r2, [sp] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill ; SOFT-NEXT: b .LBB16_5 ; SOFT-NEXT: .LBB16_4: -; SOFT-NEXT: mvns r0, r4 -; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: ldr r0, .LCPI16_0 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill ; SOFT-NEXT: .LBB16_5: +; SOFT-NEXT: movs r0, #15 +; SOFT-NEXT: mvns r2, r0 +; SOFT-NEXT: ldr r3, .LCPI16_1 ; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: mov r3, r7 -; SOFT-NEXT: bl __aeabi_dcmpun -; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: bl __aeabi_dcmpgt +; SOFT-NEXT: mvns r7, r4 ; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bne .LBB16_7 +; SOFT-NEXT: beq .LBB16_7 ; SOFT-NEXT: @ %bb.6: -; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload +; SOFT-NEXT: ldr r0, .LCPI16_2 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill ; SOFT-NEXT: .LBB16_7: -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB16_9 +; SOFT-NEXT: bne .LBB16_9 ; SOFT-NEXT: @ %bb.8: -; SOFT-NEXT: ldr r3, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: b .LBB16_10 +; SOFT-NEXT: ldr r7, [sp, #4] @ 4-byte Reload ; SOFT-NEXT: .LBB16_9: -; SOFT-NEXT: ldr r3, .LCPI16_1 -; SOFT-NEXT: .LBB16_10: -; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB16_12 -; SOFT-NEXT: @ %bb.11: -; SOFT-NEXT: ldr r3, .LCPI16_2 -; SOFT-NEXT: .LBB16_12: -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bne .LBB16_14 -; SOFT-NEXT: @ %bb.13: -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: .LBB16_14: +; 
SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r5 +; SOFT-NEXT: bl __aeabi_dcmpun +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: bne .LBB16_11 +; SOFT-NEXT: @ %bb.10: +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: .LBB16_11: +; SOFT-NEXT: bne .LBB16_13 +; SOFT-NEXT: @ %bb.12: +; SOFT-NEXT: ldr r4, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: .LBB16_13: ; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: add sp, #12 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.15: +; SOFT-NEXT: @ %bb.14: ; SOFT-NEXT: .LCPI16_0: -; SOFT-NEXT: .long 1124073471 @ 0x42ffffff -; SOFT-NEXT: .LCPI16_1: ; SOFT-NEXT: .long 4294836224 @ 0xfffe0000 +; SOFT-NEXT: .LCPI16_1: +; SOFT-NEXT: .long 1124073471 @ 0x42ffffff ; SOFT-NEXT: .LCPI16_2: ; SOFT-NEXT: .long 131071 @ 0x1ffff ; @@ -1850,11 +1816,12 @@ define i50 @test_signed_i50_f64(double %f) nounwind { ; VFP2-NEXT: movlt r0, #0 ; VFP2-NEXT: vcmp.f64 d17, d18 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: ittt gt -; VFP2-NEXT: movwgt r1, #65535 -; VFP2-NEXT: movtgt r1, #1 +; VFP2-NEXT: it gt ; VFP2-NEXT: movgt.w r0, #-1 ; VFP2-NEXT: vcmp.f64 d17, d17 +; VFP2-NEXT: itt gt +; VFP2-NEXT: movwgt r1, #65535 +; VFP2-NEXT: movtgt r1, #1 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr ; VFP2-NEXT: itt vs ; VFP2-NEXT: movvs r0, #0 @@ -1908,76 +1875,69 @@ define i64 @test_signed_i64_f64(double %f) nounwind { ; SOFT-NEXT: push {r4, r5, r6, r7, lr} ; SOFT-NEXT: .pad #12 ; SOFT-NEXT: sub sp, #12 -; SOFT-NEXT: mov r6, r1 -; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: mov r5, r1 +; SOFT-NEXT: mov r6, r0 ; SOFT-NEXT: movs r4, #0 -; SOFT-NEXT: mvns r2, r4 ; SOFT-NEXT: ldr r3, .LCPI17_0 -; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: bl __aeabi_dcmpgt -; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: ldr r3, .LCPI17_1 -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: mov r1, r6 ; SOFT-NEXT: mov r2, r4 ; SOFT-NEXT: bl __aeabi_dcmpge ; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: mov 
r1, r6 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r5 ; SOFT-NEXT: bl __aeabi_d2lz -; SOFT-NEXT: str r1, [sp] @ 4-byte Spill +; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: movs r0, #1 +; SOFT-NEXT: lsls r0, r0, #31 ; SOFT-NEXT: cmp r7, #0 ; SOFT-NEXT: bne .LBB17_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: str r7, [sp, #4] @ 4-byte Spill ; SOFT-NEXT: .LBB17_2: -; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bne .LBB17_4 +; SOFT-NEXT: beq .LBB17_4 ; SOFT-NEXT: @ %bb.3: -; SOFT-NEXT: str r0, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: mov r0, r1 ; SOFT-NEXT: .LBB17_4: -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: mov r2, r5 -; SOFT-NEXT: mov r3, r6 -; SOFT-NEXT: bl __aeabi_dcmpun -; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mvns r7, r4 +; SOFT-NEXT: ldr r3, .LCPI17_1 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: bl __aeabi_dcmpgt ; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bne .LBB17_6 +; SOFT-NEXT: beq .LBB17_6 ; SOFT-NEXT: @ %bb.5: -; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: ldr r0, .LCPI17_2 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill ; SOFT-NEXT: .LBB17_6: -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB17_8 +; SOFT-NEXT: bne .LBB17_8 ; SOFT-NEXT: @ %bb.7: -; SOFT-NEXT: ldr r3, [sp] @ 4-byte Reload -; SOFT-NEXT: b .LBB17_9 +; SOFT-NEXT: ldr r7, [sp, #4] @ 4-byte Reload ; SOFT-NEXT: .LBB17_8: -; SOFT-NEXT: movs r2, #1 -; SOFT-NEXT: lsls r3, r2, #31 -; SOFT-NEXT: .LBB17_9: -; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB17_11 -; SOFT-NEXT: @ %bb.10: -; SOFT-NEXT: ldr r3, .LCPI17_2 -; SOFT-NEXT: .LBB17_11: -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bne .LBB17_13 -; SOFT-NEXT: @ %bb.12: -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: .LBB17_13: +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: 
mov r3, r5 +; SOFT-NEXT: bl __aeabi_dcmpun +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: bne .LBB17_10 +; SOFT-NEXT: @ %bb.9: +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: .LBB17_10: +; SOFT-NEXT: bne .LBB17_12 +; SOFT-NEXT: @ %bb.11: +; SOFT-NEXT: ldr r4, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: .LBB17_12: ; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: add sp, #12 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.14: +; SOFT-NEXT: @ %bb.13: ; SOFT-NEXT: .LCPI17_0: -; SOFT-NEXT: .long 1138753535 @ 0x43dfffff -; SOFT-NEXT: .LCPI17_1: ; SOFT-NEXT: .long 3286237184 @ 0xc3e00000 +; SOFT-NEXT: .LCPI17_1: +; SOFT-NEXT: .long 1138753535 @ 0x43dfffff ; SOFT-NEXT: .LCPI17_2: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; @@ -2059,122 +2019,119 @@ define i100 @test_signed_i100_f64(double %f) nounwind { ; SOFT: @ %bb.0: ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #28 -; SOFT-NEXT: sub sp, #28 +; SOFT-NEXT: .pad #20 +; SOFT-NEXT: sub sp, #20 ; SOFT-NEXT: mov r5, r1 ; SOFT-NEXT: mov r6, r0 ; SOFT-NEXT: movs r4, #0 -; SOFT-NEXT: mvns r2, r4 ; SOFT-NEXT: ldr r3, .LCPI18_0 -; SOFT-NEXT: str r2, [sp, #24] @ 4-byte Spill -; SOFT-NEXT: bl __aeabi_dcmpgt -; SOFT-NEXT: str r0, [sp, #20] @ 4-byte Spill -; SOFT-NEXT: ldr r3, .LCPI18_1 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r5 ; SOFT-NEXT: mov r2, r4 ; SOFT-NEXT: bl __aeabi_dcmpge ; SOFT-NEXT: mov r7, r0 ; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: mov r1, r5 ; SOFT-NEXT: bl __fixdfti -; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: str r2, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: movs r0, #7 +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: mvns r0, r0 ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB18_2 +; SOFT-NEXT: beq .LBB18_17 ; SOFT-NEXT: @ %bb.1: -; 
SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: beq .LBB18_18 ; SOFT-NEXT: .LBB18_2: -; SOFT-NEXT: ldr r1, [sp, #20] @ 4-byte Reload -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: ldr r1, [sp, #24] @ 4-byte Reload -; SOFT-NEXT: bne .LBB18_4 -; SOFT-NEXT: @ %bb.3: -; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: beq .LBB18_19 +; SOFT-NEXT: .LBB18_3: +; SOFT-NEXT: beq .LBB18_5 ; SOFT-NEXT: .LBB18_4: +; SOFT-NEXT: mov r0, r3 +; SOFT-NEXT: .LBB18_5: +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: mvns r7, r4 +; SOFT-NEXT: ldr r3, .LCPI18_1 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: bl __aeabi_dcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: beq .LBB18_20 +; SOFT-NEXT: @ %bb.6: +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: beq .LBB18_21 +; SOFT-NEXT: .LBB18_7: +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: bne .LBB18_9 +; SOFT-NEXT: .LBB18_8: +; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: .LBB18_9: ; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: bne .LBB18_11 +; SOFT-NEXT: @ %bb.10: +; SOFT-NEXT: ldr r7, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: .LBB18_11: ; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: mov r1, r5 ; SOFT-NEXT: mov r2, r6 ; SOFT-NEXT: mov r3, r5 ; SOFT-NEXT: bl __aeabi_dcmpun -; SOFT-NEXT: mov r3, r0 ; SOFT-NEXT: cmp r0, #0 ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bne .LBB18_6 -; SOFT-NEXT: @ %bb.5: -; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: .LBB18_6: -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: ldr r6, [sp, #20] @ 4-byte Reload -; SOFT-NEXT: bne .LBB18_8 -; SOFT-NEXT: @ %bb.7: -; SOFT-NEXT: str r7, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: .LBB18_8: -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: ldr r2, [sp, #24] @ 4-byte Reload -; SOFT-NEXT: bne .LBB18_10 -; SOFT-NEXT: @ %bb.9: -; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: .LBB18_10: -; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: beq .LBB18_22 +; SOFT-NEXT: @ %bb.12: ; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: beq .LBB18_18 -; SOFT-NEXT: @ 
%bb.11: -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB18_19 -; SOFT-NEXT: .LBB18_12: -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB18_14 +; SOFT-NEXT: beq .LBB18_23 ; SOFT-NEXT: .LBB18_13: -; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: str r2, [sp, #24] @ 4-byte Spill -; SOFT-NEXT: .LBB18_14: -; SOFT-NEXT: cmp r3, #0 ; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: beq .LBB18_24 +; SOFT-NEXT: .LBB18_14: ; SOFT-NEXT: bne .LBB18_16 -; SOFT-NEXT: @ %bb.15: -; SOFT-NEXT: ldr r2, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: .LBB18_15: +; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload ; SOFT-NEXT: .LBB18_16: -; SOFT-NEXT: movs r5, #7 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB18_20 -; SOFT-NEXT: @ %bb.17: -; SOFT-NEXT: ldr r7, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB18_21 -; SOFT-NEXT: b .LBB18_22 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: add sp, #20 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .LBB18_17: +; SOFT-NEXT: str r7, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: bne .LBB18_2 ; SOFT-NEXT: .LBB18_18: -; SOFT-NEXT: mov r1, r2 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB18_12 +; SOFT-NEXT: str r7, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: bne .LBB18_3 ; SOFT-NEXT: .LBB18_19: -; SOFT-NEXT: str r7, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB18_13 -; SOFT-NEXT: b .LBB18_14 +; SOFT-NEXT: str r7, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: bne .LBB18_4 +; SOFT-NEXT: b .LBB18_5 ; SOFT-NEXT: .LBB18_20: -; SOFT-NEXT: mvns r7, r5 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB18_22 +; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: bne .LBB18_7 ; SOFT-NEXT: .LBB18_21: -; SOFT-NEXT: mov r5, r7 +; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: beq .LBB18_8 +; SOFT-NEXT: b .LBB18_9 ; SOFT-NEXT: .LBB18_22: -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bne .LBB18_24 -; SOFT-NEXT: @ %bb.23: -; SOFT-NEXT: mov r4, r5 +; 
SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: bne .LBB18_13 +; SOFT-NEXT: .LBB18_23: +; SOFT-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: bne .LBB18_14 ; SOFT-NEXT: .LBB18_24: -; SOFT-NEXT: mov r3, r4 -; SOFT-NEXT: add sp, #28 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: beq .LBB18_15 +; SOFT-NEXT: b .LBB18_16 ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.25: ; SOFT-NEXT: .LCPI18_0: -; SOFT-NEXT: .long 1176502271 @ 0x461fffff -; SOFT-NEXT: .LCPI18_1: ; SOFT-NEXT: .long 3323985920 @ 0xc6200000 +; SOFT-NEXT: .LCPI18_1: +; SOFT-NEXT: .long 1176502271 @ 0x461fffff ; ; VFP2-LABEL: test_signed_i100_f64: ; VFP2: @ %bb.0: @@ -2266,122 +2223,118 @@ define i128 @test_signed_i128_f64(double %f) nounwind { ; SOFT: @ %bb.0: ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #28 -; SOFT-NEXT: sub sp, #28 +; SOFT-NEXT: .pad #20 +; SOFT-NEXT: sub sp, #20 ; SOFT-NEXT: mov r5, r1 ; SOFT-NEXT: mov r6, r0 ; SOFT-NEXT: movs r4, #0 -; SOFT-NEXT: mvns r2, r4 ; SOFT-NEXT: ldr r3, .LCPI19_0 -; SOFT-NEXT: str r2, [sp, #24] @ 4-byte Spill -; SOFT-NEXT: bl __aeabi_dcmpgt -; SOFT-NEXT: str r0, [sp, #20] @ 4-byte Spill -; SOFT-NEXT: ldr r3, .LCPI19_1 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r5 ; SOFT-NEXT: mov r2, r4 ; SOFT-NEXT: bl __aeabi_dcmpge ; SOFT-NEXT: mov r7, r0 ; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: mov r1, r5 ; SOFT-NEXT: bl __fixdfti -; SOFT-NEXT: str r1, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: str r2, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: movs r0, #1 +; SOFT-NEXT: lsls r0, r0, #31 +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB19_2 +; SOFT-NEXT: beq .LBB19_17 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: mov r0, r7 
+; SOFT-NEXT: beq .LBB19_18 ; SOFT-NEXT: .LBB19_2: -; SOFT-NEXT: ldr r1, [sp, #20] @ 4-byte Reload -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: ldr r1, [sp, #24] @ 4-byte Reload -; SOFT-NEXT: bne .LBB19_4 -; SOFT-NEXT: @ %bb.3: -; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: beq .LBB19_19 +; SOFT-NEXT: .LBB19_3: +; SOFT-NEXT: beq .LBB19_5 ; SOFT-NEXT: .LBB19_4: +; SOFT-NEXT: str r3, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: .LBB19_5: +; SOFT-NEXT: mvns r7, r4 +; SOFT-NEXT: ldr r3, .LCPI19_1 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: bl __aeabi_dcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: bne .LBB19_20 +; SOFT-NEXT: @ %bb.6: +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: beq .LBB19_21 +; SOFT-NEXT: .LBB19_7: +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: bne .LBB19_9 +; SOFT-NEXT: .LBB19_8: +; SOFT-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: .LBB19_9: ; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: bne .LBB19_11 +; SOFT-NEXT: @ %bb.10: +; SOFT-NEXT: ldr r7, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: .LBB19_11: ; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: mov r1, r5 ; SOFT-NEXT: mov r2, r6 ; SOFT-NEXT: mov r3, r5 ; SOFT-NEXT: bl __aeabi_dcmpun -; SOFT-NEXT: mov r3, r0 ; SOFT-NEXT: cmp r0, #0 ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bne .LBB19_6 -; SOFT-NEXT: @ %bb.5: -; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: .LBB19_6: -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: ldr r6, [sp, #20] @ 4-byte Reload -; SOFT-NEXT: bne .LBB19_8 -; SOFT-NEXT: @ %bb.7: -; SOFT-NEXT: str r7, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: .LBB19_8: -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: ldr r2, [sp, #24] @ 4-byte Reload -; SOFT-NEXT: bne .LBB19_10 -; SOFT-NEXT: @ %bb.9: -; SOFT-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; SOFT-NEXT: .LBB19_10: -; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: beq .LBB19_22 +; SOFT-NEXT: @ %bb.12: ; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: beq .LBB19_18 -; SOFT-NEXT: @ %bb.11: -; SOFT-NEXT: cmp r7, #0 -; 
SOFT-NEXT: beq .LBB19_19 -; SOFT-NEXT: .LBB19_12: -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB19_14 +; SOFT-NEXT: beq .LBB19_23 ; SOFT-NEXT: .LBB19_13: -; SOFT-NEXT: ldr r2, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: str r2, [sp, #24] @ 4-byte Spill -; SOFT-NEXT: .LBB19_14: -; SOFT-NEXT: cmp r3, #0 ; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: beq .LBB19_24 +; SOFT-NEXT: .LBB19_14: ; SOFT-NEXT: bne .LBB19_16 -; SOFT-NEXT: @ %bb.15: -; SOFT-NEXT: ldr r2, [sp, #24] @ 4-byte Reload +; SOFT-NEXT: .LBB19_15: +; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload ; SOFT-NEXT: .LBB19_16: -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB19_20 -; SOFT-NEXT: @ %bb.17: -; SOFT-NEXT: ldr r5, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB19_21 -; SOFT-NEXT: b .LBB19_22 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: add sp, #20 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .LBB19_17: +; SOFT-NEXT: str r7, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: bne .LBB19_2 ; SOFT-NEXT: .LBB19_18: -; SOFT-NEXT: mov r1, r2 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB19_12 +; SOFT-NEXT: str r7, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: bne .LBB19_3 ; SOFT-NEXT: .LBB19_19: -; SOFT-NEXT: str r7, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB19_13 -; SOFT-NEXT: b .LBB19_14 +; SOFT-NEXT: str r7, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: bne .LBB19_4 +; SOFT-NEXT: b .LBB19_5 ; SOFT-NEXT: .LBB19_20: -; SOFT-NEXT: movs r5, #1 -; SOFT-NEXT: lsls r5, r5, #31 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB19_22 +; SOFT-NEXT: ldr r0, .LCPI19_2 +; SOFT-NEXT: str r0, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: bne .LBB19_7 ; SOFT-NEXT: .LBB19_21: -; SOFT-NEXT: ldr r5, .LCPI19_2 +; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: beq .LBB19_8 +; SOFT-NEXT: b .LBB19_9 ; SOFT-NEXT: .LBB19_22: -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bne .LBB19_24 -; SOFT-NEXT: @ %bb.23: -; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: 
mov r1, r4 +; SOFT-NEXT: bne .LBB19_13 +; SOFT-NEXT: .LBB19_23: +; SOFT-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: bne .LBB19_14 ; SOFT-NEXT: .LBB19_24: -; SOFT-NEXT: mov r3, r4 -; SOFT-NEXT: add sp, #28 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: beq .LBB19_15 +; SOFT-NEXT: b .LBB19_16 ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.25: ; SOFT-NEXT: .LCPI19_0: -; SOFT-NEXT: .long 1205862399 @ 0x47dfffff -; SOFT-NEXT: .LCPI19_1: ; SOFT-NEXT: .long 3353346048 @ 0xc7e00000 +; SOFT-NEXT: .LCPI19_1: +; SOFT-NEXT: .long 1205862399 @ 0x47dfffff ; SOFT-NEXT: .LCPI19_2: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; @@ -2964,10 +2917,10 @@ define i19 @test_signed_i19_f16(half %f) nounwind { ; VFP2-NEXT: movlt r0, #0 ; VFP2-NEXT: movtlt r0, #65532 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr +; VFP2-NEXT: vcmp.f32 s0, s0 ; VFP2-NEXT: itt gt ; VFP2-NEXT: movwgt r0, #65535 ; VFP2-NEXT: movtgt r0, #3 -; VFP2-NEXT: vcmp.f32 s0, s0 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr ; VFP2-NEXT: it vs ; VFP2-NEXT: movvs r0, #0 @@ -3078,73 +3031,69 @@ define i50 @test_signed_i50_f16(half %f) nounwind { ; SOFT: @ %bb.0: ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #12 -; SOFT-NEXT: sub sp, #12 +; SOFT-NEXT: .pad #4 +; SOFT-NEXT: sub sp, #4 ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: ldr r1, .LCPI26_0 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r4, r0 ; SOFT-NEXT: movs r0, #27 ; SOFT-NEXT: lsls r1, r0, #27 -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: bl __aeabi_f2lz -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: cmp r7, #0 ; SOFT-NEXT: bne 
.LBB26_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: mov r5, r7 ; SOFT-NEXT: .LBB26_2: -; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: cmp r0, #0 ; SOFT-NEXT: beq .LBB26_4 ; SOFT-NEXT: @ %bb.3: -; SOFT-NEXT: mvns r4, r6 +; SOFT-NEXT: str r1, [sp] @ 4-byte Spill +; SOFT-NEXT: b .LBB26_5 ; SOFT-NEXT: .LBB26_4: -; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: bl __aeabi_fcmpun -; SOFT-NEXT: mov r1, r0 -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: bne .LBB26_6 -; SOFT-NEXT: @ %bb.5: +; SOFT-NEXT: ldr r0, .LCPI26_0 +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: .LBB26_5: +; SOFT-NEXT: ldr r1, .LCPI26_1 ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: .LBB26_6: -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB26_8 -; SOFT-NEXT: @ %bb.7: -; SOFT-NEXT: ldr r3, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: b .LBB26_9 -; SOFT-NEXT: .LBB26_8: -; SOFT-NEXT: ldr r3, .LCPI26_1 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: movs r7, #0 +; SOFT-NEXT: mvns r6, r7 +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: beq .LBB26_7 +; SOFT-NEXT: @ %bb.6: +; SOFT-NEXT: ldr r0, .LCPI26_2 +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: .LBB26_7: +; SOFT-NEXT: bne .LBB26_9 +; SOFT-NEXT: @ %bb.8: +; SOFT-NEXT: mov r6, r5 ; SOFT-NEXT: .LBB26_9: -; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB26_11 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: bl __aeabi_fcmpun +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: bne .LBB26_11 ; SOFT-NEXT: @ %bb.10: -; SOFT-NEXT: ldr r3, .LCPI26_2 +; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: .LBB26_11: -; SOFT-NEXT: cmp r1, #0 ; SOFT-NEXT: bne .LBB26_13 ; SOFT-NEXT: @ %bb.12: -; SOFT-NEXT: mov r6, r3 +; SOFT-NEXT: ldr r7, [sp] @ 4-byte Reload ; SOFT-NEXT: .LBB26_13: -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: 
.p2align 2 ; SOFT-NEXT: @ %bb.14: ; SOFT-NEXT: .LCPI26_0: -; SOFT-NEXT: .long 1476395007 @ 0x57ffffff -; SOFT-NEXT: .LCPI26_1: ; SOFT-NEXT: .long 4294836224 @ 0xfffe0000 +; SOFT-NEXT: .LCPI26_1: +; SOFT-NEXT: .long 1476395007 @ 0x57ffffff ; SOFT-NEXT: .LCPI26_2: ; SOFT-NEXT: .long 131071 @ 0x1ffff ; @@ -3166,11 +3115,12 @@ define i50 @test_signed_i50_f16(half %f) nounwind { ; VFP2-NEXT: movlt r0, #0 ; VFP2-NEXT: vcmp.f32 s2, s4 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: ittt gt -; VFP2-NEXT: movwgt r1, #65535 -; VFP2-NEXT: movtgt r1, #1 +; VFP2-NEXT: it gt ; VFP2-NEXT: movgt.w r0, #-1 ; VFP2-NEXT: vcmp.f32 s2, s2 +; VFP2-NEXT: itt gt +; VFP2-NEXT: movwgt r1, #65535 +; VFP2-NEXT: movtgt r1, #1 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr ; VFP2-NEXT: itt vs ; VFP2-NEXT: movvs r0, #0 @@ -3203,11 +3153,12 @@ define i50 @test_signed_i50_f16(half %f) nounwind { ; FP16-NEXT: movlt r0, #0 ; FP16-NEXT: vcmp.f32 s16, s2 ; FP16-NEXT: vmrs APSR_nzcv, fpscr -; FP16-NEXT: ittt gt -; FP16-NEXT: movwgt r1, #65535 -; FP16-NEXT: movtgt r1, #1 +; FP16-NEXT: it gt ; FP16-NEXT: movgt.w r0, #-1 ; FP16-NEXT: vcmp.f32 s16, s16 +; FP16-NEXT: itt gt +; FP16-NEXT: movwgt r1, #65535 +; FP16-NEXT: movtgt r1, #1 ; FP16-NEXT: vmrs APSR_nzcv, fpscr ; FP16-NEXT: itt vs ; FP16-NEXT: movvs r0, #0 @@ -3229,70 +3180,62 @@ define i64 @test_signed_i64_f16(half %f) nounwind { ; SOFT: @ %bb.0: ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #12 -; SOFT-NEXT: sub sp, #12 +; SOFT-NEXT: .pad #4 +; SOFT-NEXT: sub sp, #4 ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: ldr r1, .LCPI27_0 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: mov r4, r0 ; SOFT-NEXT: movs r0, #223 ; SOFT-NEXT: lsls r1, r0, #24 -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: mov r0, r4 
; SOFT-NEXT: bl __aeabi_f2lz -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: movs r2, #1 +; SOFT-NEXT: lsls r7, r2, #31 +; SOFT-NEXT: cmp r6, #0 ; SOFT-NEXT: bne .LBB27_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: mov r4, r5 +; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: .LBB27_2: -; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: str r0, [sp] @ 4-byte Spill ; SOFT-NEXT: beq .LBB27_4 ; SOFT-NEXT: @ %bb.3: -; SOFT-NEXT: mvns r4, r6 +; SOFT-NEXT: mov r7, r1 ; SOFT-NEXT: .LBB27_4: -; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: bl __aeabi_fcmpun -; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: ldr r1, .LCPI27_0 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: movs r6, #0 +; SOFT-NEXT: mvns r5, r6 ; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: bne .LBB27_6 +; SOFT-NEXT: beq .LBB27_6 ; SOFT-NEXT: @ %bb.5: -; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: ldr r7, .LCPI27_1 ; SOFT-NEXT: .LBB27_6: -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB27_8 +; SOFT-NEXT: bne .LBB27_8 ; SOFT-NEXT: @ %bb.7: -; SOFT-NEXT: ldr r3, [sp, #4] @ 4-byte Reload -; SOFT-NEXT: b .LBB27_9 +; SOFT-NEXT: ldr r5, [sp] @ 4-byte Reload ; SOFT-NEXT: .LBB27_8: -; SOFT-NEXT: movs r2, #1 -; SOFT-NEXT: lsls r3, r2, #31 -; SOFT-NEXT: .LBB27_9: -; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB27_11 -; SOFT-NEXT: @ %bb.10: -; SOFT-NEXT: ldr r3, .LCPI27_1 -; SOFT-NEXT: .LBB27_11: -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: bne .LBB27_13 -; SOFT-NEXT: @ %bb.12: -; SOFT-NEXT: mov r6, r3 -; SOFT-NEXT: .LBB27_13: +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: bl __aeabi_fcmpun +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: bne .LBB27_10 +; SOFT-NEXT: @ %bb.9: +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: .LBB27_10: +; SOFT-NEXT: bne .LBB27_12 +; SOFT-NEXT: @ %bb.11: +; SOFT-NEXT: mov r6, r7 +; SOFT-NEXT: 
.LBB27_12: ; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.14: +; SOFT-NEXT: @ %bb.13: ; SOFT-NEXT: .LCPI27_0: ; SOFT-NEXT: .long 1593835519 @ 0x5effffff ; SOFT-NEXT: .LCPI27_1: @@ -3379,110 +3322,106 @@ define i100 @test_signed_i100_f16(half %f) nounwind { ; SOFT-NEXT: sub sp, #20 ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: ldr r1, .LCPI28_0 -; SOFT-NEXT: bl __aeabi_fcmpgt ; SOFT-NEXT: mov r4, r0 ; SOFT-NEXT: movs r0, #241 ; SOFT-NEXT: lsls r1, r0, #24 -; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: str r2, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: str r3, [sp] @ 4-byte Spill -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB28_2 +; SOFT-NEXT: movs r5, #7 +; SOFT-NEXT: str r5, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: mvns r7, r5 +; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: beq .LBB28_17 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: beq .LBB28_18 ; SOFT-NEXT: .LBB28_2: -; SOFT-NEXT: movs r5, #0 -; SOFT-NEXT: mvns r1, r5 -; SOFT-NEXT: str r4, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: mov r4, r1 ; SOFT-NEXT: bne .LBB28_4 -; SOFT-NEXT: @ %bb.3: -; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: .LBB28_3: +; SOFT-NEXT: mov r2, r6 ; SOFT-NEXT: .LBB28_4: -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: bl __aeabi_fcmpun -; SOFT-NEXT: mov r3, r0 -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: bne .LBB28_6 +; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: beq .LBB28_6 ; SOFT-NEXT: @ %bb.5: -; SOFT-NEXT: mov r0, r4 +; 
SOFT-NEXT: mov r7, r3 ; SOFT-NEXT: .LBB28_6: -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: ldr r6, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: bne .LBB28_8 +; SOFT-NEXT: ldr r1, .LCPI28_0 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: movs r6, #0 +; SOFT-NEXT: mvns r5, r6 +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: beq .LBB28_19 ; SOFT-NEXT: @ %bb.7: -; SOFT-NEXT: str r7, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: beq .LBB28_20 ; SOFT-NEXT: .LBB28_8: -; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: bne .LBB28_10 -; SOFT-NEXT: @ %bb.9: -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: mov r7, r5 +; SOFT-NEXT: beq .LBB28_21 +; SOFT-NEXT: .LBB28_9: +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: bne .LBB28_11 ; SOFT-NEXT: .LBB28_10: -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: beq .LBB28_18 -; SOFT-NEXT: @ %bb.11: -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB28_19 -; SOFT-NEXT: .LBB28_12: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB28_14 +; SOFT-NEXT: ldr r5, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: .LBB28_11: +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: bl __aeabi_fcmpun +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: beq .LBB28_22 +; SOFT-NEXT: @ %bb.12: +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: beq .LBB28_23 ; SOFT-NEXT: .LBB28_13: -; SOFT-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: beq .LBB28_24 ; SOFT-NEXT: .LBB28_14: -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r2, r5 ; SOFT-NEXT: bne .LBB28_16 -; SOFT-NEXT: @ %bb.15: -; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: .LBB28_15: +; SOFT-NEXT: ldr r6, [sp, #16] @ 4-byte Reload ; SOFT-NEXT: .LBB28_16: -; SOFT-NEXT: movs r4, #7 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB28_20 -; SOFT-NEXT: @ %bb.17: -; SOFT-NEXT: ldr r7, [sp] @ 4-byte Reload -; SOFT-NEXT: b .LBB28_21 +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: add sp, #20 +; 
SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .LBB28_17: +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: bne .LBB28_2 ; SOFT-NEXT: .LBB28_18: -; SOFT-NEXT: mov r1, r2 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB28_12 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: beq .LBB28_3 +; SOFT-NEXT: b .LBB28_4 ; SOFT-NEXT: .LBB28_19: -; SOFT-NEXT: str r7, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB28_13 -; SOFT-NEXT: b .LBB28_14 +; SOFT-NEXT: str r7, [sp, #16] @ 4-byte Spill +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: bne .LBB28_8 ; SOFT-NEXT: .LBB28_20: -; SOFT-NEXT: mvns r7, r4 +; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: mov r7, r5 +; SOFT-NEXT: bne .LBB28_9 ; SOFT-NEXT: .LBB28_21: -; SOFT-NEXT: ldr r6, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB28_23 -; SOFT-NEXT: @ %bb.22: -; SOFT-NEXT: mov r4, r7 +; SOFT-NEXT: ldr r7, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: beq .LBB28_10 +; SOFT-NEXT: b .LBB28_11 +; SOFT-NEXT: .LBB28_22: +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: bne .LBB28_13 ; SOFT-NEXT: .LBB28_23: -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bne .LBB28_25 -; SOFT-NEXT: @ %bb.24: -; SOFT-NEXT: mov r5, r4 -; SOFT-NEXT: .LBB28_25: -; SOFT-NEXT: mov r3, r5 -; SOFT-NEXT: add sp, #20 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: bne .LBB28_14 +; SOFT-NEXT: .LBB28_24: +; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: beq .LBB28_15 +; SOFT-NEXT: b .LBB28_16 ; SOFT-NEXT: .p2align 2 -; SOFT-NEXT: @ %bb.26: +; SOFT-NEXT: @ %bb.25: ; SOFT-NEXT: .LCPI28_0: ; SOFT-NEXT: .long 1895825407 @ 0x70ffffff ; @@ -3579,109 +3518,104 @@ define i128 @test_signed_i128_f16(half %f) nounwind { ; SOFT-NEXT: sub sp, #20 ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: ldr r1, .LCPI29_0 -; SOFT-NEXT: bl __aeabi_fcmpgt ; SOFT-NEXT: mov r4, r0 ; SOFT-NEXT: movs r0, #255 ; 
SOFT-NEXT: lsls r1, r0, #24 -; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: bl __fixsfti -; SOFT-NEXT: str r1, [sp, #4] @ 4-byte Spill -; SOFT-NEXT: str r2, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: str r3, [sp] @ 4-byte Spill -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB29_2 +; SOFT-NEXT: movs r5, #1 +; SOFT-NEXT: lsls r7, r5, #31 +; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: beq .LBB29_18 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: beq .LBB29_19 ; SOFT-NEXT: .LBB29_2: -; SOFT-NEXT: movs r5, #0 -; SOFT-NEXT: mvns r1, r5 -; SOFT-NEXT: str r4, [sp, #16] @ 4-byte Spill -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill -; SOFT-NEXT: mov r4, r1 ; SOFT-NEXT: bne .LBB29_4 -; SOFT-NEXT: @ %bb.3: -; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: .LBB29_3: +; SOFT-NEXT: mov r2, r6 ; SOFT-NEXT: .LBB29_4: -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: bl __aeabi_fcmpun -; SOFT-NEXT: mov r3, r0 -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: bne .LBB29_6 +; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: str r1, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: str r0, [sp, #12] @ 4-byte Spill +; SOFT-NEXT: beq .LBB29_6 ; SOFT-NEXT: @ %bb.5: -; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r7, r3 ; SOFT-NEXT: .LBB29_6: -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: ldr r6, [sp, #8] @ 4-byte Reload -; SOFT-NEXT: bne .LBB29_8 +; SOFT-NEXT: ldr r1, .LCPI29_0 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: movs r6, #0 +; SOFT-NEXT: mvns r5, r6 +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: beq .LBB29_8 ; SOFT-NEXT: @ %bb.7: -; SOFT-NEXT: str r7, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: ldr r7, .LCPI29_1 ; SOFT-NEXT: .LBB29_8: -; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: bne .LBB29_10 +; SOFT-NEXT: str r7, [sp, #16] @ 4-byte 
Spill +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: beq .LBB29_20 ; SOFT-NEXT: @ %bb.9: -; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: mov r7, r5 +; SOFT-NEXT: beq .LBB29_21 ; SOFT-NEXT: .LBB29_10: -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: beq .LBB29_18 -; SOFT-NEXT: @ %bb.11: -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB29_19 +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: bne .LBB29_12 +; SOFT-NEXT: .LBB29_11: +; SOFT-NEXT: ldr r5, [sp, #12] @ 4-byte Reload ; SOFT-NEXT: .LBB29_12: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB29_14 -; SOFT-NEXT: .LBB29_13: -; SOFT-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: bl __aeabi_fcmpun +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: beq .LBB29_22 +; SOFT-NEXT: @ %bb.13: +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: beq .LBB29_23 ; SOFT-NEXT: .LBB29_14: -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r2, r5 -; SOFT-NEXT: bne .LBB29_16 -; SOFT-NEXT: @ %bb.15: ; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: beq .LBB29_24 +; SOFT-NEXT: .LBB29_15: +; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: bne .LBB29_17 ; SOFT-NEXT: .LBB29_16: -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB29_20 -; SOFT-NEXT: @ %bb.17: -; SOFT-NEXT: ldr r6, [sp] @ 4-byte Reload -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB29_21 -; SOFT-NEXT: b .LBB29_22 +; SOFT-NEXT: mov r6, r3 +; SOFT-NEXT: .LBB29_17: +; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: add sp, #20 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .LBB29_18: -; SOFT-NEXT: mov r1, r2 -; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: bne .LBB29_12 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: bne .LBB29_2 ; SOFT-NEXT: .LBB29_19: -; SOFT-NEXT: str r7, [sp, #12] @ 4-byte Spill -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB29_13 -; SOFT-NEXT: b .LBB29_14 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: beq .LBB29_3 +; SOFT-NEXT: b .LBB29_4 ; SOFT-NEXT: .LBB29_20: -; SOFT-NEXT: movs r4, #1 -; SOFT-NEXT: lsls r6, r4, #31 
-; SOFT-NEXT: ldr r4, [sp, #16] @ 4-byte Reload -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB29_22 +; SOFT-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: mov r7, r5 +; SOFT-NEXT: bne .LBB29_10 ; SOFT-NEXT: .LBB29_21: -; SOFT-NEXT: ldr r6, .LCPI29_1 +; SOFT-NEXT: ldr r7, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: str r0, [sp, #8] @ 4-byte Spill +; SOFT-NEXT: beq .LBB29_11 +; SOFT-NEXT: b .LBB29_12 ; SOFT-NEXT: .LBB29_22: -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: bne .LBB29_24 -; SOFT-NEXT: @ %bb.23: -; SOFT-NEXT: mov r5, r6 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: bne .LBB29_14 +; SOFT-NEXT: .LBB29_23: +; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: bne .LBB29_15 ; SOFT-NEXT: .LBB29_24: -; SOFT-NEXT: mov r3, r5 -; SOFT-NEXT: add sp, #20 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; SOFT-NEXT: beq .LBB29_16 +; SOFT-NEXT: b .LBB29_17 ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.25: ; SOFT-NEXT: .LCPI29_0: diff --git a/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll b/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll index 14eb67104edda..4cc5f943dadff 100644 --- a/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll +++ b/llvm/test/CodeGen/ARM/fptoui-sat-scalar.ll @@ -24,41 +24,38 @@ define i1 @test_signed_i1_f32(float %f) nounwind { ; SOFT-NEXT: .save {r4, r5, r6, lr} ; SOFT-NEXT: push {r4, r5, r6, lr} ; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: movs r0, #127 -; SOFT-NEXT: lsls r1, r0, #23 -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: mov r4, r0 ; SOFT-NEXT: movs r1, #0 -; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: bl __aeabi_fcmpge ; SOFT-NEXT: mov r6, r0 ; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: bl __aeabi_f2uiz +; SOFT-NEXT: mov r4, r0 ; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB0_3 +; SOFT-NEXT: bne .LBB0_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB0_4 +; SOFT-NEXT: mov r4, r6 ; SOFT-NEXT: .LBB0_2: -; SOFT-NEXT: pop {r4, r5, 
r6, pc} -; SOFT-NEXT: .LBB0_3: -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB0_2 +; SOFT-NEXT: movs r0, #127 +; SOFT-NEXT: lsls r1, r0, #23 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: beq .LBB0_4 +; SOFT-NEXT: @ %bb.3: +; SOFT-NEXT: movs r4, #1 ; SOFT-NEXT: .LBB0_4: -; SOFT-NEXT: movs r0, #1 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: pop {r4, r5, r6, pc} ; ; VFP2-LABEL: test_signed_i1_f32: ; VFP2: @ %bb.0: -; VFP2-NEXT: vmov s2, r0 -; VFP2-NEXT: vmov.f32 s0, #1.000000e+00 -; VFP2-NEXT: vcvt.u32.f32 s4, s2 -; VFP2-NEXT: vcmp.f32 s2, #0 +; VFP2-NEXT: vmov s0, r0 +; VFP2-NEXT: vmov.f32 s4, #1.000000e+00 +; VFP2-NEXT: vcvt.u32.f32 s2, s0 +; VFP2-NEXT: vcmp.f32 s0, #0 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s2, s0 -; VFP2-NEXT: vmov r0, s4 +; VFP2-NEXT: vcmp.f32 s0, s4 +; VFP2-NEXT: vmov r0, s2 ; VFP2-NEXT: it lt ; VFP2-NEXT: movlt r0, #0 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr @@ -89,29 +86,27 @@ define i8 @test_signed_i8_f32(float %f) nounwind { ; SOFT: @ %bb.0: ; SOFT-NEXT: .save {r4, r5, r6, lr} ; SOFT-NEXT: push {r4, r5, r6, lr} -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: ldr r1, .LCPI1_0 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: mov r5, r0 ; SOFT-NEXT: movs r1, #0 -; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: bl __aeabi_f2uiz -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB1_3 +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: bne .LBB1_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB1_4 +; SOFT-NEXT: mov r4, r6 ; SOFT-NEXT: .LBB1_2: -; SOFT-NEXT: pop {r4, r5, r6, pc} -; SOFT-NEXT: .LBB1_3: +; SOFT-NEXT: ldr r1, .LCPI1_0 ; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB1_2 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: beq .LBB1_4 +; SOFT-NEXT: @ %bb.3: +; 
SOFT-NEXT: movs r4, #255 ; SOFT-NEXT: .LBB1_4: -; SOFT-NEXT: movs r0, #255 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: pop {r4, r5, r6, pc} ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.5: @@ -163,29 +158,27 @@ define i13 @test_signed_i13_f32(float %f) nounwind { ; SOFT: @ %bb.0: ; SOFT-NEXT: .save {r4, r5, r6, lr} ; SOFT-NEXT: push {r4, r5, r6, lr} -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: ldr r1, .LCPI2_0 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: mov r5, r0 ; SOFT-NEXT: movs r1, #0 -; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: bl __aeabi_f2uiz -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB2_3 +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: bne .LBB2_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB2_4 +; SOFT-NEXT: mov r4, r6 ; SOFT-NEXT: .LBB2_2: -; SOFT-NEXT: pop {r4, r5, r6, pc} -; SOFT-NEXT: .LBB2_3: +; SOFT-NEXT: ldr r1, .LCPI2_0 ; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB2_2 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: beq .LBB2_4 +; SOFT-NEXT: @ %bb.3: +; SOFT-NEXT: ldr r4, .LCPI2_1 ; SOFT-NEXT: .LBB2_4: -; SOFT-NEXT: ldr r0, .LCPI2_1 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: pop {r4, r5, r6, pc} ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.5: @@ -239,29 +232,27 @@ define i16 @test_signed_i16_f32(float %f) nounwind { ; SOFT: @ %bb.0: ; SOFT-NEXT: .save {r4, r5, r6, lr} ; SOFT-NEXT: push {r4, r5, r6, lr} -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: ldr r1, .LCPI3_0 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: mov r5, r0 ; SOFT-NEXT: movs r1, #0 -; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: bl __aeabi_f2uiz -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB3_3 +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: 
cmp r6, #0 +; SOFT-NEXT: bne .LBB3_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB3_4 +; SOFT-NEXT: mov r4, r6 ; SOFT-NEXT: .LBB3_2: -; SOFT-NEXT: pop {r4, r5, r6, pc} -; SOFT-NEXT: .LBB3_3: +; SOFT-NEXT: ldr r1, .LCPI3_0 ; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB3_2 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: beq .LBB3_4 +; SOFT-NEXT: @ %bb.3: +; SOFT-NEXT: ldr r4, .LCPI3_1 ; SOFT-NEXT: .LBB3_4: -; SOFT-NEXT: ldr r0, .LCPI3_1 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: pop {r4, r5, r6, pc} ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.5: @@ -315,29 +306,27 @@ define i19 @test_signed_i19_f32(float %f) nounwind { ; SOFT: @ %bb.0: ; SOFT-NEXT: .save {r4, r5, r6, lr} ; SOFT-NEXT: push {r4, r5, r6, lr} -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: ldr r1, .LCPI4_0 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: mov r5, r0 ; SOFT-NEXT: movs r1, #0 -; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: bl __aeabi_f2uiz -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB4_3 +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: bne .LBB4_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB4_4 +; SOFT-NEXT: mov r4, r6 ; SOFT-NEXT: .LBB4_2: -; SOFT-NEXT: pop {r4, r5, r6, pc} -; SOFT-NEXT: .LBB4_3: +; SOFT-NEXT: ldr r1, .LCPI4_0 ; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB4_2 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: beq .LBB4_4 +; SOFT-NEXT: @ %bb.3: +; SOFT-NEXT: ldr r4, .LCPI4_1 ; SOFT-NEXT: .LBB4_4: -; SOFT-NEXT: ldr r0, .LCPI4_1 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: pop {r4, r5, r6, pc} ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.5: @@ -394,31 +383,28 @@ define i32 @test_signed_i32_f32(float %f) nounwind { ; SOFT-NEXT: push {r4, r5, r6, r7, lr} ; SOFT-NEXT: .pad #4 ; SOFT-NEXT: sub sp, #4 -; 
SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: ldr r1, .LCPI5_0 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: mov r6, r0 ; SOFT-NEXT: movs r4, #0 -; SOFT-NEXT: mov r0, r7 ; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: bl __aeabi_f2uiz -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB5_3 +; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: bne .LBB5_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB5_4 +; SOFT-NEXT: mov r5, r7 ; SOFT-NEXT: .LBB5_2: -; SOFT-NEXT: add sp, #4 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB5_3: +; SOFT-NEXT: ldr r1, .LCPI5_0 ; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB5_2 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: beq .LBB5_4 +; SOFT-NEXT: @ %bb.3: +; SOFT-NEXT: mvns r5, r4 ; SOFT-NEXT: .LBB5_4: -; SOFT-NEXT: mvns r0, r4 +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 @@ -443,45 +429,39 @@ define i50 @test_signed_i50_f32(float %f) nounwind { ; SOFT-NEXT: push {r4, r5, r6, r7, lr} ; SOFT-NEXT: .pad #4 ; SOFT-NEXT: sub sp, #4 -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: ldr r1, .LCPI6_0 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: movs r5, #0 +; SOFT-NEXT: mov r1, r5 ; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: bl __aeabi_f2ulz -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB6_5 -; SOFT-NEXT: @ %bb.1: +; SOFT-NEXT: mov r7, r1 ; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB6_6 +; SOFT-NEXT: bne .LBB6_2 +; SOFT-NEXT: @ %bb.1: +; SOFT-NEXT: mov r7, r4 ; SOFT-NEXT: .LBB6_2: -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB6_7 -; SOFT-NEXT: 
.LBB6_3: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB6_8 +; SOFT-NEXT: bne .LBB6_4 +; SOFT-NEXT: @ %bb.3: +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: .LBB6_4: -; SOFT-NEXT: add sp, #4 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB6_5: -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB6_2 +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: ldr r1, .LCPI6_0 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: mvns r0, r5 +; SOFT-NEXT: cmp r1, #0 +; SOFT-NEXT: bne .LBB6_6 +; SOFT-NEXT: @ %bb.5: +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: .LBB6_6: -; SOFT-NEXT: mvns r0, r6 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB6_3 -; SOFT-NEXT: .LBB6_7: -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB6_4 +; SOFT-NEXT: beq .LBB6_8 +; SOFT-NEXT: @ %bb.7: +; SOFT-NEXT: ldr r7, .LCPI6_1 ; SOFT-NEXT: .LBB6_8: -; SOFT-NEXT: ldr r1, .LCPI6_1 +; SOFT-NEXT: mov r1, r7 ; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 @@ -528,48 +508,43 @@ define i64 @test_signed_i64_f32(float %f) nounwind { ; SOFT-NEXT: push {r4, r5, r6, r7, lr} ; SOFT-NEXT: .pad #4 ; SOFT-NEXT: sub sp, #4 -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: ldr r1, .LCPI7_0 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: movs r4, #0 +; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: bl __aeabi_fcmpge ; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: bl __aeabi_f2ulz -; SOFT-NEXT: mov r3, r0 +; SOFT-NEXT: mov r7, r0 ; SOFT-NEXT: cmp r5, #0 ; SOFT-NEXT: bne .LBB7_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: mov r3, r5 +; SOFT-NEXT: mov r1, r5 ; SOFT-NEXT: .LBB7_2: -; SOFT-NEXT: mvns r2, r6 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: beq .LBB7_7 +; SOFT-NEXT: str r1, [sp] @ 4-byte Spill +; SOFT-NEXT: bne .LBB7_4 ; SOFT-NEXT: @ %bb.3: -; SOFT-NEXT: cmp r5, #0 -; 
SOFT-NEXT: beq .LBB7_8 +; SOFT-NEXT: mov r7, r5 ; SOFT-NEXT: .LBB7_4: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB7_6 -; SOFT-NEXT: .LBB7_5: -; SOFT-NEXT: mov r2, r1 +; SOFT-NEXT: ldr r1, .LCPI7_0 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: mvns r1, r4 +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: mov r0, r1 +; SOFT-NEXT: beq .LBB7_7 +; SOFT-NEXT: @ %bb.5: +; SOFT-NEXT: beq .LBB7_8 ; SOFT-NEXT: .LBB7_6: -; SOFT-NEXT: mov r1, r2 ; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .LBB7_7: -; SOFT-NEXT: mov r0, r3 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB7_4 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: bne .LBB7_6 ; SOFT-NEXT: .LBB7_8: -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB7_5 -; SOFT-NEXT: b .LBB7_6 +; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload +; SOFT-NEXT: add sp, #4 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.9: ; SOFT-NEXT: .LCPI7_0: @@ -609,76 +584,71 @@ define i100 @test_signed_i100_f32(float %f) nounwind { ; SOFT: @ %bb.0: ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #4 -; SOFT-NEXT: sub sp, #4 -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: ldr r1, .LCPI8_0 -; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: .pad #12 +; SOFT-NEXT: sub sp, #12 ; SOFT-NEXT: mov r4, r0 ; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: mov r0, r7 ; SOFT-NEXT: mov r1, r6 ; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: bl __fixunssfti -; SOFT-NEXT: mov r7, r1 -; SOFT-NEXT: str r2, [sp] @ 4-byte Spill -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB8_2 +; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: beq .LBB8_11 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: beq .LBB8_12 ; SOFT-NEXT: .LBB8_2: -; SOFT-NEXT: mvns r2, r6 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: mov r6, r2 ; SOFT-NEXT: bne .LBB8_4 -; SOFT-NEXT: @ %bb.3: -; 
SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: .LBB8_3: +; SOFT-NEXT: mov r1, r7 ; SOFT-NEXT: .LBB8_4: -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: str r1, [sp] @ 4-byte Spill +; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill ; SOFT-NEXT: bne .LBB8_6 ; SOFT-NEXT: @ %bb.5: -; SOFT-NEXT: mov r7, r5 +; SOFT-NEXT: mov r5, r7 ; SOFT-NEXT: .LBB8_6: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: mov r1, r2 +; SOFT-NEXT: ldr r1, .LCPI8_0 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: mvns r2, r6 +; SOFT-NEXT: movs r3, #15 +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: mov r0, r2 ; SOFT-NEXT: beq .LBB8_13 ; SOFT-NEXT: @ %bb.7: -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: mov r1, r2 ; SOFT-NEXT: beq .LBB8_14 ; SOFT-NEXT: .LBB8_8: -; SOFT-NEXT: cmp r4, #0 ; SOFT-NEXT: beq .LBB8_15 ; SOFT-NEXT: .LBB8_9: -; SOFT-NEXT: cmp r5, #0 ; SOFT-NEXT: beq .LBB8_16 ; SOFT-NEXT: .LBB8_10: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB8_12 +; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .LBB8_11: -; SOFT-NEXT: movs r3, #15 +; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: bne .LBB8_2 ; SOFT-NEXT: .LBB8_12: -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: add sp, #4 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: beq .LBB8_3 +; SOFT-NEXT: b .LBB8_4 ; SOFT-NEXT: .LBB8_13: -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r2 ; SOFT-NEXT: bne .LBB8_8 ; SOFT-NEXT: .LBB8_14: -; SOFT-NEXT: str r5, [sp] @ 4-byte Spill -; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload ; SOFT-NEXT: bne .LBB8_9 ; SOFT-NEXT: .LBB8_15: -; SOFT-NEXT: ldr r2, [sp] @ 4-byte Reload -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload ; SOFT-NEXT: bne .LBB8_10 ; SOFT-NEXT: .LBB8_16: -; SOFT-NEXT: mov r3, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB8_11 -; SOFT-NEXT: b .LBB8_12 +; SOFT-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: pop {r4, 
r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.17: ; SOFT-NEXT: .LCPI8_0: @@ -722,75 +692,72 @@ define i128 @test_signed_i128_f32(float %f) nounwind { ; SOFT: @ %bb.0: ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #4 -; SOFT-NEXT: sub sp, #4 -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: ldr r1, .LCPI9_0 -; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: .pad #12 +; SOFT-NEXT: sub sp, #12 ; SOFT-NEXT: mov r4, r0 ; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: mov r0, r7 ; SOFT-NEXT: mov r1, r6 ; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: bl __fixunssfti -; SOFT-NEXT: mov r7, r1 -; SOFT-NEXT: str r3, [sp] @ 4-byte Spill -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB9_2 +; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: beq .LBB9_11 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: beq .LBB9_12 ; SOFT-NEXT: .LBB9_2: -; SOFT-NEXT: mvns r6, r6 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: mov r3, r6 ; SOFT-NEXT: bne .LBB9_4 -; SOFT-NEXT: @ %bb.3: -; SOFT-NEXT: mov r3, r0 +; SOFT-NEXT: .LBB9_3: +; SOFT-NEXT: mov r1, r7 ; SOFT-NEXT: .LBB9_4: -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: str r1, [sp] @ 4-byte Spill +; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill ; SOFT-NEXT: bne .LBB9_6 ; SOFT-NEXT: @ %bb.5: -; SOFT-NEXT: mov r7, r5 +; SOFT-NEXT: mov r5, r7 ; SOFT-NEXT: .LBB9_6: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: bne .LBB9_8 -; SOFT-NEXT: @ %bb.7: -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: .LBB9_8: -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB9_10 -; SOFT-NEXT: @ %bb.9: -; SOFT-NEXT: mov r2, r5 -; SOFT-NEXT: .LBB9_10: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: mov r7, r6 +; SOFT-NEXT: ldr r1, .LCPI9_0 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: mvns r3, r6 +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: mov r0, r3 +; SOFT-NEXT: beq 
.LBB9_13 +; SOFT-NEXT: @ %bb.7: +; SOFT-NEXT: mov r1, r3 +; SOFT-NEXT: beq .LBB9_14 +; SOFT-NEXT: .LBB9_8: +; SOFT-NEXT: mov r2, r3 ; SOFT-NEXT: beq .LBB9_15 -; SOFT-NEXT: @ %bb.11: -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: .LBB9_9: ; SOFT-NEXT: beq .LBB9_16 +; SOFT-NEXT: .LBB9_10: +; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .LBB9_11: +; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: bne .LBB9_2 ; SOFT-NEXT: .LBB9_12: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB9_14 +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: beq .LBB9_3 +; SOFT-NEXT: b .LBB9_4 ; SOFT-NEXT: .LBB9_13: -; SOFT-NEXT: ldr r6, [sp] @ 4-byte Reload +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r3 +; SOFT-NEXT: bne .LBB9_8 ; SOFT-NEXT: .LBB9_14: -; SOFT-NEXT: mov r0, r3 -; SOFT-NEXT: mov r2, r7 -; SOFT-NEXT: mov r3, r6 -; SOFT-NEXT: add sp, #4 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload +; SOFT-NEXT: mov r2, r3 +; SOFT-NEXT: bne .LBB9_9 ; SOFT-NEXT: .LBB9_15: -; SOFT-NEXT: mov r7, r2 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB9_12 +; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: bne .LBB9_10 ; SOFT-NEXT: .LBB9_16: -; SOFT-NEXT: str r5, [sp] @ 4-byte Spill -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB9_13 -; SOFT-NEXT: b .LBB9_14 +; SOFT-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.17: ; SOFT-NEXT: .LCPI9_0: @@ -852,33 +819,32 @@ define i1 @test_signed_i1_f64(double %f) nounwind { ; SOFT-NEXT: .pad #4 ; SOFT-NEXT: sub sp, #4 ; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: movs r7, #0 -; SOFT-NEXT: ldr r3, .LCPI10_0 -; SOFT-NEXT: mov r2, r7 -; SOFT-NEXT: bl __aeabi_dcmpgt ; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: mov r2, r7 -; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: movs r6, #0 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r6 ; SOFT-NEXT: bl __aeabi_dcmpge ; SOFT-NEXT: mov 
r7, r0 -; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: bl __aeabi_d2uiz ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB10_3 +; SOFT-NEXT: bne .LBB10_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB10_4 +; SOFT-NEXT: mov r0, r7 ; SOFT-NEXT: .LBB10_2: +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: ldr r3, .LCPI10_0 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: bl __aeabi_dcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: bne .LBB10_4 +; SOFT-NEXT: @ %bb.3: +; SOFT-NEXT: mov r0, r7 ; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB10_3: -; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB10_2 ; SOFT-NEXT: .LBB10_4: ; SOFT-NEXT: movs r0, #1 ; SOFT-NEXT: add sp, #4 @@ -931,33 +897,32 @@ define i8 @test_signed_i8_f64(double %f) nounwind { ; SOFT-NEXT: .pad #4 ; SOFT-NEXT: sub sp, #4 ; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: movs r7, #0 -; SOFT-NEXT: ldr r3, .LCPI11_0 -; SOFT-NEXT: mov r2, r7 -; SOFT-NEXT: bl __aeabi_dcmpgt ; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: mov r2, r7 -; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: movs r6, #0 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r6 ; SOFT-NEXT: bl __aeabi_dcmpge ; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: bl __aeabi_d2uiz ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB11_3 +; SOFT-NEXT: bne .LBB11_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB11_4 +; SOFT-NEXT: mov r0, r7 ; SOFT-NEXT: .LBB11_2: +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: ldr r3, .LCPI11_0 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: bl __aeabi_dcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: bne .LBB11_4 +; SOFT-NEXT: @ %bb.3: +; SOFT-NEXT: mov r0, r7 ; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: 
.LBB11_3: -; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB11_2 ; SOFT-NEXT: .LBB11_4: ; SOFT-NEXT: movs r0, #255 ; SOFT-NEXT: add sp, #4 @@ -1018,33 +983,32 @@ define i13 @test_signed_i13_f64(double %f) nounwind { ; SOFT-NEXT: .pad #4 ; SOFT-NEXT: sub sp, #4 ; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: movs r7, #0 -; SOFT-NEXT: ldr r3, .LCPI12_0 -; SOFT-NEXT: mov r2, r7 -; SOFT-NEXT: bl __aeabi_dcmpgt ; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: mov r2, r7 -; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: movs r6, #0 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r6 ; SOFT-NEXT: bl __aeabi_dcmpge ; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: bl __aeabi_d2uiz ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB12_3 +; SOFT-NEXT: bne .LBB12_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB12_4 +; SOFT-NEXT: mov r0, r7 ; SOFT-NEXT: .LBB12_2: +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: ldr r3, .LCPI12_0 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: bl __aeabi_dcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: bne .LBB12_4 +; SOFT-NEXT: @ %bb.3: +; SOFT-NEXT: mov r0, r7 ; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB12_3: -; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB12_2 ; SOFT-NEXT: .LBB12_4: ; SOFT-NEXT: ldr r0, .LCPI12_1 ; SOFT-NEXT: add sp, #4 @@ -1107,33 +1071,32 @@ define i16 @test_signed_i16_f64(double %f) nounwind { ; SOFT-NEXT: .pad #4 ; SOFT-NEXT: sub sp, #4 ; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: movs r7, #0 -; SOFT-NEXT: ldr r3, .LCPI13_0 -; SOFT-NEXT: mov r2, r7 -; SOFT-NEXT: bl __aeabi_dcmpgt ; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: mov r2, r7 -; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: movs r6, #0 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r6 ; 
SOFT-NEXT: bl __aeabi_dcmpge ; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: bl __aeabi_d2uiz ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB13_3 +; SOFT-NEXT: bne .LBB13_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB13_4 +; SOFT-NEXT: mov r0, r7 ; SOFT-NEXT: .LBB13_2: +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: ldr r3, .LCPI13_0 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: bl __aeabi_dcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: bne .LBB13_4 +; SOFT-NEXT: @ %bb.3: +; SOFT-NEXT: mov r0, r7 ; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB13_3: -; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB13_2 ; SOFT-NEXT: .LBB13_4: ; SOFT-NEXT: ldr r0, .LCPI13_1 ; SOFT-NEXT: add sp, #4 @@ -1196,33 +1159,32 @@ define i19 @test_signed_i19_f64(double %f) nounwind { ; SOFT-NEXT: .pad #4 ; SOFT-NEXT: sub sp, #4 ; SOFT-NEXT: mov r4, r1 -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: movs r7, #0 -; SOFT-NEXT: ldr r3, .LCPI14_0 -; SOFT-NEXT: mov r2, r7 -; SOFT-NEXT: bl __aeabi_dcmpgt ; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: mov r2, r7 -; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: movs r6, #0 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r3, r6 ; SOFT-NEXT: bl __aeabi_dcmpge ; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: bl __aeabi_d2uiz ; SOFT-NEXT: cmp r7, #0 -; SOFT-NEXT: beq .LBB14_3 +; SOFT-NEXT: bne .LBB14_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB14_4 +; SOFT-NEXT: mov r0, r7 ; SOFT-NEXT: .LBB14_2: +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: ldr r3, .LCPI14_0 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: bl __aeabi_dcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: bne .LBB14_4 +; SOFT-NEXT: @ %bb.3: +; SOFT-NEXT: mov r0, r7 ; SOFT-NEXT: add sp, #4 ; 
SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB14_3: -; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB14_2 ; SOFT-NEXT: .LBB14_4: ; SOFT-NEXT: ldr r0, .LCPI14_1 ; SOFT-NEXT: add sp, #4 @@ -1286,32 +1248,34 @@ define i32 @test_signed_i32_f64(double %f) nounwind { ; SOFT-NEXT: .pad #4 ; SOFT-NEXT: sub sp, #4 ; SOFT-NEXT: mov r5, r1 -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: ldr r2, .LCPI15_0 -; SOFT-NEXT: ldr r3, .LCPI15_1 -; SOFT-NEXT: bl __aeabi_dcmpgt -; SOFT-NEXT: str r0, [sp] @ 4-byte Spill +; SOFT-NEXT: mov r6, r0 ; SOFT-NEXT: movs r4, #0 -; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: mov r1, r5 ; SOFT-NEXT: mov r2, r4 ; SOFT-NEXT: mov r3, r4 ; SOFT-NEXT: bl __aeabi_dcmpge -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: mov r1, r5 ; SOFT-NEXT: bl __aeabi_d2uiz -; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: cmp r7, #0 ; SOFT-NEXT: bne .LBB15_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r0, r7 ; SOFT-NEXT: .LBB15_2: -; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: beq .LBB15_4 +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: ldr r2, .LCPI15_0 +; SOFT-NEXT: ldr r3, .LCPI15_1 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: bl __aeabi_dcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: bne .LBB15_4 ; SOFT-NEXT: @ %bb.3: -; SOFT-NEXT: mvns r0, r4 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: add sp, #4 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .LBB15_4: +; SOFT-NEXT: mvns r0, r4 ; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 @@ -1345,49 +1309,47 @@ define i50 @test_signed_i50_f64(double %f) nounwind { ; SOFT-NEXT: push {r4, r5, r6, r7, lr} ; SOFT-NEXT: .pad #4 ; SOFT-NEXT: sub sp, #4 -; SOFT-NEXT: mov r7, r1 -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: movs r0, #7 -; SOFT-NEXT: mvns r2, r0 -; SOFT-NEXT: ldr r3, .LCPI16_0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: bl __aeabi_dcmpgt -; SOFT-NEXT: str r0, [sp] 
@ 4-byte Spill -; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: mov r6, r1 +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: movs r5, #0 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r5 ; SOFT-NEXT: bl __aeabi_dcmpge -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r6 ; SOFT-NEXT: bl __aeabi_d2ulz -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: cmp r4, #0 ; SOFT-NEXT: bne .LBB16_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: .LBB16_2: -; SOFT-NEXT: ldr r2, [sp] @ 4-byte Reload -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB16_6 +; SOFT-NEXT: str r1, [sp] @ 4-byte Spill +; SOFT-NEXT: bne .LBB16_4 ; SOFT-NEXT: @ %bb.3: -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB16_7 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: .LBB16_4: -; SOFT-NEXT: cmp r2, #0 +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: movs r0, #7 +; SOFT-NEXT: mvns r2, r0 +; SOFT-NEXT: ldr r3, .LCPI16_0 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: bl __aeabi_dcmpgt +; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: mvns r0, r5 +; SOFT-NEXT: cmp r1, #0 +; SOFT-NEXT: beq .LBB16_7 +; SOFT-NEXT: @ %bb.5: +; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload ; SOFT-NEXT: bne .LBB16_8 -; SOFT-NEXT: .LBB16_5: +; SOFT-NEXT: .LBB16_6: ; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB16_6: -; SOFT-NEXT: mvns r0, r6 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB16_4 ; SOFT-NEXT: .LBB16_7: -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: beq .LBB16_5 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload +; SOFT-NEXT: beq .LBB16_6 ; SOFT-NEXT: .LBB16_8: ; SOFT-NEXT: ldr r1, .LCPI16_1 ; SOFT-NEXT: add sp, #4 @@ -1458,54 +1420,46 @@ define i64 @test_signed_i64_f64(double %f) nounwind { ; SOFT-NEXT: push {r4, r5, r6, r7, lr} ; SOFT-NEXT: .pad #4 ; SOFT-NEXT: sub sp, #4 -; 
SOFT-NEXT: mov r7, r1 +; SOFT-NEXT: mov r4, r1 ; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: mvns r4, r6 -; SOFT-NEXT: ldr r3, .LCPI17_0 -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: bl __aeabi_dcmpgt -; SOFT-NEXT: str r0, [sp] @ 4-byte Spill -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: movs r7, #0 +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: mov r3, r7 ; SOFT-NEXT: bl __aeabi_dcmpge ; SOFT-NEXT: mov r6, r0 ; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: bl __aeabi_d2ulz -; SOFT-NEXT: mov r2, r0 ; SOFT-NEXT: cmp r6, #0 ; SOFT-NEXT: bne .LBB17_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r1, r6 ; SOFT-NEXT: .LBB17_2: -; SOFT-NEXT: ldr r3, [sp] @ 4-byte Reload -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: beq .LBB17_7 +; SOFT-NEXT: str r1, [sp] @ 4-byte Spill +; SOFT-NEXT: bne .LBB17_4 ; SOFT-NEXT: @ %bb.3: -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB17_8 +; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: .LBB17_4: -; SOFT-NEXT: cmp r3, #0 +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: mvns r7, r7 +; SOFT-NEXT: ldr r3, .LCPI17_0 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: bl __aeabi_dcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: mov r0, r7 ; SOFT-NEXT: bne .LBB17_6 -; SOFT-NEXT: .LBB17_5: -; SOFT-NEXT: mov r4, r1 +; SOFT-NEXT: @ %bb.5: +; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: .LBB17_6: -; SOFT-NEXT: mov r1, r4 +; SOFT-NEXT: bne .LBB17_8 +; SOFT-NEXT: @ %bb.7: +; SOFT-NEXT: ldr r7, [sp] @ 4-byte Reload +; SOFT-NEXT: .LBB17_8: +; SOFT-NEXT: mov r1, r7 ; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB17_7: -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB17_4 -; SOFT-NEXT: .LBB17_8: -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: cmp r3, #0 -; SOFT-NEXT: beq .LBB17_5 -; SOFT-NEXT: b .LBB17_6 ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.9: ; SOFT-NEXT: 
.LCPI17_0: @@ -1573,82 +1527,76 @@ define i100 @test_signed_i100_f64(double %f) nounwind { ; SOFT: @ %bb.0: ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #4 -; SOFT-NEXT: sub sp, #4 -; SOFT-NEXT: mov r7, r1 +; SOFT-NEXT: .pad #12 +; SOFT-NEXT: sub sp, #12 +; SOFT-NEXT: mov r6, r1 ; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: mvns r2, r6 -; SOFT-NEXT: ldr r3, .LCPI18_0 -; SOFT-NEXT: str r2, [sp] @ 4-byte Spill -; SOFT-NEXT: bl __aeabi_dcmpgt -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: movs r5, #0 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r5 ; SOFT-NEXT: bl __aeabi_dcmpge -; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: mov r7, r0 ; SOFT-NEXT: mov r0, r4 -; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: mov r1, r6 ; SOFT-NEXT: bl __fixunsdfti -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: mov r7, r1 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB18_2 +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: beq .LBB18_12 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: mov r4, r6 +; SOFT-NEXT: beq .LBB18_13 ; SOFT-NEXT: .LBB18_2: -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload ; SOFT-NEXT: bne .LBB18_4 -; SOFT-NEXT: @ %bb.3: -; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: .LBB18_3: +; SOFT-NEXT: mov r1, r7 ; SOFT-NEXT: .LBB18_4: -; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: str r1, [sp] @ 4-byte Spill +; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill ; SOFT-NEXT: bne .LBB18_6 ; SOFT-NEXT: @ %bb.5: -; SOFT-NEXT: mov r7, r6 +; SOFT-NEXT: mov r0, r7 ; SOFT-NEXT: .LBB18_6: -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: ldr r4, [sp] @ 4-byte Reload -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: beq .LBB18_13 -; SOFT-NEXT: @ %bb.7: -; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: mvns r5, r5 +; SOFT-NEXT: ldr r3, .LCPI18_0 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: bl 
__aeabi_dcmpgt +; SOFT-NEXT: movs r3, #15 +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: beq .LBB18_14 -; SOFT-NEXT: .LBB18_8: -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: @ %bb.7: +; SOFT-NEXT: mov r1, r5 ; SOFT-NEXT: beq .LBB18_15 -; SOFT-NEXT: .LBB18_9: -; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: .LBB18_8: ; SOFT-NEXT: beq .LBB18_16 +; SOFT-NEXT: .LBB18_9: +; SOFT-NEXT: bne .LBB18_11 ; SOFT-NEXT: .LBB18_10: -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB18_12 +; SOFT-NEXT: ldr r3, [sp, #8] @ 4-byte Reload ; SOFT-NEXT: .LBB18_11: -; SOFT-NEXT: movs r3, #15 -; SOFT-NEXT: .LBB18_12: -; SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: add sp, #4 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: add sp, #12 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .LBB18_12: +; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: bne .LBB18_2 ; SOFT-NEXT: .LBB18_13: -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB18_8 +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: beq .LBB18_3 +; SOFT-NEXT: b .LBB18_4 ; SOFT-NEXT: .LBB18_14: -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB18_9 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: bne .LBB18_8 ; SOFT-NEXT: .LBB18_15: -; SOFT-NEXT: mov r4, r2 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB18_10 +; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload +; SOFT-NEXT: bne .LBB18_9 ; SOFT-NEXT: .LBB18_16: -; SOFT-NEXT: mov r3, r6 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB18_11 -; SOFT-NEXT: b .LBB18_12 +; SOFT-NEXT: ldr r5, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: beq .LBB18_10 +; SOFT-NEXT: b .LBB18_11 ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.17: ; SOFT-NEXT: .LCPI18_0: @@ -1724,78 +1672,77 @@ define i128 @test_signed_i128_f64(double %f) nounwind { ; SOFT: @ %bb.0: ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #4 -; SOFT-NEXT: sub sp, #4 -; SOFT-NEXT: mov r7, r1 -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: mvns r4, r6 -; SOFT-NEXT: ldr r3, .LCPI19_0 -; 
SOFT-NEXT: mov r2, r4 -; SOFT-NEXT: bl __aeabi_dcmpgt -; SOFT-NEXT: str r0, [sp] @ 4-byte Spill -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: mov r2, r6 -; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: .pad #12 +; SOFT-NEXT: sub sp, #12 +; SOFT-NEXT: mov r6, r1 +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: movs r5, #0 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: mov r3, r5 ; SOFT-NEXT: bl __aeabi_dcmpge -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r6 ; SOFT-NEXT: bl __fixunsdfti -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r7, r1 -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB19_2 +; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: beq .LBB19_12 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: mov r5, r6 +; SOFT-NEXT: beq .LBB19_13 ; SOFT-NEXT: .LBB19_2: -; SOFT-NEXT: ldr r0, [sp] @ 4-byte Reload -; SOFT-NEXT: cmp r0, #0 -; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: bne .LBB19_4 -; SOFT-NEXT: @ %bb.3: -; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: .LBB19_3: +; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: .LBB19_4: -; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: str r1, [sp] @ 4-byte Spill +; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill ; SOFT-NEXT: bne .LBB19_6 ; SOFT-NEXT: @ %bb.5: -; SOFT-NEXT: mov r7, r6 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: .LBB19_6: -; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload -; SOFT-NEXT: cmp r1, #0 -; SOFT-NEXT: mov r1, r4 -; SOFT-NEXT: bne .LBB19_8 +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: mvns r5, r5 +; SOFT-NEXT: ldr r3, .LCPI19_0 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: mov r2, r5 +; SOFT-NEXT: bl __aeabi_dcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: beq .LBB19_14 ; SOFT-NEXT: @ %bb.7: -; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: beq .LBB19_15 ; SOFT-NEXT: .LBB19_8: -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB19_10 -; SOFT-NEXT: @ %bb.9: -; SOFT-NEXT: mov r2, r6 +; SOFT-NEXT: mov r2, r5 +; 
SOFT-NEXT: beq .LBB19_16 +; SOFT-NEXT: .LBB19_9: +; SOFT-NEXT: bne .LBB19_11 ; SOFT-NEXT: .LBB19_10: -; SOFT-NEXT: ldr r5, [sp] @ 4-byte Reload -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: mov r5, r4 -; SOFT-NEXT: bne .LBB19_12 -; SOFT-NEXT: @ %bb.11: -; SOFT-NEXT: mov r5, r2 +; SOFT-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: .LBB19_11: +; SOFT-NEXT: mov r3, r5 +; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .LBB19_12: -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: bne .LBB19_14 -; SOFT-NEXT: @ %bb.13: -; SOFT-NEXT: mov r3, r6 +; SOFT-NEXT: mov r3, r4 +; SOFT-NEXT: bne .LBB19_2 +; SOFT-NEXT: .LBB19_13: +; SOFT-NEXT: mov r2, r4 +; SOFT-NEXT: beq .LBB19_3 +; SOFT-NEXT: b .LBB19_4 ; SOFT-NEXT: .LBB19_14: -; SOFT-NEXT: ldr r2, [sp] @ 4-byte Reload -; SOFT-NEXT: cmp r2, #0 -; SOFT-NEXT: bne .LBB19_16 -; SOFT-NEXT: @ %bb.15: -; SOFT-NEXT: mov r4, r3 -; SOFT-NEXT: .LBB19_16: +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: mov r1, r5 +; SOFT-NEXT: bne .LBB19_8 +; SOFT-NEXT: .LBB19_15: +; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload ; SOFT-NEXT: mov r2, r5 -; SOFT-NEXT: mov r3, r4 -; SOFT-NEXT: add sp, #4 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: bne .LBB19_9 +; SOFT-NEXT: .LBB19_16: +; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: beq .LBB19_10 +; SOFT-NEXT: b .LBB19_11 ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.17: ; SOFT-NEXT: .LCPI19_0: @@ -1889,30 +1836,27 @@ define i1 @test_signed_i1_f16(half %f) nounwind { ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f ; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: movs r0, #127 -; SOFT-NEXT: lsls r1, r0, #23 -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: mov r4, r0 ; SOFT-NEXT: movs r1, #0 -; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: bl __aeabi_fcmpge ; SOFT-NEXT: mov r6, r0 ; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: bl __aeabi_f2uiz +; SOFT-NEXT: mov r4, r0 ; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB20_3 +; SOFT-NEXT: bne .LBB20_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: cmp r4, #0 -; 
SOFT-NEXT: bne .LBB20_4 +; SOFT-NEXT: mov r4, r6 ; SOFT-NEXT: .LBB20_2: -; SOFT-NEXT: pop {r4, r5, r6, pc} -; SOFT-NEXT: .LBB20_3: -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB20_2 +; SOFT-NEXT: movs r0, #127 +; SOFT-NEXT: lsls r1, r0, #23 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: beq .LBB20_4 +; SOFT-NEXT: @ %bb.3: +; SOFT-NEXT: movs r4, #1 ; SOFT-NEXT: .LBB20_4: -; SOFT-NEXT: movs r0, #1 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: pop {r4, r5, r6, pc} ; ; VFP2-LABEL: test_signed_i1_f16: @@ -1920,13 +1864,13 @@ define i1 @test_signed_i1_f16(half %f) nounwind { ; VFP2-NEXT: .save {r7, lr} ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: vmov s2, r0 -; VFP2-NEXT: vmov.f32 s0, #1.000000e+00 -; VFP2-NEXT: vcvt.u32.f32 s4, s2 -; VFP2-NEXT: vcmp.f32 s2, #0 +; VFP2-NEXT: vmov s0, r0 +; VFP2-NEXT: vmov.f32 s4, #1.000000e+00 +; VFP2-NEXT: vcvt.u32.f32 s2, s0 +; VFP2-NEXT: vcmp.f32 s0, #0 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr -; VFP2-NEXT: vcmp.f32 s2, s0 -; VFP2-NEXT: vmov r0, s4 +; VFP2-NEXT: vcmp.f32 s0, s4 +; VFP2-NEXT: vmov r0, s2 ; VFP2-NEXT: it lt ; VFP2-NEXT: movlt r0, #0 ; VFP2-NEXT: vmrs APSR_nzcv, fpscr @@ -1960,29 +1904,27 @@ define i8 @test_signed_i8_f16(half %f) nounwind { ; SOFT-NEXT: push {r4, r5, r6, lr} ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: ldr r1, .LCPI21_0 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: mov r5, r0 ; SOFT-NEXT: movs r1, #0 -; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: bl __aeabi_f2uiz -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB21_3 +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: bne .LBB21_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB21_4 +; SOFT-NEXT: mov r4, r6 ; SOFT-NEXT: .LBB21_2: -; SOFT-NEXT: pop 
{r4, r5, r6, pc} -; SOFT-NEXT: .LBB21_3: +; SOFT-NEXT: ldr r1, .LCPI21_0 ; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB21_2 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: beq .LBB21_4 +; SOFT-NEXT: @ %bb.3: +; SOFT-NEXT: movs r4, #255 ; SOFT-NEXT: .LBB21_4: -; SOFT-NEXT: movs r0, #255 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: pop {r4, r5, r6, pc} ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.5: @@ -2040,29 +1982,27 @@ define i13 @test_signed_i13_f16(half %f) nounwind { ; SOFT-NEXT: push {r4, r5, r6, lr} ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: ldr r1, .LCPI22_0 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: mov r5, r0 ; SOFT-NEXT: movs r1, #0 -; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: bl __aeabi_f2uiz -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB22_3 +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: bne .LBB22_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB22_4 +; SOFT-NEXT: mov r4, r6 ; SOFT-NEXT: .LBB22_2: -; SOFT-NEXT: pop {r4, r5, r6, pc} -; SOFT-NEXT: .LBB22_3: +; SOFT-NEXT: ldr r1, .LCPI22_0 ; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB22_2 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: beq .LBB22_4 +; SOFT-NEXT: @ %bb.3: +; SOFT-NEXT: ldr r4, .LCPI22_1 ; SOFT-NEXT: .LBB22_4: -; SOFT-NEXT: ldr r0, .LCPI22_1 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: pop {r4, r5, r6, pc} ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.5: @@ -2122,29 +2062,27 @@ define i16 @test_signed_i16_f16(half %f) nounwind { ; SOFT-NEXT: push {r4, r5, r6, lr} ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: ldr r1, .LCPI23_0 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: mov r5, r0 ; SOFT-NEXT: movs r1, #0 
-; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: bl __aeabi_f2uiz -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB23_3 +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: bne .LBB23_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB23_4 +; SOFT-NEXT: mov r4, r6 ; SOFT-NEXT: .LBB23_2: -; SOFT-NEXT: pop {r4, r5, r6, pc} -; SOFT-NEXT: .LBB23_3: +; SOFT-NEXT: ldr r1, .LCPI23_0 ; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB23_2 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: beq .LBB23_4 +; SOFT-NEXT: @ %bb.3: +; SOFT-NEXT: ldr r4, .LCPI23_1 ; SOFT-NEXT: .LBB23_4: -; SOFT-NEXT: ldr r0, .LCPI23_1 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: pop {r4, r5, r6, pc} ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.5: @@ -2204,29 +2142,27 @@ define i19 @test_signed_i19_f16(half %f) nounwind { ; SOFT-NEXT: push {r4, r5, r6, lr} ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: ldr r1, .LCPI24_0 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: mov r5, r0 ; SOFT-NEXT: movs r1, #0 -; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: bl __aeabi_f2uiz -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB24_3 +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: cmp r6, #0 +; SOFT-NEXT: bne .LBB24_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB24_4 +; SOFT-NEXT: mov r4, r6 ; SOFT-NEXT: .LBB24_2: -; SOFT-NEXT: pop {r4, r5, r6, pc} -; SOFT-NEXT: .LBB24_3: +; SOFT-NEXT: ldr r1, .LCPI24_0 ; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB24_2 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: beq .LBB24_4 +; SOFT-NEXT: @ %bb.3: +; SOFT-NEXT: ldr r4, .LCPI24_1 ; SOFT-NEXT: .LBB24_4: -; 
SOFT-NEXT: ldr r0, .LCPI24_1 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: pop {r4, r5, r6, pc} ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.5: @@ -2289,31 +2225,28 @@ define i32 @test_signed_i32_f16(half %f) nounwind { ; SOFT-NEXT: sub sp, #4 ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: ldr r1, .LCPI25_0 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: mov r6, r0 ; SOFT-NEXT: movs r4, #0 -; SOFT-NEXT: mov r0, r7 ; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r6, r0 -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: bl __aeabi_f2uiz -; SOFT-NEXT: cmp r6, #0 -; SOFT-NEXT: beq .LBB25_3 +; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: bne .LBB25_2 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB25_4 +; SOFT-NEXT: mov r5, r7 ; SOFT-NEXT: .LBB25_2: -; SOFT-NEXT: add sp, #4 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB25_3: +; SOFT-NEXT: ldr r1, .LCPI25_0 ; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB25_2 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: beq .LBB25_4 +; SOFT-NEXT: @ %bb.3: +; SOFT-NEXT: mvns r5, r4 ; SOFT-NEXT: .LBB25_4: -; SOFT-NEXT: mvns r0, r4 +; SOFT-NEXT: mov r0, r5 ; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 @@ -2350,45 +2283,39 @@ define i50 @test_signed_i50_f16(half %f) nounwind { ; SOFT-NEXT: sub sp, #4 ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: ldr r1, .LCPI26_0 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: movs r5, #0 +; SOFT-NEXT: mov r1, r5 ; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: bl __aeabi_f2ulz -; SOFT-NEXT: cmp 
r5, #0 -; SOFT-NEXT: beq .LBB26_5 -; SOFT-NEXT: @ %bb.1: +; SOFT-NEXT: mov r7, r1 ; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB26_6 +; SOFT-NEXT: bne .LBB26_2 +; SOFT-NEXT: @ %bb.1: +; SOFT-NEXT: mov r7, r4 ; SOFT-NEXT: .LBB26_2: -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB26_7 -; SOFT-NEXT: .LBB26_3: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB26_8 +; SOFT-NEXT: bne .LBB26_4 +; SOFT-NEXT: @ %bb.3: +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: .LBB26_4: -; SOFT-NEXT: add sp, #4 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} -; SOFT-NEXT: .LBB26_5: -; SOFT-NEXT: mov r0, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB26_2 +; SOFT-NEXT: mov r4, r0 +; SOFT-NEXT: ldr r1, .LCPI26_0 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: mov r1, r0 +; SOFT-NEXT: mvns r0, r5 +; SOFT-NEXT: cmp r1, #0 +; SOFT-NEXT: bne .LBB26_6 +; SOFT-NEXT: @ %bb.5: +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: .LBB26_6: -; SOFT-NEXT: mvns r0, r6 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB26_3 -; SOFT-NEXT: .LBB26_7: -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB26_4 +; SOFT-NEXT: beq .LBB26_8 +; SOFT-NEXT: @ %bb.7: +; SOFT-NEXT: ldr r7, .LCPI26_1 ; SOFT-NEXT: .LBB26_8: -; SOFT-NEXT: ldr r1, .LCPI26_1 +; SOFT-NEXT: mov r1, r7 ; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 @@ -2465,48 +2392,43 @@ define i64 @test_signed_i64_f16(half %f) nounwind { ; SOFT-NEXT: sub sp, #4 ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: ldr r1, .LCPI27_0 -; SOFT-NEXT: bl __aeabi_fcmpgt -; SOFT-NEXT: mov r4, r0 -; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: mov r0, r7 -; SOFT-NEXT: mov r1, r6 +; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: movs r4, #0 +; SOFT-NEXT: mov r1, r4 ; SOFT-NEXT: bl __aeabi_fcmpge ; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r0, r6 ; SOFT-NEXT: bl __aeabi_f2ulz -; SOFT-NEXT: mov r3, r0 +; SOFT-NEXT: mov r7, r0 ; SOFT-NEXT: cmp r5, #0 ; SOFT-NEXT: bne .LBB27_2 ; 
SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: mov r3, r5 +; SOFT-NEXT: mov r1, r5 ; SOFT-NEXT: .LBB27_2: -; SOFT-NEXT: mvns r2, r6 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: mov r0, r2 -; SOFT-NEXT: beq .LBB27_7 +; SOFT-NEXT: str r1, [sp] @ 4-byte Spill +; SOFT-NEXT: bne .LBB27_4 ; SOFT-NEXT: @ %bb.3: -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: beq .LBB27_8 +; SOFT-NEXT: mov r7, r5 ; SOFT-NEXT: .LBB27_4: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB27_6 -; SOFT-NEXT: .LBB27_5: -; SOFT-NEXT: mov r2, r1 +; SOFT-NEXT: ldr r1, .LCPI27_0 +; SOFT-NEXT: mov r0, r6 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: mvns r1, r4 +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: mov r0, r1 +; SOFT-NEXT: beq .LBB27_7 +; SOFT-NEXT: @ %bb.5: +; SOFT-NEXT: beq .LBB27_8 ; SOFT-NEXT: .LBB27_6: -; SOFT-NEXT: mov r1, r2 ; SOFT-NEXT: add sp, #4 ; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .LBB27_7: -; SOFT-NEXT: mov r0, r3 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB27_4 +; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: bne .LBB27_6 ; SOFT-NEXT: .LBB27_8: -; SOFT-NEXT: mov r1, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB27_5 -; SOFT-NEXT: b .LBB27_6 +; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload +; SOFT-NEXT: add sp, #4 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.9: ; SOFT-NEXT: .LCPI27_0: @@ -2573,78 +2495,73 @@ define i100 @test_signed_i100_f16(half %f) nounwind { ; SOFT: @ %bb.0: ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #4 -; SOFT-NEXT: sub sp, #4 +; SOFT-NEXT: .pad #12 +; SOFT-NEXT: sub sp, #12 ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: ldr r1, .LCPI28_0 -; SOFT-NEXT: bl __aeabi_fcmpgt ; SOFT-NEXT: mov r4, r0 ; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: mov r0, r7 ; SOFT-NEXT: mov r1, r6 ; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: bl __fixunssfti -; SOFT-NEXT: mov r7, r1 -; 
SOFT-NEXT: str r2, [sp] @ 4-byte Spill -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB28_2 +; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: beq .LBB28_11 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: beq .LBB28_12 ; SOFT-NEXT: .LBB28_2: -; SOFT-NEXT: mvns r2, r6 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: mov r6, r2 ; SOFT-NEXT: bne .LBB28_4 -; SOFT-NEXT: @ %bb.3: -; SOFT-NEXT: mov r6, r0 +; SOFT-NEXT: .LBB28_3: +; SOFT-NEXT: mov r1, r7 ; SOFT-NEXT: .LBB28_4: -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: str r1, [sp] @ 4-byte Spill +; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill ; SOFT-NEXT: bne .LBB28_6 ; SOFT-NEXT: @ %bb.5: -; SOFT-NEXT: mov r7, r5 +; SOFT-NEXT: mov r5, r7 ; SOFT-NEXT: .LBB28_6: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: mov r1, r2 +; SOFT-NEXT: ldr r1, .LCPI28_0 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: mvns r2, r6 +; SOFT-NEXT: movs r3, #15 +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: mov r0, r2 ; SOFT-NEXT: beq .LBB28_13 ; SOFT-NEXT: @ %bb.7: -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: mov r1, r2 ; SOFT-NEXT: beq .LBB28_14 ; SOFT-NEXT: .LBB28_8: -; SOFT-NEXT: cmp r4, #0 ; SOFT-NEXT: beq .LBB28_15 ; SOFT-NEXT: .LBB28_9: -; SOFT-NEXT: cmp r5, #0 ; SOFT-NEXT: beq .LBB28_16 ; SOFT-NEXT: .LBB28_10: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB28_12 +; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .LBB28_11: -; SOFT-NEXT: movs r3, #15 +; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: bne .LBB28_2 ; SOFT-NEXT: .LBB28_12: -; SOFT-NEXT: mov r0, r6 -; SOFT-NEXT: add sp, #4 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: beq .LBB28_3 +; SOFT-NEXT: b .LBB28_4 ; SOFT-NEXT: .LBB28_13: -; SOFT-NEXT: mov r1, r7 -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r2 ; SOFT-NEXT: bne .LBB28_8 ; SOFT-NEXT: .LBB28_14: -; SOFT-NEXT: str r5, [sp] @ 4-byte Spill -; SOFT-NEXT: cmp r4, #0 +; SOFT-NEXT: ldr r1, [sp] @ 4-byte 
Reload ; SOFT-NEXT: bne .LBB28_9 ; SOFT-NEXT: .LBB28_15: -; SOFT-NEXT: ldr r2, [sp] @ 4-byte Reload -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload ; SOFT-NEXT: bne .LBB28_10 ; SOFT-NEXT: .LBB28_16: -; SOFT-NEXT: mov r3, r5 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB28_11 -; SOFT-NEXT: b .LBB28_12 +; SOFT-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.17: ; SOFT-NEXT: .LCPI28_0: @@ -2719,77 +2636,74 @@ define i128 @test_signed_i128_f16(half %f) nounwind { ; SOFT: @ %bb.0: ; SOFT-NEXT: .save {r4, r5, r6, r7, lr} ; SOFT-NEXT: push {r4, r5, r6, r7, lr} -; SOFT-NEXT: .pad #4 -; SOFT-NEXT: sub sp, #4 +; SOFT-NEXT: .pad #12 +; SOFT-NEXT: sub sp, #12 ; SOFT-NEXT: uxth r0, r0 ; SOFT-NEXT: bl __aeabi_h2f -; SOFT-NEXT: mov r7, r0 -; SOFT-NEXT: ldr r1, .LCPI29_0 -; SOFT-NEXT: bl __aeabi_fcmpgt ; SOFT-NEXT: mov r4, r0 ; SOFT-NEXT: movs r6, #0 -; SOFT-NEXT: mov r0, r7 ; SOFT-NEXT: mov r1, r6 ; SOFT-NEXT: bl __aeabi_fcmpge -; SOFT-NEXT: mov r5, r0 -; SOFT-NEXT: mov r0, r7 +; SOFT-NEXT: mov r7, r0 +; SOFT-NEXT: mov r0, r4 ; SOFT-NEXT: bl __fixunssfti -; SOFT-NEXT: mov r7, r1 -; SOFT-NEXT: str r3, [sp] @ 4-byte Spill -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB29_2 +; SOFT-NEXT: mov r5, r0 +; SOFT-NEXT: cmp r7, #0 +; SOFT-NEXT: beq .LBB29_11 ; SOFT-NEXT: @ %bb.1: -; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: beq .LBB29_12 ; SOFT-NEXT: .LBB29_2: -; SOFT-NEXT: mvns r6, r6 -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: mov r3, r6 ; SOFT-NEXT: bne .LBB29_4 -; SOFT-NEXT: @ %bb.3: -; SOFT-NEXT: mov r3, r0 +; SOFT-NEXT: .LBB29_3: +; SOFT-NEXT: mov r1, r7 ; SOFT-NEXT: .LBB29_4: -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: str r1, [sp] @ 4-byte Spill +; SOFT-NEXT: str r2, [sp, #4] @ 4-byte Spill +; SOFT-NEXT: str r3, [sp, #8] @ 4-byte Spill ; SOFT-NEXT: bne .LBB29_6 ; SOFT-NEXT: @ %bb.5: -; SOFT-NEXT: mov r7, r5 +; SOFT-NEXT: mov r5, r7 ; SOFT-NEXT: .LBB29_6: -; SOFT-NEXT: cmp r4, 
#0 -; SOFT-NEXT: mov r1, r6 -; SOFT-NEXT: bne .LBB29_8 +; SOFT-NEXT: ldr r1, .LCPI29_0 +; SOFT-NEXT: mov r0, r4 +; SOFT-NEXT: bl __aeabi_fcmpgt +; SOFT-NEXT: mvns r3, r6 +; SOFT-NEXT: cmp r0, #0 +; SOFT-NEXT: mov r0, r3 +; SOFT-NEXT: beq .LBB29_13 ; SOFT-NEXT: @ %bb.7: -; SOFT-NEXT: mov r1, r7 +; SOFT-NEXT: mov r1, r3 +; SOFT-NEXT: beq .LBB29_14 ; SOFT-NEXT: .LBB29_8: -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB29_10 -; SOFT-NEXT: @ %bb.9: -; SOFT-NEXT: mov r2, r5 -; SOFT-NEXT: .LBB29_10: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: mov r7, r6 +; SOFT-NEXT: mov r2, r3 ; SOFT-NEXT: beq .LBB29_15 -; SOFT-NEXT: @ %bb.11: -; SOFT-NEXT: cmp r5, #0 +; SOFT-NEXT: .LBB29_9: ; SOFT-NEXT: beq .LBB29_16 +; SOFT-NEXT: .LBB29_10: +; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: .LBB29_11: +; SOFT-NEXT: mov r3, r7 +; SOFT-NEXT: bne .LBB29_2 ; SOFT-NEXT: .LBB29_12: -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: bne .LBB29_14 +; SOFT-NEXT: mov r2, r7 +; SOFT-NEXT: beq .LBB29_3 +; SOFT-NEXT: b .LBB29_4 ; SOFT-NEXT: .LBB29_13: -; SOFT-NEXT: ldr r6, [sp] @ 4-byte Reload +; SOFT-NEXT: mov r0, r5 +; SOFT-NEXT: mov r1, r3 +; SOFT-NEXT: bne .LBB29_8 ; SOFT-NEXT: .LBB29_14: -; SOFT-NEXT: mov r0, r3 -; SOFT-NEXT: mov r2, r7 -; SOFT-NEXT: mov r3, r6 -; SOFT-NEXT: add sp, #4 -; SOFT-NEXT: pop {r4, r5, r6, r7, pc} +; SOFT-NEXT: ldr r1, [sp] @ 4-byte Reload +; SOFT-NEXT: mov r2, r3 +; SOFT-NEXT: bne .LBB29_9 ; SOFT-NEXT: .LBB29_15: -; SOFT-NEXT: mov r7, r2 -; SOFT-NEXT: cmp r5, #0 -; SOFT-NEXT: bne .LBB29_12 +; SOFT-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; SOFT-NEXT: bne .LBB29_10 ; SOFT-NEXT: .LBB29_16: -; SOFT-NEXT: str r5, [sp] @ 4-byte Spill -; SOFT-NEXT: cmp r4, #0 -; SOFT-NEXT: beq .LBB29_13 -; SOFT-NEXT: b .LBB29_14 +; SOFT-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; SOFT-NEXT: add sp, #12 +; SOFT-NEXT: pop {r4, r5, r6, r7, pc} ; SOFT-NEXT: .p2align 2 ; SOFT-NEXT: @ %bb.17: ; SOFT-NEXT: .LCPI29_0: diff --git a/llvm/test/CodeGen/ARM/funnel-shift-rot.ll 
b/llvm/test/CodeGen/ARM/funnel-shift-rot.ll index de5bd2a7040b9..a1b6847d623d0 100644 --- a/llvm/test/CodeGen/ARM/funnel-shift-rot.ll +++ b/llvm/test/CodeGen/ARM/funnel-shift-rot.ll @@ -71,12 +71,12 @@ define i64 @rotl_i64(i64 %x, i64 %z) { ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: ands r3, r2, #32 -; CHECK-NEXT: and r12, r2, #31 +; CHECK-NEXT: tst r2, #32 ; CHECK-NEXT: mov r3, r0 -; CHECK-NEXT: mov r4, #31 +; CHECK-NEXT: and r12, r2, #31 ; CHECK-NEXT: movne r3, r1 ; CHECK-NEXT: movne r1, r0 +; CHECK-NEXT: mov r4, #31 ; CHECK-NEXT: bic r2, r4, r2 ; CHECK-NEXT: lsl lr, r3, r12 ; CHECK-NEXT: lsr r0, r1, #1 @@ -206,7 +206,7 @@ define i32 @rotr_i32(i32 %x, i32 %z) { define i64 @rotr_i64(i64 %x, i64 %z) { ; CHECK-LABEL: rotr_i64: ; CHECK: @ %bb.0: -; CHECK-NEXT: ands r3, r2, #32 +; CHECK-NEXT: tst r2, #32 ; CHECK-NEXT: mov r3, r1 ; CHECK-NEXT: moveq r3, r0 ; CHECK-NEXT: moveq r0, r1 diff --git a/llvm/test/CodeGen/ARM/funnel-shift.ll b/llvm/test/CodeGen/ARM/funnel-shift.ll index 5a7c4384428e1..191155ae30f3e 100644 --- a/llvm/test/CodeGen/ARM/funnel-shift.ll +++ b/llvm/test/CodeGen/ARM/funnel-shift.ll @@ -47,69 +47,67 @@ declare i37 @llvm.fshl.i37(i37, i37, i37) define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) { ; SCALAR-LABEL: fshl_i37: ; SCALAR: @ %bb.0: -; SCALAR-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} -; SCALAR-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; SCALAR-NEXT: .save {r4, r5, r6, r7, r8, lr} +; SCALAR-NEXT: push {r4, r5, r6, r7, r8, lr} ; SCALAR-NEXT: mov r8, r0 -; SCALAR-NEXT: ldr r0, [sp, #36] +; SCALAR-NEXT: ldr r0, [sp, #28] ; SCALAR-NEXT: mov r4, r1 -; SCALAR-NEXT: mov r6, r3 +; SCALAR-NEXT: mov r5, r3 ; SCALAR-NEXT: and r1, r0, #31 -; SCALAR-NEXT: ldr r0, [sp, #32] -; SCALAR-NEXT: mov r9, r2 +; SCALAR-NEXT: ldr r0, [sp, #24] +; SCALAR-NEXT: mov r6, r2 ; SCALAR-NEXT: mov r2, #37 ; SCALAR-NEXT: mov r3, #0 ; SCALAR-NEXT: bl __aeabi_uldivmod -; SCALAR-NEXT: lsl r1, r6, #27 -; SCALAR-NEXT: ands r0, r2, #32 
-; SCALAR-NEXT: orr r1, r1, r9, lsr #5 -; SCALAR-NEXT: mov r3, r8 -; SCALAR-NEXT: and r6, r2, #31 +; SCALAR-NEXT: lsl r0, r5, #27 +; SCALAR-NEXT: tst r2, #32 +; SCALAR-NEXT: orr r0, r0, r6, lsr #5 +; SCALAR-NEXT: mov r1, r8 +; SCALAR-NEXT: and r3, r2, #31 ; SCALAR-NEXT: mov r7, #31 -; SCALAR-NEXT: movne r3, r1 -; SCALAR-NEXT: cmp r0, #0 -; SCALAR-NEXT: lslne r1, r9, #27 +; SCALAR-NEXT: movne r1, r0 +; SCALAR-NEXT: lslne r0, r6, #27 ; SCALAR-NEXT: bic r2, r7, r2 +; SCALAR-NEXT: lsl r5, r1, r3 +; SCALAR-NEXT: lsr r0, r0, #1 ; SCALAR-NEXT: movne r4, r8 -; SCALAR-NEXT: lsl r5, r3, r6 -; SCALAR-NEXT: lsr r0, r1, #1 -; SCALAR-NEXT: lsl r1, r4, r6 -; SCALAR-NEXT: lsr r3, r3, #1 +; SCALAR-NEXT: lsr r1, r1, #1 +; SCALAR-NEXT: lsl r3, r4, r3 ; SCALAR-NEXT: orr r0, r5, r0, lsr r2 -; SCALAR-NEXT: orr r1, r1, r3, lsr r2 -; SCALAR-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} +; SCALAR-NEXT: orr r1, r3, r1, lsr r2 +; SCALAR-NEXT: pop {r4, r5, r6, r7, r8, pc} ; ; NEON-LABEL: fshl_i37: ; NEON: @ %bb.0: -; NEON-NEXT: .save {r4, r5, r6, r7, r11, lr} -; NEON-NEXT: push {r4, r5, r6, r7, r11, lr} +; NEON-NEXT: .save {r4, r5, r6, r7, r8, lr} +; NEON-NEXT: push {r4, r5, r6, r7, r8, lr} ; NEON-NEXT: mov r4, r1 ; NEON-NEXT: ldr r1, [sp, #28] -; NEON-NEXT: mov r6, r0 +; NEON-NEXT: mov r8, r0 ; NEON-NEXT: ldr r0, [sp, #24] ; NEON-NEXT: and r1, r1, #31 ; NEON-NEXT: mov r5, r3 -; NEON-NEXT: mov r7, r2 +; NEON-NEXT: mov r6, r2 ; NEON-NEXT: mov r2, #37 ; NEON-NEXT: mov r3, #0 ; NEON-NEXT: bl __aeabi_uldivmod -; NEON-NEXT: mov r0, #31 -; NEON-NEXT: bic r1, r0, r2 ; NEON-NEXT: lsl r0, r5, #27 -; NEON-NEXT: ands r12, r2, #32 -; NEON-NEXT: orr r0, r0, r7, lsr #5 -; NEON-NEXT: mov r5, r6 -; NEON-NEXT: and r2, r2, #31 -; NEON-NEXT: movne r5, r0 -; NEON-NEXT: lslne r0, r7, #27 -; NEON-NEXT: cmp r12, #0 -; NEON-NEXT: lsl r3, r5, r2 +; NEON-NEXT: tst r2, #32 +; NEON-NEXT: orr r0, r0, r6, lsr #5 +; NEON-NEXT: mov r1, r8 +; NEON-NEXT: and r3, r2, #31 +; NEON-NEXT: mov r7, #31 +; NEON-NEXT: movne r1, r0 +; 
NEON-NEXT: lslne r0, r6, #27 +; NEON-NEXT: bic r2, r7, r2 +; NEON-NEXT: lsl r5, r1, r3 ; NEON-NEXT: lsr r0, r0, #1 -; NEON-NEXT: movne r4, r6 -; NEON-NEXT: orr r0, r3, r0, lsr r1 -; NEON-NEXT: lsr r3, r5, #1 -; NEON-NEXT: lsl r2, r4, r2 -; NEON-NEXT: orr r1, r2, r3, lsr r1 -; NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} +; NEON-NEXT: movne r4, r8 +; NEON-NEXT: lsr r1, r1, #1 +; NEON-NEXT: lsl r3, r4, r3 +; NEON-NEXT: orr r0, r5, r0, lsr r2 +; NEON-NEXT: orr r1, r3, r1, lsr r2 +; NEON-NEXT: pop {r4, r5, r6, r7, r8, pc} %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z) ret i37 %f } @@ -237,71 +235,69 @@ declare i37 @llvm.fshr.i37(i37, i37, i37) define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) { ; SCALAR-LABEL: fshr_i37: ; SCALAR: @ %bb.0: -; SCALAR-NEXT: .save {r4, r5, r6, r7, r8, lr} -; SCALAR-NEXT: push {r4, r5, r6, r7, r8, lr} -; SCALAR-NEXT: mov r8, r0 +; SCALAR-NEXT: .save {r4, r5, r6, r7, r11, lr} +; SCALAR-NEXT: push {r4, r5, r6, r7, r11, lr} +; SCALAR-NEXT: mov r5, r0 ; SCALAR-NEXT: ldr r0, [sp, #28] ; SCALAR-NEXT: mov r4, r1 -; SCALAR-NEXT: mov r5, r3 +; SCALAR-NEXT: mov r6, r3 ; SCALAR-NEXT: and r1, r0, #31 ; SCALAR-NEXT: ldr r0, [sp, #24] ; SCALAR-NEXT: mov r7, r2 ; SCALAR-NEXT: mov r2, #37 ; SCALAR-NEXT: mov r3, #0 ; SCALAR-NEXT: bl __aeabi_uldivmod -; SCALAR-NEXT: lsl r3, r5, #27 ; SCALAR-NEXT: add r0, r2, #27 -; SCALAR-NEXT: orr r3, r3, r7, lsr #5 -; SCALAR-NEXT: ands r2, r0, #32 -; SCALAR-NEXT: mov r5, r8 +; SCALAR-NEXT: lsl r2, r6, #27 +; SCALAR-NEXT: orr r2, r2, r7, lsr #5 ; SCALAR-NEXT: mov r1, #31 -; SCALAR-NEXT: moveq r5, r3 -; SCALAR-NEXT: lsleq r3, r7, #27 -; SCALAR-NEXT: cmp r2, #0 +; SCALAR-NEXT: tst r0, #32 +; SCALAR-NEXT: mov r3, r5 +; SCALAR-NEXT: moveq r3, r2 +; SCALAR-NEXT: lsleq r2, r7, #27 ; SCALAR-NEXT: bic r1, r1, r0 -; SCALAR-NEXT: moveq r4, r8 -; SCALAR-NEXT: lsl r6, r5, #1 ; SCALAR-NEXT: and r7, r0, #31 -; SCALAR-NEXT: lsl r2, r4, #1 +; SCALAR-NEXT: lsl r6, r3, #1 +; SCALAR-NEXT: moveq r4, r5 ; SCALAR-NEXT: lsl r6, r6, r1 +; 
SCALAR-NEXT: orr r0, r6, r2, lsr r7 +; SCALAR-NEXT: lsl r2, r4, #1 ; SCALAR-NEXT: lsl r1, r2, r1 -; SCALAR-NEXT: orr r0, r6, r3, lsr r7 -; SCALAR-NEXT: orr r1, r1, r5, lsr r7 -; SCALAR-NEXT: pop {r4, r5, r6, r7, r8, pc} +; SCALAR-NEXT: orr r1, r1, r3, lsr r7 +; SCALAR-NEXT: pop {r4, r5, r6, r7, r11, pc} ; ; NEON-LABEL: fshr_i37: ; NEON: @ %bb.0: -; NEON-NEXT: .save {r4, r5, r6, r7, r8, lr} -; NEON-NEXT: push {r4, r5, r6, r7, r8, lr} +; NEON-NEXT: .save {r4, r5, r6, r7, r11, lr} +; NEON-NEXT: push {r4, r5, r6, r7, r11, lr} ; NEON-NEXT: mov r4, r1 ; NEON-NEXT: ldr r1, [sp, #28] -; NEON-NEXT: mov r8, r0 +; NEON-NEXT: mov r5, r0 ; NEON-NEXT: ldr r0, [sp, #24] ; NEON-NEXT: and r1, r1, #31 -; NEON-NEXT: mov r5, r3 +; NEON-NEXT: mov r6, r3 ; NEON-NEXT: mov r7, r2 ; NEON-NEXT: mov r2, #37 ; NEON-NEXT: mov r3, #0 ; NEON-NEXT: bl __aeabi_uldivmod -; NEON-NEXT: lsl r3, r5, #27 ; NEON-NEXT: add r0, r2, #27 -; NEON-NEXT: orr r3, r3, r7, lsr #5 -; NEON-NEXT: ands r2, r0, #32 -; NEON-NEXT: mov r5, r8 +; NEON-NEXT: lsl r2, r6, #27 +; NEON-NEXT: orr r2, r2, r7, lsr #5 ; NEON-NEXT: mov r1, #31 -; NEON-NEXT: moveq r5, r3 -; NEON-NEXT: lsleq r3, r7, #27 -; NEON-NEXT: cmp r2, #0 +; NEON-NEXT: tst r0, #32 +; NEON-NEXT: mov r3, r5 +; NEON-NEXT: moveq r3, r2 +; NEON-NEXT: lsleq r2, r7, #27 ; NEON-NEXT: bic r1, r1, r0 -; NEON-NEXT: moveq r4, r8 -; NEON-NEXT: lsl r6, r5, #1 ; NEON-NEXT: and r7, r0, #31 -; NEON-NEXT: lsl r2, r4, #1 +; NEON-NEXT: lsl r6, r3, #1 +; NEON-NEXT: moveq r4, r5 ; NEON-NEXT: lsl r6, r6, r1 +; NEON-NEXT: orr r0, r6, r2, lsr r7 +; NEON-NEXT: lsl r2, r4, #1 ; NEON-NEXT: lsl r1, r2, r1 -; NEON-NEXT: orr r0, r6, r3, lsr r7 -; NEON-NEXT: orr r1, r1, r5, lsr r7 -; NEON-NEXT: pop {r4, r5, r6, r7, r8, pc} +; NEON-NEXT: orr r1, r1, r3, lsr r7 +; NEON-NEXT: pop {r4, r5, r6, r7, r11, pc} %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z) ret i37 %f } diff --git a/llvm/test/CodeGen/ARM/ifcvt1.ll b/llvm/test/CodeGen/ARM/ifcvt1.ll index d419cbc48fc48..6d59869bc102e 100644 --- 
a/llvm/test/CodeGen/ARM/ifcvt1.ll +++ b/llvm/test/CodeGen/ARM/ifcvt1.ll @@ -13,10 +13,10 @@ define i32 @t1(i32 %a, i32 %b) { ; ; SWIFT-LABEL: t1: ; SWIFT: @ %bb.0: @ %common.ret -; SWIFT-NEXT: mov r2, #1 ; SWIFT-NEXT: cmp r0, #0 -; SWIFT-NEXT: mvneq r2, #0 -; SWIFT-NEXT: add r0, r1, r2 +; SWIFT-NEXT: mov r0, #1 +; SWIFT-NEXT: mvneq r0, #0 +; SWIFT-NEXT: add r0, r1, r0 ; SWIFT-NEXT: bx lr %tmp2 = icmp eq i32 %a, 0 br i1 %tmp2, label %cond_false, label %cond_true diff --git a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll index 4003af5d44be8..dd33b09fe8300 100644 --- a/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll +++ b/llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll @@ -494,14 +494,14 @@ define <4 x float> @fminnumv432_intrinsic(<4 x float> %x, <4 x float> %y) { ; ARMV7-NEXT: vld1.64 {d0, d1}, [r12] ; ARMV7-NEXT: vmov d3, r2, r3 ; ARMV7-NEXT: vmov d2, r0, r1 -; ARMV7-NEXT: vcmp.f32 s7, s3 -; ARMV7-NEXT: vmrs APSR_nzcv, fpscr ; ARMV7-NEXT: vcmp.f32 s6, s2 -; ARMV7-NEXT: vmovlt.f32 s3, s7 ; ARMV7-NEXT: vmrs APSR_nzcv, fpscr -; ARMV7-NEXT: vcmp.f32 s5, s1 +; ARMV7-NEXT: vcmp.f32 s7, s3 ; ARMV7-NEXT: vmovlt.f32 s2, s6 ; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vcmp.f32 s5, s1 +; ARMV7-NEXT: vmovlt.f32 s3, s7 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr ; ARMV7-NEXT: vcmp.f32 s4, s0 ; ARMV7-NEXT: vmovlt.f32 s1, s5 ; ARMV7-NEXT: vmrs APSR_nzcv, fpscr @@ -676,14 +676,14 @@ define <4 x float> @fmaxnumv432_intrinsic(<4 x float> %x, <4 x float> %y) { ; ARMV7-NEXT: vld1.64 {d0, d1}, [r12] ; ARMV7-NEXT: vmov d3, r2, r3 ; ARMV7-NEXT: vmov d2, r0, r1 -; ARMV7-NEXT: vcmp.f32 s7, s3 -; ARMV7-NEXT: vmrs APSR_nzcv, fpscr ; ARMV7-NEXT: vcmp.f32 s6, s2 -; ARMV7-NEXT: vmovgt.f32 s3, s7 ; ARMV7-NEXT: vmrs APSR_nzcv, fpscr -; ARMV7-NEXT: vcmp.f32 s5, s1 +; ARMV7-NEXT: vcmp.f32 s7, s3 ; ARMV7-NEXT: vmovgt.f32 s2, s6 ; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vcmp.f32 s5, s1 +; ARMV7-NEXT: vmovgt.f32 s3, s7 +; ARMV7-NEXT: 
vmrs APSR_nzcv, fpscr ; ARMV7-NEXT: vcmp.f32 s4, s0 ; ARMV7-NEXT: vmovgt.f32 s1, s5 ; ARMV7-NEXT: vmrs APSR_nzcv, fpscr @@ -760,16 +760,16 @@ define <4 x float> @fmaxnumv432_zero_intrinsic(<4 x float> %x) { ; ARMV7-NEXT: vmov d3, r2, r3 ; ARMV7-NEXT: vldr s0, .LCPI21_0 ; ARMV7-NEXT: vmov d2, r0, r1 -; ARMV7-NEXT: vcmp.f32 s7, #0 -; ARMV7-NEXT: vmrs APSR_nzcv, fpscr -; ARMV7-NEXT: vmov.f32 s3, s0 ; ARMV7-NEXT: vcmp.f32 s6, #0 -; ARMV7-NEXT: vmovgt.f32 s3, s7 ; ARMV7-NEXT: vmrs APSR_nzcv, fpscr ; ARMV7-NEXT: vmov.f32 s2, s0 -; ARMV7-NEXT: vcmp.f32 s5, #0 +; ARMV7-NEXT: vcmp.f32 s7, #0 ; ARMV7-NEXT: vmovgt.f32 s2, s6 ; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vmov.f32 s3, s0 +; ARMV7-NEXT: vcmp.f32 s5, #0 +; ARMV7-NEXT: vmovgt.f32 s3, s7 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr ; ARMV7-NEXT: vmov.f32 s1, s0 ; ARMV7-NEXT: vcmp.f32 s4, #0 ; ARMV7-NEXT: vmovgt.f32 s1, s5 @@ -812,18 +812,18 @@ define <4 x float> @fmaxnumv432_minus_zero_intrinsic(<4 x float> %x) { ; ARMV7-NEXT: vldr s0, .LCPI22_0 ; ARMV7-NEXT: vmov d3, r2, r3 ; ARMV7-NEXT: vmov d2, r0, r1 -; ARMV7-NEXT: vcmp.f32 s7, s0 -; ARMV7-NEXT: vmrs APSR_nzcv, fpscr -; ARMV7-NEXT: vmov.f32 s3, s0 ; ARMV7-NEXT: vcmp.f32 s6, s0 -; ARMV7-NEXT: vmovgt.f32 s3, s7 ; ARMV7-NEXT: vmrs APSR_nzcv, fpscr +; ARMV7-NEXT: vcmp.f32 s7, s0 ; ARMV7-NEXT: vmov.f32 s2, s0 -; ARMV7-NEXT: vcmp.f32 s5, s0 ; ARMV7-NEXT: vmovgt.f32 s2, s6 ; ARMV7-NEXT: vmrs APSR_nzcv, fpscr -; ARMV7-NEXT: vmov.f32 s1, s0 +; ARMV7-NEXT: vcmp.f32 s5, s0 +; ARMV7-NEXT: vmov.f32 s3, s0 +; ARMV7-NEXT: vmovgt.f32 s3, s7 +; ARMV7-NEXT: vmrs APSR_nzcv, fpscr ; ARMV7-NEXT: vcmp.f32 s4, s0 +; ARMV7-NEXT: vmov.f32 s1, s0 ; ARMV7-NEXT: vmovgt.f32 s1, s5 ; ARMV7-NEXT: vmrs APSR_nzcv, fpscr ; ARMV7-NEXT: vmovgt.f32 s0, s4 @@ -933,8 +933,8 @@ define <2 x double> @fminnumv264_intrinsic(<2 x double> %x, <2 x double> %y) { ; ARMV8M-NEXT: vselgt.f64 d0, d0, d2 ; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr ; ARMV8M-NEXT: vmov r0, r1, d0 -; ARMV8M-NEXT: vselgt.f64 d1, d1, d3 -; 
ARMV8M-NEXT: vmov r2, r3, d1 +; ARMV8M-NEXT: vselgt.f64 d0, d1, d3 +; ARMV8M-NEXT: vmov r2, r3, d0 ; ARMV8M-NEXT: bx lr %a = call nnan <2 x double> @llvm.minnum.v2f64(<2 x double> %x, <2 x double> %y) ret <2 x double> %a @@ -981,8 +981,8 @@ define <2 x double> @fminnumv264_nsz_intrinsic(<2 x double> %x, <2 x double> %y) ; ARMV8M-NEXT: vselgt.f64 d0, d0, d2 ; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr ; ARMV8M-NEXT: vmov r0, r1, d0 -; ARMV8M-NEXT: vselgt.f64 d1, d1, d3 -; ARMV8M-NEXT: vmov r2, r3, d1 +; ARMV8M-NEXT: vselgt.f64 d0, d1, d3 +; ARMV8M-NEXT: vmov r2, r3, d0 ; ARMV8M-NEXT: bx lr %a = call nnan nsz <2 x double> @llvm.minnum.v2f64(<2 x double> %x, <2 x double> %y) ret <2 x double> %a @@ -1225,26 +1225,26 @@ define <2 x double> @fmaxnumv264_zero_intrinsic(<2 x double> %x) { ; ARMV8M-LABEL: fmaxnumv264_zero_intrinsic: ; ARMV8M: @ %bb.0: ; ARMV8M-NEXT: vmov d2, r0, r1 -; ARMV8M-NEXT: vldr d1, .LCPI30_1 +; ARMV8M-NEXT: vldr d0, .LCPI30_0 ; ARMV8M-NEXT: vcmp.f64 d2, #0 ; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr -; ARMV8M-NEXT: vmov d3, r2, r3 -; ARMV8M-NEXT: vcmp.f64 d3, d1 -; ARMV8M-NEXT: vldr d0, .LCPI30_0 -; ARMV8M-NEXT: vselgt.f64 d0, d2, d0 +; ARMV8M-NEXT: vmov d1, r2, r3 +; ARMV8M-NEXT: vcmp.f64 d1, d0 +; ARMV8M-NEXT: vldr d3, .LCPI30_1 +; ARMV8M-NEXT: vselgt.f64 d2, d2, d3 ; ARMV8M-NEXT: vmrs APSR_nzcv, fpscr -; ARMV8M-NEXT: vmov r0, r1, d0 -; ARMV8M-NEXT: vselgt.f64 d1, d3, d1 -; ARMV8M-NEXT: vmov r2, r3, d1 +; ARMV8M-NEXT: vmov r0, r1, d2 +; ARMV8M-NEXT: vselgt.f64 d0, d1, d0 +; ARMV8M-NEXT: vmov r2, r3, d0 ; ARMV8M-NEXT: bx lr ; ARMV8M-NEXT: .p2align 3 ; ARMV8M-NEXT: @ %bb.1: ; ARMV8M-NEXT: .LCPI30_0: -; ARMV8M-NEXT: .long 0 @ double 0 -; ARMV8M-NEXT: .long 0 -; ARMV8M-NEXT: .LCPI30_1: ; ARMV8M-NEXT: .long 0 @ double -0 ; ARMV8M-NEXT: .long 2147483648 +; ARMV8M-NEXT: .LCPI30_1: +; ARMV8M-NEXT: .long 0 @ double 0 +; ARMV8M-NEXT: .long 0 %a = call nnan <2 x double> @llvm.maxnum.v2f64(<2 x double> %x, <2 x double>) ret <2 x double> %a } diff --git 
a/llvm/test/CodeGen/ARM/neon_vabd.ll b/llvm/test/CodeGen/ARM/neon_vabd.ll index 8a268d46304cf..ffc72b242f829 100644 --- a/llvm/test/CodeGen/ARM/neon_vabd.ll +++ b/llvm/test/CodeGen/ARM/neon_vabd.ll @@ -144,25 +144,25 @@ define <2 x i64> @sabd_2d(<2 x i64> %a, <2 x i64> %b) { ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r4, r5, r6, lr} ; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: vmov r0, r12, d0 +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: vmov r2, r3, d2 -; CHECK-NEXT: vmov r1, lr, d1 -; CHECK-NEXT: vmov r4, r5, d3 +; CHECK-NEXT: vmov r2, r3, d3 +; CHECK-NEXT: vmov r12, lr, d0 +; CHECK-NEXT: vmov r4, r5, d2 ; CHECK-NEXT: vsub.i64 q8, q0, q1 ; CHECK-NEXT: subs r0, r2, r0 -; CHECK-NEXT: sbcs r0, r3, r12 +; CHECK-NEXT: sbcs r0, r3, r1 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: subs r1, r4, r1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mvnne r0, #0 +; CHECK-NEXT: subs r1, r4, r12 ; CHECK-NEXT: sbcs r1, r5, lr +; CHECK-NEXT: vdup.32 d19, r0 ; CHECK-NEXT: movwlt r6, #1 ; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: mvnne r6, #0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: vdup.32 d19, r6 -; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: vdup.32 d18, r0 +; CHECK-NEXT: vdup.32 d18, r6 ; CHECK-NEXT: veor q8, q8, q9 ; CHECK-NEXT: vsub.i64 q0, q9, q8 ; CHECK-NEXT: pop {r4, r5, r6, pc} @@ -475,25 +475,25 @@ define <2 x i64> @smaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) { ; CHECK: @ %bb.0: ; CHECK-NEXT: .save {r4, r5, r6, lr} ; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: vmov r0, r12, d0 +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: vmov r2, r3, d2 -; CHECK-NEXT: vmov r1, lr, d1 -; CHECK-NEXT: vmov r4, r5, d3 +; CHECK-NEXT: vmov r2, r3, d3 +; CHECK-NEXT: vmov r12, lr, d0 +; CHECK-NEXT: vmov r4, r5, d2 ; CHECK-NEXT: vsub.i64 q8, q0, q1 ; CHECK-NEXT: subs r0, r2, r0 -; CHECK-NEXT: sbcs r0, r3, r12 +; CHECK-NEXT: sbcs r0, r3, r1 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: movwlt r0, #1 -; CHECK-NEXT: subs r1, r4, r1 +; 
CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mvnne r0, #0 +; CHECK-NEXT: subs r1, r4, r12 ; CHECK-NEXT: sbcs r1, r5, lr +; CHECK-NEXT: vdup.32 d19, r0 ; CHECK-NEXT: movwlt r6, #1 ; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: mvnne r6, #0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: vdup.32 d19, r6 -; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: vdup.32 d18, r0 +; CHECK-NEXT: vdup.32 d18, r6 ; CHECK-NEXT: veor q8, q8, q9 ; CHECK-NEXT: vsub.i64 q0, q9, q8 ; CHECK-NEXT: pop {r4, r5, r6, pc} diff --git a/llvm/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll b/llvm/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll index 198927d1da3a4..2631189979e37 100644 --- a/llvm/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll +++ b/llvm/test/CodeGen/ARM/overflow-intrinsic-optimizations.ll @@ -164,9 +164,9 @@ cont2: define void @extern_loop(i32 %n) local_unnamed_addr #0 { ; Do not replace the compare around the clobbering call. -; CHECK: add {{r[0-9]+}}, {{r[0-9]+}}, #1 -; CHECK-NEXT: bl external_fn -; CHECK: cmp +; CHECK: bl external_fn +; CHECK-NEXT: adds +; CHECK-NEXT: bvs entry: %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %n, i32 1) %1 = extractvalue { i32, i1 } %0, 1 diff --git a/llvm/test/CodeGen/ARM/sadd_sat.ll b/llvm/test/CodeGen/ARM/sadd_sat.ll index 0060b4458081b..b8f7a2daaeaba 100644 --- a/llvm/test/CodeGen/ARM/sadd_sat.ll +++ b/llvm/test/CodeGen/ARM/sadd_sat.ll @@ -72,22 +72,21 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; CHECK-T16-NEXT: adcs r3, r4 ; CHECK-T16-NEXT: eors r4, r3 ; CHECK-T16-NEXT: bics r4, r1 -; CHECK-T16-NEXT: asrs r1, r3, #31 +; CHECK-T16-NEXT: asrs r0, r3, #31 +; CHECK-T16-NEXT: movs r1, #1 +; CHECK-T16-NEXT: lsls r1, r1, #31 +; CHECK-T16-NEXT: eors r1, r0 ; CHECK-T16-NEXT: cmp r4, #0 -; CHECK-T16-NEXT: mov r0, r1 -; CHECK-T16-NEXT: bmi .LBB1_2 +; CHECK-T16-NEXT: bpl .LBB1_3 ; CHECK-T16-NEXT: @ %bb.1: -; CHECK-T16-NEXT: mov r0, r2 +; CHECK-T16-NEXT: bpl .LBB1_4 ; CHECK-T16-NEXT: .LBB1_2: -; CHECK-T16-NEXT: cmp r4, #0 -; CHECK-T16-NEXT: bmi 
.LBB1_4 -; CHECK-T16-NEXT: @ %bb.3: -; CHECK-T16-NEXT: mov r1, r3 ; CHECK-T16-NEXT: pop {r4, pc} +; CHECK-T16-NEXT: .LBB1_3: +; CHECK-T16-NEXT: mov r0, r2 +; CHECK-T16-NEXT: bmi .LBB1_2 ; CHECK-T16-NEXT: .LBB1_4: -; CHECK-T16-NEXT: movs r2, #1 -; CHECK-T16-NEXT: lsls r2, r2, #31 -; CHECK-T16-NEXT: eors r1, r2 +; CHECK-T16-NEXT: mov r1, r3 ; CHECK-T16-NEXT: pop {r4, pc} ; ; CHECK-T2-LABEL: func2: @@ -128,23 +127,22 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; CHECK-T15TE-NEXT: adcs r3, r4 ; CHECK-T15TE-NEXT: eors r4, r3 ; CHECK-T15TE-NEXT: bics r4, r1 -; CHECK-T15TE-NEXT: asrs r1, r3, #31 +; CHECK-T15TE-NEXT: asrs r0, r3, #31 +; CHECK-T15TE-NEXT: movs r1, #1 +; CHECK-T15TE-NEXT: lsls r1, r1, #31 +; CHECK-T15TE-NEXT: eors r1, r0 ; CHECK-T15TE-NEXT: cmp r4, #0 -; CHECK-T15TE-NEXT: mov r12, r1 -; CHECK-T15TE-NEXT: mov r0, r12 -; CHECK-T15TE-NEXT: bmi .LBB1_2 +; CHECK-T15TE-NEXT: bpl .LBB1_3 ; CHECK-T15TE-NEXT: @ %bb.1: -; CHECK-T15TE-NEXT: movs r0, r2 +; CHECK-T15TE-NEXT: bpl .LBB1_4 ; CHECK-T15TE-NEXT: .LBB1_2: -; CHECK-T15TE-NEXT: cmp r4, #0 -; CHECK-T15TE-NEXT: bmi .LBB1_4 -; CHECK-T15TE-NEXT: @ %bb.3: -; CHECK-T15TE-NEXT: movs r1, r3 ; CHECK-T15TE-NEXT: pop {r4, pc} +; CHECK-T15TE-NEXT: .LBB1_3: +; CHECK-T15TE-NEXT: mov r12, r2 +; CHECK-T15TE-NEXT: mov r0, r12 +; CHECK-T15TE-NEXT: bmi .LBB1_2 ; CHECK-T15TE-NEXT: .LBB1_4: -; CHECK-T15TE-NEXT: movs r2, #1 -; CHECK-T15TE-NEXT: lsls r2, r2, #31 -; CHECK-T15TE-NEXT: eors r1, r2 +; CHECK-T15TE-NEXT: movs r1, r3 ; CHECK-T15TE-NEXT: pop {r4, pc} %tmp = call i64 @llvm.sadd.sat.i64(i64 %x, i64 %y) ret i64 %tmp diff --git a/llvm/test/CodeGen/ARM/sadd_sat_plus.ll b/llvm/test/CodeGen/ARM/sadd_sat_plus.ll index 859aedc7a3f01..0ddb64fc3f2d1 100644 --- a/llvm/test/CodeGen/ARM/sadd_sat_plus.ll +++ b/llvm/test/CodeGen/ARM/sadd_sat_plus.ll @@ -63,22 +63,22 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; CHECK-T1-NEXT: adcs r3, r1 ; CHECK-T1-NEXT: eors r1, r3 ; CHECK-T1-NEXT: bics r1, r2 -; CHECK-T1-NEXT: asrs r2, r3, 
#31 +; CHECK-T1-NEXT: asrs r0, r3, #31 +; CHECK-T1-NEXT: movs r2, #1 +; CHECK-T1-NEXT: lsls r2, r2, #31 +; CHECK-T1-NEXT: eors r2, r0 ; CHECK-T1-NEXT: cmp r1, #0 -; CHECK-T1-NEXT: mov r0, r2 -; CHECK-T1-NEXT: bmi .LBB1_2 +; CHECK-T1-NEXT: bpl .LBB1_3 ; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: mov r0, r4 +; CHECK-T1-NEXT: bpl .LBB1_4 ; CHECK-T1-NEXT: .LBB1_2: -; CHECK-T1-NEXT: cmp r1, #0 -; CHECK-T1-NEXT: bmi .LBB1_4 -; CHECK-T1-NEXT: @ %bb.3: -; CHECK-T1-NEXT: mov r1, r3 +; CHECK-T1-NEXT: mov r1, r2 ; CHECK-T1-NEXT: pop {r4, pc} +; CHECK-T1-NEXT: .LBB1_3: +; CHECK-T1-NEXT: mov r0, r4 +; CHECK-T1-NEXT: bmi .LBB1_2 ; CHECK-T1-NEXT: .LBB1_4: -; CHECK-T1-NEXT: movs r1, #1 -; CHECK-T1-NEXT: lsls r1, r1, #31 -; CHECK-T1-NEXT: eors r2, r1 +; CHECK-T1-NEXT: mov r2, r3 ; CHECK-T1-NEXT: mov r1, r2 ; CHECK-T1-NEXT: pop {r4, pc} ; diff --git a/llvm/test/CodeGen/ARM/select.ll b/llvm/test/CodeGen/ARM/select.ll index 496a6c0f5acbb..48d6ee925d939 100644 --- a/llvm/test/CodeGen/ARM/select.ll +++ b/llvm/test/CodeGen/ARM/select.ll @@ -320,11 +320,11 @@ define float @f10(i32 %a, i32 %b) nounwind uwtable readnone ssp { ; ; CHECK-VFP-LABEL: f10: ; CHECK-VFP: @ %bb.0: -; CHECK-VFP-NEXT: vmov.f32 s2, #1.000000e+00 -; CHECK-VFP-NEXT: vldr s0, .LCPI9_0 +; CHECK-VFP-NEXT: vmov.f32 s0, #1.000000e+00 +; CHECK-VFP-NEXT: vldr s2, .LCPI9_0 ; CHECK-VFP-NEXT: cmp r0, r1 -; CHECK-VFP-NEXT: vmoveq.f32 s0, s2 -; CHECK-VFP-NEXT: vmov r0, s0 +; CHECK-VFP-NEXT: vmoveq.f32 s2, s0 +; CHECK-VFP-NEXT: vmov r0, s2 ; CHECK-VFP-NEXT: bx lr ; CHECK-VFP-NEXT: .p2align 2 ; CHECK-VFP-NEXT: @ %bb.1: @@ -333,12 +333,12 @@ define float @f10(i32 %a, i32 %b) nounwind uwtable readnone ssp { ; ; CHECK-NEON-LABEL: f10: ; CHECK-NEON: @ %bb.0: -; CHECK-NEON-NEXT: vldr s0, LCPI9_0 -; CHECK-NEON-NEXT: vmov.f32 s2, #1.000000e+00 +; CHECK-NEON-NEXT: vldr s2, LCPI9_0 +; CHECK-NEON-NEXT: vmov.f32 s0, #1.000000e+00 ; CHECK-NEON-NEXT: cmp r0, r1 ; CHECK-NEON-NEXT: it eq -; CHECK-NEON-NEXT: vmoveq.f32 s0, s2 -; CHECK-NEON-NEXT: vmov 
r0, s0 +; CHECK-NEON-NEXT: vmoveq.f32 s2, s0 +; CHECK-NEON-NEXT: vmov r0, s2 ; CHECK-NEON-NEXT: bx lr ; CHECK-NEON-NEXT: .p2align 2 ; CHECK-NEON-NEXT: @ %bb.1: @@ -364,11 +364,11 @@ define float @f11(i32 %a, i32 %b) nounwind uwtable readnone ssp { ; ; CHECK-VFP-LABEL: f11: ; CHECK-VFP: @ %bb.0: -; CHECK-VFP-NEXT: vmov.f32 s2, #-1.000000e+00 -; CHECK-VFP-NEXT: vldr s0, .LCPI10_0 +; CHECK-VFP-NEXT: vmov.f32 s0, #-1.000000e+00 +; CHECK-VFP-NEXT: vldr s2, .LCPI10_0 ; CHECK-VFP-NEXT: cmp r0, r1 -; CHECK-VFP-NEXT: vmoveq.f32 s0, s2 -; CHECK-VFP-NEXT: vmov r0, s0 +; CHECK-VFP-NEXT: vmoveq.f32 s2, s0 +; CHECK-VFP-NEXT: vmov r0, s2 ; CHECK-VFP-NEXT: bx lr ; CHECK-VFP-NEXT: .p2align 2 ; CHECK-VFP-NEXT: @ %bb.1: @@ -377,12 +377,12 @@ define float @f11(i32 %a, i32 %b) nounwind uwtable readnone ssp { ; ; CHECK-NEON-LABEL: f11: ; CHECK-NEON: @ %bb.0: -; CHECK-NEON-NEXT: vldr s0, LCPI10_0 -; CHECK-NEON-NEXT: vmov.f32 s2, #-1.000000e+00 +; CHECK-NEON-NEXT: vldr s2, LCPI10_0 +; CHECK-NEON-NEXT: vmov.f32 s0, #-1.000000e+00 ; CHECK-NEON-NEXT: cmp r0, r1 ; CHECK-NEON-NEXT: it eq -; CHECK-NEON-NEXT: vmoveq.f32 s0, s2 -; CHECK-NEON-NEXT: vmov r0, s0 +; CHECK-NEON-NEXT: vmoveq.f32 s2, s0 +; CHECK-NEON-NEXT: vmov r0, s2 ; CHECK-NEON-NEXT: bx lr ; CHECK-NEON-NEXT: .p2align 2 ; CHECK-NEON-NEXT: @ %bb.1: @@ -406,11 +406,11 @@ define float @f12(i32 %a, i32 %b) nounwind uwtable readnone ssp { ; ; CHECK-VFP-LABEL: f12: ; CHECK-VFP: @ %bb.0: -; CHECK-VFP-NEXT: vmov.f32 s2, #1.000000e+00 -; CHECK-VFP-NEXT: vldr s0, .LCPI11_0 +; CHECK-VFP-NEXT: vmov.f32 s0, #1.000000e+00 +; CHECK-VFP-NEXT: vldr s2, .LCPI11_0 ; CHECK-VFP-NEXT: cmp r0, r1 -; CHECK-VFP-NEXT: vmoveq.f32 s0, s2 -; CHECK-VFP-NEXT: vmov r0, s0 +; CHECK-VFP-NEXT: vmoveq.f32 s2, s0 +; CHECK-VFP-NEXT: vmov r0, s2 ; CHECK-VFP-NEXT: bx lr ; CHECK-VFP-NEXT: .p2align 2 ; CHECK-VFP-NEXT: @ %bb.1: @@ -419,12 +419,12 @@ define float @f12(i32 %a, i32 %b) nounwind uwtable readnone ssp { ; ; CHECK-NEON-LABEL: f12: ; CHECK-NEON: @ %bb.0: -; 
CHECK-NEON-NEXT: vldr s0, LCPI11_0 -; CHECK-NEON-NEXT: vmov.f32 s2, #1.000000e+00 +; CHECK-NEON-NEXT: vldr s2, LCPI11_0 +; CHECK-NEON-NEXT: vmov.f32 s0, #1.000000e+00 ; CHECK-NEON-NEXT: cmp r0, r1 ; CHECK-NEON-NEXT: it eq -; CHECK-NEON-NEXT: vmoveq.f32 s0, s2 -; CHECK-NEON-NEXT: vmov r0, s0 +; CHECK-NEON-NEXT: vmoveq.f32 s2, s0 +; CHECK-NEON-NEXT: vmov r0, s2 ; CHECK-NEON-NEXT: bx lr ; CHECK-NEON-NEXT: .p2align 2 ; CHECK-NEON-NEXT: @ %bb.1: diff --git a/llvm/test/CodeGen/ARM/select_const.ll b/llvm/test/CodeGen/ARM/select_const.ll index df95af313eac6..180daa12e7c52 100644 --- a/llvm/test/CodeGen/ARM/select_const.ll +++ b/llvm/test/CodeGen/ARM/select_const.ll @@ -645,12 +645,13 @@ define i64 @opaque_constant1(i1 %cond, i64 %x) { ; THUMB2-NEXT: push {r7, lr} ; THUMB2-NEXT: ands r12, r0, #1 ; THUMB2-NEXT: mov.w lr, #1 -; THUMB2-NEXT: itt ne -; THUMB2-NEXT: movne.w lr, #65536 +; THUMB2-NEXT: it ne ; THUMB2-NEXT: movne.w r12, #1 +; THUMB2-NEXT: it ne +; THUMB2-NEXT: movne.w lr, #65536 ; THUMB2-NEXT: subs.w r0, lr, #1 -; THUMB2-NEXT: sbc r1, r12, #0 ; THUMB2-NEXT: eor r3, r3, #1 +; THUMB2-NEXT: sbc r1, r12, #0 ; THUMB2-NEXT: eor r2, r2, #65537 ; THUMB2-NEXT: orrs r2, r3 ; THUMB2-NEXT: itt ne @@ -688,11 +689,12 @@ define i64 @opaque_constant1(i1 %cond, i64 %x) { ; THUMB-NEXT: ldr r6, .LCPI24_0 ; THUMB-NEXT: eors r2, r6 ; THUMB-NEXT: orrs r2, r3 +; THUMB-NEXT: cmp r2, #0 ; THUMB-NEXT: beq .LBB24_5 ; THUMB-NEXT: @ %bb.4: -; THUMB-NEXT: movs r1, r4 +; THUMB-NEXT: mov r12, r4 +; THUMB-NEXT: mov r1, r12 ; THUMB-NEXT: .LBB24_5: -; THUMB-NEXT: cmp r2, #0 ; THUMB-NEXT: beq .LBB24_7 ; THUMB-NEXT: @ %bb.6: ; THUMB-NEXT: movs r0, r5 diff --git a/llvm/test/CodeGen/ARM/shift-i64.ll b/llvm/test/CodeGen/ARM/shift-i64.ll index 33e0ba1457e72..c326ac1529b2f 100644 --- a/llvm/test/CodeGen/ARM/shift-i64.ll +++ b/llvm/test/CodeGen/ARM/shift-i64.ll @@ -52,14 +52,14 @@ define i64 @test_lshr(i64 %val, i64 %amt) { define i64 @test_ashr(i64 %val, i64 %amt) { ; CHECK-LABEL: test_ashr: ; CHECK: @ 
%bb.0: -; CHECK-NEXT: asr r3, r1, r2 -; CHECK-NEXT: subs r12, r2, #32 +; CHECK-NEXT: rsb r3, r2, #32 ; CHECK-NEXT: lsr r0, r0, r2 -; CHECK-NEXT: rsb r2, r2, #32 -; CHECK-NEXT: asrpl r3, r1, #31 -; CHECK-NEXT: orr r0, r0, r1, lsl r2 -; CHECK-NEXT: asrpl r0, r1, r12 -; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: orr r0, r0, r1, lsl r3 +; CHECK-NEXT: subs r3, r2, #32 +; CHECK-NEXT: asr r2, r1, r2 +; CHECK-NEXT: asrpl r2, r1, #31 +; CHECK-NEXT: asrpl r0, r1, r3 +; CHECK-NEXT: mov r1, r2 ; CHECK-NEXT: mov pc, lr ; ; EXPAND-LABEL: test_ashr: diff --git a/llvm/test/CodeGen/ARM/ssub_sat.ll b/llvm/test/CodeGen/ARM/ssub_sat.ll index 1bafba3b49ed7..0978bfd1f0140 100644 --- a/llvm/test/CodeGen/ARM/ssub_sat.ll +++ b/llvm/test/CodeGen/ARM/ssub_sat.ll @@ -71,22 +71,21 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; CHECK-T1-NEXT: sbcs r2, r3 ; CHECK-T1-NEXT: eors r4, r2 ; CHECK-T1-NEXT: ands r4, r1 -; CHECK-T1-NEXT: asrs r1, r2, #31 +; CHECK-T1-NEXT: asrs r0, r2, #31 +; CHECK-T1-NEXT: movs r1, #1 +; CHECK-T1-NEXT: lsls r1, r1, #31 +; CHECK-T1-NEXT: eors r1, r0 ; CHECK-T1-NEXT: cmp r4, #0 -; CHECK-T1-NEXT: mov r0, r1 -; CHECK-T1-NEXT: bmi .LBB1_2 +; CHECK-T1-NEXT: bpl .LBB1_3 ; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: mov r0, r5 +; CHECK-T1-NEXT: bpl .LBB1_4 ; CHECK-T1-NEXT: .LBB1_2: -; CHECK-T1-NEXT: cmp r4, #0 -; CHECK-T1-NEXT: bmi .LBB1_4 -; CHECK-T1-NEXT: @ %bb.3: -; CHECK-T1-NEXT: mov r1, r2 ; CHECK-T1-NEXT: pop {r4, r5, r7, pc} +; CHECK-T1-NEXT: .LBB1_3: +; CHECK-T1-NEXT: mov r0, r5 +; CHECK-T1-NEXT: bmi .LBB1_2 ; CHECK-T1-NEXT: .LBB1_4: -; CHECK-T1-NEXT: movs r2, #1 -; CHECK-T1-NEXT: lsls r2, r2, #31 -; CHECK-T1-NEXT: eors r1, r2 +; CHECK-T1-NEXT: mov r1, r2 ; CHECK-T1-NEXT: pop {r4, r5, r7, pc} ; ; CHECK-T2-LABEL: func2: diff --git a/llvm/test/CodeGen/ARM/ssub_sat_plus.ll b/llvm/test/CodeGen/ARM/ssub_sat_plus.ll index 0a2d1f0e7a240..adf6cafc6ccb8 100644 --- a/llvm/test/CodeGen/ARM/ssub_sat_plus.ll +++ b/llvm/test/CodeGen/ARM/ssub_sat_plus.ll @@ -65,22 +65,22 @@ define i64 
@func64(i64 %x, i64 %y, i64 %z) nounwind { ; CHECK-T1-NEXT: sbcs r3, r2 ; CHECK-T1-NEXT: eors r1, r3 ; CHECK-T1-NEXT: ands r1, r5 -; CHECK-T1-NEXT: asrs r2, r3, #31 +; CHECK-T1-NEXT: asrs r0, r3, #31 +; CHECK-T1-NEXT: movs r2, #1 +; CHECK-T1-NEXT: lsls r2, r2, #31 +; CHECK-T1-NEXT: eors r2, r0 ; CHECK-T1-NEXT: cmp r1, #0 -; CHECK-T1-NEXT: mov r0, r2 -; CHECK-T1-NEXT: bmi .LBB1_2 +; CHECK-T1-NEXT: bpl .LBB1_3 ; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: mov r0, r4 +; CHECK-T1-NEXT: bpl .LBB1_4 ; CHECK-T1-NEXT: .LBB1_2: -; CHECK-T1-NEXT: cmp r1, #0 -; CHECK-T1-NEXT: bmi .LBB1_4 -; CHECK-T1-NEXT: @ %bb.3: -; CHECK-T1-NEXT: mov r1, r3 +; CHECK-T1-NEXT: mov r1, r2 ; CHECK-T1-NEXT: pop {r4, r5, r7, pc} +; CHECK-T1-NEXT: .LBB1_3: +; CHECK-T1-NEXT: mov r0, r4 +; CHECK-T1-NEXT: bmi .LBB1_2 ; CHECK-T1-NEXT: .LBB1_4: -; CHECK-T1-NEXT: movs r1, #1 -; CHECK-T1-NEXT: lsls r1, r1, #31 -; CHECK-T1-NEXT: eors r2, r1 +; CHECK-T1-NEXT: mov r2, r3 ; CHECK-T1-NEXT: mov r1, r2 ; CHECK-T1-NEXT: pop {r4, r5, r7, pc} ; diff --git a/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll b/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll index 046bbbde68642..fb966c29f39a2 100644 --- a/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll +++ b/llvm/test/CodeGen/ARM/sub-cmp-peephole.ll @@ -136,8 +136,8 @@ define float @float_sel(i32 %a, i32 %b, float %x, float %y) { ; ; CHECK-V8-LABEL: float_sel: ; CHECK-V8: @ %bb.0: @ %entry -; CHECK-V8-NEXT: vmov s0, r3 ; CHECK-V8-NEXT: subs r0, r0, r1 +; CHECK-V8-NEXT: vmov s0, r3 ; CHECK-V8-NEXT: vmov s2, r2 ; CHECK-V8-NEXT: vseleq.f32 s0, s2, s0 ; CHECK-V8-NEXT: vmov r0, s0 diff --git a/llvm/test/CodeGen/ARM/uadd_sat.ll b/llvm/test/CodeGen/ARM/uadd_sat.ll index 39c79f4104e6e..5549d9c6c29c0 100644 --- a/llvm/test/CodeGen/ARM/uadd_sat.ll +++ b/llvm/test/CodeGen/ARM/uadd_sat.ll @@ -45,21 +45,19 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; CHECK-T1-NEXT: movs r5, #0 ; CHECK-T1-NEXT: adds r4, r0, r2 ; CHECK-T1-NEXT: adcs r1, r3 -; CHECK-T1-NEXT: mov r3, r5 -; CHECK-T1-NEXT: adcs r3, r5 
+; CHECK-T1-NEXT: mov r0, r5 +; CHECK-T1-NEXT: adcs r0, r5 ; CHECK-T1-NEXT: mvns r2, r5 -; CHECK-T1-NEXT: cmp r3, #0 +; CHECK-T1-NEXT: cmp r0, #0 ; CHECK-T1-NEXT: mov r0, r2 ; CHECK-T1-NEXT: beq .LBB1_3 ; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: cmp r3, #0 ; CHECK-T1-NEXT: beq .LBB1_4 ; CHECK-T1-NEXT: .LBB1_2: ; CHECK-T1-NEXT: mov r1, r2 ; CHECK-T1-NEXT: pop {r4, r5, r7, pc} ; CHECK-T1-NEXT: .LBB1_3: ; CHECK-T1-NEXT: mov r0, r4 -; CHECK-T1-NEXT: cmp r3, #0 ; CHECK-T1-NEXT: bne .LBB1_2 ; CHECK-T1-NEXT: .LBB1_4: ; CHECK-T1-NEXT: mov r2, r1 diff --git a/llvm/test/CodeGen/ARM/uadd_sat_plus.ll b/llvm/test/CodeGen/ARM/uadd_sat_plus.ll index 451b32f730424..ffacba8cf0124 100644 --- a/llvm/test/CodeGen/ARM/uadd_sat_plus.ll +++ b/llvm/test/CodeGen/ARM/uadd_sat_plus.ll @@ -44,31 +44,29 @@ define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind { define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; CHECK-T1-LABEL: func64: ; CHECK-T1: @ %bb.0: -; CHECK-T1-NEXT: .save {r4, r5, r7, lr} -; CHECK-T1-NEXT: push {r4, r5, r7, lr} -; CHECK-T1-NEXT: movs r5, #0 -; CHECK-T1-NEXT: ldr r2, [sp, #20] -; CHECK-T1-NEXT: ldr r3, [sp, #16] +; CHECK-T1-NEXT: .save {r4, lr} +; CHECK-T1-NEXT: push {r4, lr} +; CHECK-T1-NEXT: movs r4, #0 +; CHECK-T1-NEXT: ldr r2, [sp, #12] +; CHECK-T1-NEXT: ldr r3, [sp, #8] ; CHECK-T1-NEXT: adds r3, r0, r3 ; CHECK-T1-NEXT: adcs r2, r1 -; CHECK-T1-NEXT: mov r4, r5 -; CHECK-T1-NEXT: adcs r4, r5 -; CHECK-T1-NEXT: mvns r1, r5 -; CHECK-T1-NEXT: cmp r4, #0 +; CHECK-T1-NEXT: mov r0, r4 +; CHECK-T1-NEXT: adcs r0, r4 +; CHECK-T1-NEXT: mvns r1, r4 +; CHECK-T1-NEXT: cmp r0, #0 ; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: beq .LBB1_3 ; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: cmp r4, #0 ; CHECK-T1-NEXT: beq .LBB1_4 ; CHECK-T1-NEXT: .LBB1_2: -; CHECK-T1-NEXT: pop {r4, r5, r7, pc} +; CHECK-T1-NEXT: pop {r4, pc} ; CHECK-T1-NEXT: .LBB1_3: ; CHECK-T1-NEXT: mov r0, r3 -; CHECK-T1-NEXT: cmp r4, #0 ; CHECK-T1-NEXT: bne .LBB1_2 ; CHECK-T1-NEXT: .LBB1_4: ; CHECK-T1-NEXT: mov r1, r2 -; 
CHECK-T1-NEXT: pop {r4, r5, r7, pc} +; CHECK-T1-NEXT: pop {r4, pc} ; ; CHECK-T2-LABEL: func64: ; CHECK-T2: @ %bb.0: diff --git a/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll index 464808ec8861b..4eb82c80e2bff 100644 --- a/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll @@ -7,207 +7,209 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; ARMV6: @ %bb.0: @ %start ; ARMV6-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; ARMV6-NEXT: sub sp, sp, #28 -; ARMV6-NEXT: ldr lr, [sp, #72] -; ARMV6-NEXT: mov r6, r0 -; ARMV6-NEXT: str r0, [sp, #8] @ 4-byte Spill -; ARMV6-NEXT: ldr r4, [sp, #84] -; ARMV6-NEXT: umull r1, r0, r2, lr -; ARMV6-NEXT: umull r5, r10, r4, r2 -; ARMV6-NEXT: str r1, [r6] +; ARMV6-NEXT: ldr r4, [sp, #72] +; ARMV6-NEXT: mov r7, r0 +; ARMV6-NEXT: str r0, [sp, #4] @ 4-byte Spill +; ARMV6-NEXT: ldr r12, [sp, #64] +; ARMV6-NEXT: umull r1, r0, r2, r4 +; ARMV6-NEXT: ldr r5, [sp, #68] +; ARMV6-NEXT: str r1, [r7] +; ARMV6-NEXT: ldr r1, [sp, #76] +; ARMV6-NEXT: umull r7, r6, r1, r12 +; ARMV6-NEXT: str r6, [sp, #8] @ 4-byte Spill +; ARMV6-NEXT: umull r6, r9, r5, r4 +; ARMV6-NEXT: add r7, r6, r7 +; ARMV6-NEXT: umull r4, r6, r12, r4 +; ARMV6-NEXT: str r4, [sp, #16] @ 4-byte Spill +; ARMV6-NEXT: mov r4, #0 +; ARMV6-NEXT: adds r8, r6, r7 ; ARMV6-NEXT: ldr r6, [sp, #80] -; ARMV6-NEXT: umull r1, r7, r3, r6 -; ARMV6-NEXT: str r7, [sp, #12] @ 4-byte Spill -; ARMV6-NEXT: add r1, r5, r1 -; ARMV6-NEXT: umull r7, r5, r6, r2 -; ARMV6-NEXT: mov r6, lr -; ARMV6-NEXT: str r7, [sp, #16] @ 4-byte Spill -; ARMV6-NEXT: mov r7, #0 -; ARMV6-NEXT: adds r1, r5, r1 -; ARMV6-NEXT: str r1, [sp, #4] @ 4-byte Spill -; ARMV6-NEXT: adc r1, r7, #0 -; ARMV6-NEXT: str r1, [sp, #24] @ 4-byte Spill -; ARMV6-NEXT: ldr r1, [sp, #64] -; ARMV6-NEXT: ldr r7, [sp, #76] -; ARMV6-NEXT: ldr r5, [sp, #64] -; ARMV6-NEXT: umull r12, r9, r7, r1 -; 
ARMV6-NEXT: ldr r1, [sp, #68] -; ARMV6-NEXT: umull r11, r8, r1, lr +; ARMV6-NEXT: adc r7, r4, #0 +; ARMV6-NEXT: ldr r4, [sp, #84] +; ARMV6-NEXT: str r7, [sp, #24] @ 4-byte Spill +; ARMV6-NEXT: umull r12, lr, r3, r6 +; ARMV6-NEXT: umull r11, r7, r4, r2 ; ARMV6-NEXT: add r12, r11, r12 -; ARMV6-NEXT: umull r11, lr, r5, lr -; ARMV6-NEXT: mov r5, r6 -; ARMV6-NEXT: mov r6, #0 -; ARMV6-NEXT: adds r12, lr, r12 -; ARMV6-NEXT: umull r2, lr, r2, r7 -; ARMV6-NEXT: adc r6, r6, #0 +; ARMV6-NEXT: umull r11, r10, r6, r2 +; ARMV6-NEXT: adds r12, r10, r12 +; ARMV6-NEXT: mov r10, #0 +; ARMV6-NEXT: adc r6, r10, #0 ; ARMV6-NEXT: str r6, [sp, #20] @ 4-byte Spill ; ARMV6-NEXT: ldr r6, [sp, #16] @ 4-byte Reload -; ARMV6-NEXT: adds r11, r11, r6 -; ARMV6-NEXT: ldr r6, [sp, #4] @ 4-byte Reload -; ARMV6-NEXT: adc r6, r12, r6 -; ARMV6-NEXT: mov r12, #0 -; ARMV6-NEXT: umlal r0, r12, r3, r5 -; ARMV6-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; ARMV6-NEXT: adds r6, r6, r11 +; ARMV6-NEXT: str r6, [sp, #12] @ 4-byte Spill +; ARMV6-NEXT: adc r6, r8, r12 ; ARMV6-NEXT: str r6, [sp, #16] @ 4-byte Spill -; ARMV6-NEXT: ldr r6, [sp, #64] +; ARMV6-NEXT: ldr r6, [sp, #72] +; ARMV6-NEXT: mov r12, #0 +; ARMV6-NEXT: umull r2, r8, r2, r1 +; ARMV6-NEXT: umlal r0, r12, r3, r6 ; ARMV6-NEXT: adds r0, r2, r0 -; ARMV6-NEXT: str r0, [r5, #4] -; ARMV6-NEXT: adcs r0, r12, lr -; ARMV6-NEXT: mov r2, #0 -; ARMV6-NEXT: adc r2, r2, #0 -; ARMV6-NEXT: orrs lr, r6, r1 -; ARMV6-NEXT: ldr r6, [sp, #80] +; ARMV6-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; ARMV6-NEXT: adcs r8, r12, r8 +; ARMV6-NEXT: adc r12, r10, #0 +; ARMV6-NEXT: cmp lr, #0 +; ARMV6-NEXT: str r0, [r2, #4] ; ARMV6-NEXT: movne lr, #1 -; ARMV6-NEXT: umlal r0, r2, r3, r7 -; ARMV6-NEXT: orrs r12, r6, r4 -; ARMV6-NEXT: movne r12, #1 +; ARMV6-NEXT: ldr r11, [sp, #8] @ 4-byte Reload +; ARMV6-NEXT: cmp r7, #0 +; ARMV6-NEXT: movne r7, #1 +; ARMV6-NEXT: ldr r0, [sp, #64] +; ARMV6-NEXT: cmp r11, #0 +; ARMV6-NEXT: umlal r8, r12, r3, r1 +; ARMV6-NEXT: movne r11, #1 ; ARMV6-NEXT: cmp 
r9, #0 -; ARMV6-NEXT: ldr r6, [sp, #12] @ 4-byte Reload ; ARMV6-NEXT: movne r9, #1 -; ARMV6-NEXT: cmp r8, #0 -; ARMV6-NEXT: movne r8, #1 -; ARMV6-NEXT: cmp r6, #0 -; ARMV6-NEXT: movne r6, #1 -; ARMV6-NEXT: cmp r10, #0 +; ARMV6-NEXT: orrs r10, r0, r5 +; ARMV6-NEXT: ldr r0, [sp, #80] ; ARMV6-NEXT: movne r10, #1 -; ARMV6-NEXT: cmp r1, #0 -; ARMV6-NEXT: movne r1, #1 -; ARMV6-NEXT: cmp r7, #0 -; ARMV6-NEXT: movne r7, #1 +; ARMV6-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; ARMV6-NEXT: orrs r0, r0, r4 +; ARMV6-NEXT: movne r0, #1 ; ARMV6-NEXT: cmp r4, #0 ; ARMV6-NEXT: movne r4, #1 ; ARMV6-NEXT: cmp r3, #0 ; ARMV6-NEXT: movne r3, #1 -; ARMV6-NEXT: adds r0, r0, r11 -; ARMV6-NEXT: str r0, [r5, #8] -; ARMV6-NEXT: and r1, r1, r7 -; ARMV6-NEXT: ldr r0, [sp, #16] @ 4-byte Reload -; ARMV6-NEXT: orr r1, r1, r8 +; ARMV6-NEXT: cmp r5, #0 +; ARMV6-NEXT: movne r5, #1 +; ARMV6-NEXT: cmp r1, #0 +; ARMV6-NEXT: movne r1, #1 +; ARMV6-NEXT: adds r6, r8, r6 +; ARMV6-NEXT: str r6, [r2, #8] +; ARMV6-NEXT: and r1, r5, r1 +; ARMV6-NEXT: ldr r6, [sp, #16] @ 4-byte Reload ; ARMV6-NEXT: orr r1, r1, r9 -; ARMV6-NEXT: adcs r0, r2, r0 -; ARMV6-NEXT: str r0, [r5, #12] -; ARMV6-NEXT: and r0, r4, r3 -; ARMV6-NEXT: ldr r2, [sp, #24] @ 4-byte Reload -; ARMV6-NEXT: orr r0, r0, r10 -; ARMV6-NEXT: orr r0, r0, r6 -; ARMV6-NEXT: orr r0, r0, r2 -; ARMV6-NEXT: ldr r2, [sp, #20] @ 4-byte Reload -; ARMV6-NEXT: orr r1, r1, r2 -; ARMV6-NEXT: and r2, lr, r12 -; ARMV6-NEXT: orr r1, r2, r1 -; ARMV6-NEXT: orr r0, r1, r0 +; ARMV6-NEXT: orr r1, r1, r11 +; ARMV6-NEXT: and r0, r10, r0 +; ARMV6-NEXT: adcs r6, r12, r6 +; ARMV6-NEXT: str r6, [r2, #12] +; ARMV6-NEXT: ldr r6, [sp, #24] @ 4-byte Reload +; ARMV6-NEXT: orr r1, r1, r6 +; ARMV6-NEXT: orr r0, r0, r1 +; ARMV6-NEXT: and r1, r4, r3 +; ARMV6-NEXT: orr r1, r1, r7 +; ARMV6-NEXT: ldr r3, [sp, #20] @ 4-byte Reload +; ARMV6-NEXT: orr r1, r1, lr +; ARMV6-NEXT: orr r1, r1, r3 +; ARMV6-NEXT: orr r0, r0, r1 ; ARMV6-NEXT: mov r1, #0 ; ARMV6-NEXT: adc r1, r1, #0 ; ARMV6-NEXT: orr r0, 
r0, r1 ; ARMV6-NEXT: and r0, r0, #1 -; ARMV6-NEXT: strb r0, [r5, #16] +; ARMV6-NEXT: strb r0, [r2, #16] ; ARMV6-NEXT: add sp, sp, #28 ; ARMV6-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; ; ARMV7-LABEL: muloti_test: ; ARMV7: @ %bb.0: @ %start ; ARMV7-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; ARMV7-NEXT: sub sp, sp, #36 -; ARMV7-NEXT: ldr r5, [sp, #84] -; ARMV7-NEXT: mov r8, r0 -; ARMV7-NEXT: ldr r1, [sp, #72] -; ARMV7-NEXT: ldr r10, [sp, #80] -; ARMV7-NEXT: ldr r9, [sp, #76] -; ARMV7-NEXT: umull r4, lr, r5, r1 -; ARMV7-NEXT: umull r0, r7, r2, r10 -; ARMV7-NEXT: str r4, [sp, #24] @ 4-byte Spill -; ARMV7-NEXT: ldr r4, [sp, #88] -; ARMV7-NEXT: umull r1, r6, r1, r10 +; ARMV7-NEXT: sub sp, sp, #44 +; ARMV7-NEXT: ldr r8, [sp, #88] +; ARMV7-NEXT: mov r9, r0 +; ARMV7-NEXT: ldr r7, [sp, #96] +; ARMV7-NEXT: ldr lr, [sp, #100] +; ARMV7-NEXT: umull r0, r5, r2, r8 +; ARMV7-NEXT: ldr r4, [sp, #80] ; ARMV7-NEXT: str r0, [sp, #32] @ 4-byte Spill -; ARMV7-NEXT: umull r11, r0, r2, r5 -; ARMV7-NEXT: str r6, [sp, #20] @ 4-byte Spill -; ARMV7-NEXT: str r1, [sp, #28] @ 4-byte Spill -; ARMV7-NEXT: umull r6, r12, r3, r4 +; ARMV7-NEXT: umull r1, r0, r3, r7 +; ARMV7-NEXT: str r0, [sp, #4] @ 4-byte Spill +; ARMV7-NEXT: umull r0, r11, lr, r2 +; ARMV7-NEXT: str r1, [sp, #20] @ 4-byte Spill ; ARMV7-NEXT: ldr r1, [sp, #92] -; ARMV7-NEXT: str r0, [sp, #8] @ 4-byte Spill -; ARMV7-NEXT: mov r0, #0 -; ARMV7-NEXT: umlal r7, r0, r3, r10 +; ARMV7-NEXT: str r0, [sp] @ 4-byte Spill +; ARMV7-NEXT: umull r0, r10, r7, r2 +; ARMV7-NEXT: mov r7, r1 +; ARMV7-NEXT: umull r6, r12, r1, r4 +; ARMV7-NEXT: str r0, [sp, #40] @ 4-byte Spill +; ARMV7-NEXT: ldr r0, [sp, #84] +; ARMV7-NEXT: str r6, [sp, #24] @ 4-byte Spill +; ARMV7-NEXT: umull r6, r1, r0, r8 ; ARMV7-NEXT: str r6, [sp, #16] @ 4-byte Spill -; ARMV7-NEXT: umull r6, r1, r1, r2 -; ARMV7-NEXT: umull r2, r4, r4, r2 -; ARMV7-NEXT: str r6, [sp, #4] @ 4-byte Spill +; ARMV7-NEXT: umull r6, r2, r2, r7 +; ARMV7-NEXT: mov r7, r4 +; ARMV7-NEXT: str r6, 
[sp, #8] @ 4-byte Spill ; ARMV7-NEXT: str r2, [sp, #12] @ 4-byte Spill -; ARMV7-NEXT: adds r2, r11, r7 -; ARMV7-NEXT: ldr r7, [sp, #8] @ 4-byte Reload -; ARMV7-NEXT: mov r11, #0 -; ARMV7-NEXT: str r4, [sp] @ 4-byte Spill -; ARMV7-NEXT: umull r6, r4, r9, r10 -; ARMV7-NEXT: adcs r9, r0, r7 -; ARMV7-NEXT: ldr r0, [sp, #32] @ 4-byte Reload -; ARMV7-NEXT: adc r10, r11, #0 -; ARMV7-NEXT: stm r8, {r0, r2} -; ARMV7-NEXT: ldr r0, [sp, #24] @ 4-byte Reload -; ARMV7-NEXT: umlal r9, r10, r3, r5 +; ARMV7-NEXT: umull r2, r6, r4, r8 +; ARMV7-NEXT: str r2, [sp, #36] @ 4-byte Spill +; ARMV7-NEXT: ldr r2, [sp, #32] @ 4-byte Reload +; ARMV7-NEXT: str r6, [sp, #28] @ 4-byte Spill +; ARMV7-NEXT: mov r6, #0 +; ARMV7-NEXT: str r2, [r9] +; ARMV7-NEXT: umlal r5, r6, r3, r8 ; ARMV7-NEXT: ldr r2, [sp, #20] @ 4-byte Reload -; ARMV7-NEXT: add r0, r6, r0 -; ARMV7-NEXT: adds r0, r2, r0 -; ARMV7-NEXT: ldr r6, [sp, #4] @ 4-byte Reload -; ARMV7-NEXT: adc r2, r11, #0 +; ARMV7-NEXT: ldr r4, [sp] @ 4-byte Reload +; ARMV7-NEXT: add r4, r4, r2 +; ARMV7-NEXT: adds r2, r10, r4 +; ARMV7-NEXT: str r2, [sp, #20] @ 4-byte Spill +; ARMV7-NEXT: mov r2, #0 +; ARMV7-NEXT: adc r2, r2, #0 +; ARMV7-NEXT: cmp r12, #0 ; ARMV7-NEXT: str r2, [sp, #32] @ 4-byte Spill -; ARMV7-NEXT: ldr r2, [sp, #16] @ 4-byte Reload -; ARMV7-NEXT: ldr r7, [sp, #28] @ 4-byte Reload -; ARMV7-NEXT: add r2, r6, r2 -; ARMV7-NEXT: ldr r6, [sp] @ 4-byte Reload -; ARMV7-NEXT: adds r2, r6, r2 -; ARMV7-NEXT: ldr r6, [sp, #12] @ 4-byte Reload -; ARMV7-NEXT: adc r11, r11, #0 -; ARMV7-NEXT: adds r7, r7, r6 -; ARMV7-NEXT: ldr r6, [sp, #92] -; ARMV7-NEXT: adc r0, r0, r2 -; ARMV7-NEXT: str r0, [sp, #28] @ 4-byte Spill -; ARMV7-NEXT: ldr r0, [sp, #92] -; ARMV7-NEXT: cmp r3, #0 -; ARMV7-NEXT: movwne r3, #1 -; ARMV7-NEXT: ldr r2, [sp, #76] -; ARMV7-NEXT: cmp r0, #0 -; ARMV7-NEXT: movwne r0, #1 +; ARMV7-NEXT: movwne r12, #1 ; ARMV7-NEXT: cmp r1, #0 +; ARMV7-NEXT: ldr r2, [sp, #96] ; ARMV7-NEXT: movwne r1, #1 -; ARMV7-NEXT: cmp r12, #0 -; ARMV7-NEXT: and r0, 
r0, r3 -; ARMV7-NEXT: movwne r12, #1 -; ARMV7-NEXT: cmp r5, #0 -; ARMV7-NEXT: orr r0, r0, r1 -; ARMV7-NEXT: movwne r5, #1 +; ARMV7-NEXT: orrs r10, r7, r0 +; ARMV7-NEXT: movwne r10, #1 +; ARMV7-NEXT: orrs r7, r2, lr +; ARMV7-NEXT: ldr r2, [sp, #92] +; ARMV7-NEXT: movwne r7, #1 +; ARMV7-NEXT: cmp r0, #0 +; ARMV7-NEXT: movwne r0, #1 ; ARMV7-NEXT: cmp r2, #0 -; ARMV7-NEXT: mov r1, r2 -; ARMV7-NEXT: mov r3, r2 -; ARMV7-NEXT: movwne r1, #1 +; ARMV7-NEXT: mov r4, r2 +; ARMV7-NEXT: mov r8, r2 +; ARMV7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; ARMV7-NEXT: movwne r4, #1 +; ARMV7-NEXT: and r0, r0, r4 +; ARMV7-NEXT: mov r4, #0 +; ARMV7-NEXT: adds r5, r2, r5 +; ARMV7-NEXT: str r5, [r9, #4] +; ARMV7-NEXT: orr r0, r0, r1 +; ARMV7-NEXT: ldr r1, [sp, #24] @ 4-byte Reload +; ARMV7-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; ARMV7-NEXT: and r5, r10, r7 +; ARMV7-NEXT: orr r0, r0, r12 +; ARMV7-NEXT: mov r12, #0 +; ARMV7-NEXT: add r1, r2, r1 +; ARMV7-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; ARMV7-NEXT: adcs r2, r6, r2 +; ARMV7-NEXT: ldr r6, [sp, #28] @ 4-byte Reload +; ARMV7-NEXT: adc r7, r4, #0 +; ARMV7-NEXT: adds r1, r6, r1 +; ARMV7-NEXT: umlal r2, r7, r3, r8 +; ARMV7-NEXT: adc r4, r4, #0 +; ARMV7-NEXT: orr r0, r0, r4 +; ARMV7-NEXT: orr r0, r5, r0 +; ARMV7-NEXT: ldr r4, [sp, #40] @ 4-byte Reload +; ARMV7-NEXT: ldr r5, [sp, #36] @ 4-byte Reload +; ARMV7-NEXT: adds r5, r5, r4 +; ARMV7-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; ARMV7-NEXT: adc r1, r1, r4 +; ARMV7-NEXT: ldr r4, [sp, #4] @ 4-byte Reload ; ARMV7-NEXT: cmp r4, #0 -; ARMV7-NEXT: ldr r2, [sp, #72] ; ARMV7-NEXT: movwne r4, #1 +; ARMV7-NEXT: cmp r3, #0 +; ARMV7-NEXT: movwne r3, #1 ; ARMV7-NEXT: cmp lr, #0 -; ARMV7-NEXT: and r1, r1, r5 ; ARMV7-NEXT: movwne lr, #1 -; ARMV7-NEXT: orrs r2, r2, r3 -; ARMV7-NEXT: ldr r3, [sp, #88] -; ARMV7-NEXT: movwne r2, #1 +; ARMV7-NEXT: cmp r11, #0 +; ARMV7-NEXT: movwne r11, #1 +; ARMV7-NEXT: adds r2, r2, r5 +; ARMV7-NEXT: and r3, lr, r3 +; ARMV7-NEXT: str r2, [r9, #8] +; ARMV7-NEXT: adcs r1, r7, 
r1 +; ARMV7-NEXT: str r1, [r9, #12] +; ARMV7-NEXT: orr r1, r3, r11 +; ARMV7-NEXT: ldr r2, [sp, #32] @ 4-byte Reload ; ARMV7-NEXT: orr r1, r1, r4 -; ARMV7-NEXT: orr r0, r0, r12 -; ARMV7-NEXT: orrs r3, r3, r6 -; ARMV7-NEXT: orr r1, r1, lr -; ARMV7-NEXT: movwne r3, #1 -; ARMV7-NEXT: adds r7, r9, r7 -; ARMV7-NEXT: str r7, [r8, #8] -; ARMV7-NEXT: and r2, r2, r3 -; ARMV7-NEXT: ldr r7, [sp, #28] @ 4-byte Reload -; ARMV7-NEXT: orr r0, r0, r11 -; ARMV7-NEXT: adcs r7, r10, r7 -; ARMV7-NEXT: str r7, [r8, #12] -; ARMV7-NEXT: ldr r7, [sp, #32] @ 4-byte Reload -; ARMV7-NEXT: orr r1, r1, r7 -; ARMV7-NEXT: orr r1, r2, r1 -; ARMV7-NEXT: orr r0, r1, r0 -; ARMV7-NEXT: mov r1, #0 -; ARMV7-NEXT: adc r1, r1, #0 +; ARMV7-NEXT: orr r1, r1, r2 +; ARMV7-NEXT: orr r0, r0, r1 +; ARMV7-NEXT: adc r1, r12, #0 ; ARMV7-NEXT: orr r0, r0, r1 ; ARMV7-NEXT: and r0, r0, #1 -; ARMV7-NEXT: strb r0, [r8, #16] -; ARMV7-NEXT: add sp, sp, #36 +; ARMV7-NEXT: strb r0, [r9, #16] +; ARMV7-NEXT: add sp, sp, #44 ; ARMV7-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} start: %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2 diff --git a/llvm/test/CodeGen/ARM/umulo-64-legalisation-lowering.ll b/llvm/test/CodeGen/ARM/umulo-64-legalisation-lowering.ll index ddf033b19b949..64d9831442970 100644 --- a/llvm/test/CodeGen/ARM/umulo-64-legalisation-lowering.ll +++ b/llvm/test/CodeGen/ARM/umulo-64-legalisation-lowering.ll @@ -5,50 +5,49 @@ define { i64, i8 } @mulodi_test(i64 %l, i64 %r) unnamed_addr #0 { ; ARMV6-LABEL: mulodi_test: ; ARMV6: @ %bb.0: @ %start -; ARMV6-NEXT: push {r4, r5, r6, lr} -; ARMV6-NEXT: umull r12, lr, r3, r0 -; ARMV6-NEXT: mov r6, #0 -; ARMV6-NEXT: umull r4, r5, r1, r2 -; ARMV6-NEXT: umull r0, r2, r0, r2 -; ARMV6-NEXT: add r4, r4, r12 -; ARMV6-NEXT: adds r12, r2, r4 -; ARMV6-NEXT: adc r2, r6, #0 +; ARMV6-NEXT: push {r4, r5, r11, lr} +; ARMV6-NEXT: umull r12, lr, r1, r2 +; ARMV6-NEXT: umull r4, r5, r3, r0 +; ARMV6-NEXT: cmp lr, #0 +; ARMV6-NEXT: movne lr, #1 ; ARMV6-NEXT: 
cmp r3, #0 ; ARMV6-NEXT: movne r3, #1 ; ARMV6-NEXT: cmp r1, #0 +; ARMV6-NEXT: umull r0, r2, r0, r2 ; ARMV6-NEXT: movne r1, #1 -; ARMV6-NEXT: cmp r5, #0 ; ARMV6-NEXT: and r1, r1, r3 -; ARMV6-NEXT: movne r5, #1 -; ARMV6-NEXT: cmp lr, #0 -; ARMV6-NEXT: orr r1, r1, r5 -; ARMV6-NEXT: movne lr, #1 +; ARMV6-NEXT: cmp r5, #0 ; ARMV6-NEXT: orr r1, r1, lr -; ARMV6-NEXT: orr r2, r1, r2 -; ARMV6-NEXT: mov r1, r12 -; ARMV6-NEXT: pop {r4, r5, r6, pc} +; ARMV6-NEXT: movne r5, #1 +; ARMV6-NEXT: orr r3, r1, r5 +; ARMV6-NEXT: add r1, r12, r4 +; ARMV6-NEXT: adds r1, r2, r1 +; ARMV6-NEXT: mov r5, #0 +; ARMV6-NEXT: adc r2, r5, #0 +; ARMV6-NEXT: orr r2, r3, r2 +; ARMV6-NEXT: pop {r4, r5, r11, pc} ; ; ARMV7-LABEL: mulodi_test: ; ARMV7: @ %bb.0: @ %start ; ARMV7-NEXT: push {r4, r5, r11, lr} -; ARMV7-NEXT: umull r12, lr, r1, r2 +; ARMV7-NEXT: umull r12, lr, r3, r0 ; ARMV7-NEXT: cmp r3, #0 -; ARMV7-NEXT: umull r4, r5, r3, r0 ; ARMV7-NEXT: movwne r3, #1 ; ARMV7-NEXT: cmp r1, #0 +; ARMV7-NEXT: umull r0, r4, r0, r2 +; ARMV7-NEXT: umull r2, r5, r1, r2 ; ARMV7-NEXT: movwne r1, #1 -; ARMV7-NEXT: umull r0, r2, r0, r2 -; ARMV7-NEXT: cmp lr, #0 ; ARMV7-NEXT: and r1, r1, r3 -; ARMV7-NEXT: movwne lr, #1 ; ARMV7-NEXT: cmp r5, #0 -; ARMV7-NEXT: orr r1, r1, lr ; ARMV7-NEXT: movwne r5, #1 -; ARMV7-NEXT: orr r3, r1, r5 -; ARMV7-NEXT: add r1, r12, r4 -; ARMV7-NEXT: mov r5, #0 -; ARMV7-NEXT: adds r1, r2, r1 -; ARMV7-NEXT: adc r2, r5, #0 +; ARMV7-NEXT: cmp lr, #0 +; ARMV7-NEXT: orr r1, r1, r5 +; ARMV7-NEXT: movwne lr, #1 +; ARMV7-NEXT: orr r3, r1, lr +; ARMV7-NEXT: add r1, r2, r12 +; ARMV7-NEXT: mov r2, #0 +; ARMV7-NEXT: adds r1, r4, r1 +; ARMV7-NEXT: adc r2, r2, #0 ; ARMV7-NEXT: orr r2, r3, r2 ; ARMV7-NEXT: pop {r4, r5, r11, pc} start: diff --git a/llvm/test/CodeGen/ARM/usub_sat.ll b/llvm/test/CodeGen/ARM/usub_sat.ll index 9c2fd3966ea98..73e6dafc08590 100644 --- a/llvm/test/CodeGen/ARM/usub_sat.ll +++ b/llvm/test/CodeGen/ARM/usub_sat.ll @@ -49,16 +49,15 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; 
CHECK-T1-NEXT: adcs r0, r1 ; CHECK-T1-NEXT: movs r3, #1 ; CHECK-T1-NEXT: eors r3, r0 +; CHECK-T1-NEXT: cmp r3, #0 ; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: beq .LBB1_3 ; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: cmp r3, #0 ; CHECK-T1-NEXT: beq .LBB1_4 ; CHECK-T1-NEXT: .LBB1_2: ; CHECK-T1-NEXT: pop {r4, pc} ; CHECK-T1-NEXT: .LBB1_3: ; CHECK-T1-NEXT: mov r0, r2 -; CHECK-T1-NEXT: cmp r3, #0 ; CHECK-T1-NEXT: bne .LBB1_2 ; CHECK-T1-NEXT: .LBB1_4: ; CHECK-T1-NEXT: mov r1, r4 @@ -70,7 +69,7 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; CHECK-T2-NEXT: mov.w r12, #0 ; CHECK-T2-NEXT: sbcs r1, r3 ; CHECK-T2-NEXT: adc r2, r12, #0 -; CHECK-T2-NEXT: eors r2, r2, #1 +; CHECK-T2-NEXT: teq.w r2, #1 ; CHECK-T2-NEXT: itt ne ; CHECK-T2-NEXT: movne r0, #0 ; CHECK-T2-NEXT: movne r1, #0 @@ -82,7 +81,7 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; CHECK-ARM-NEXT: mov r12, #0 ; CHECK-ARM-NEXT: sbcs r1, r1, r3 ; CHECK-ARM-NEXT: adc r2, r12, #0 -; CHECK-ARM-NEXT: eors r2, r2, #1 +; CHECK-ARM-NEXT: teq r2, #1 ; CHECK-ARM-NEXT: movwne r0, #0 ; CHECK-ARM-NEXT: movwne r1, #0 ; CHECK-ARM-NEXT: bx lr diff --git a/llvm/test/CodeGen/ARM/usub_sat_plus.ll b/llvm/test/CodeGen/ARM/usub_sat_plus.ll index 51ec83c707603..a465a413c6d0e 100644 --- a/llvm/test/CodeGen/ARM/usub_sat_plus.ll +++ b/llvm/test/CodeGen/ARM/usub_sat_plus.ll @@ -55,16 +55,15 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; CHECK-T1-NEXT: adcs r0, r1 ; CHECK-T1-NEXT: movs r4, #1 ; CHECK-T1-NEXT: eors r4, r0 +; CHECK-T1-NEXT: cmp r4, #0 ; CHECK-T1-NEXT: mov r0, r1 ; CHECK-T1-NEXT: beq .LBB1_3 ; CHECK-T1-NEXT: @ %bb.1: -; CHECK-T1-NEXT: cmp r4, #0 ; CHECK-T1-NEXT: beq .LBB1_4 ; CHECK-T1-NEXT: .LBB1_2: ; CHECK-T1-NEXT: pop {r4, pc} ; CHECK-T1-NEXT: .LBB1_3: ; CHECK-T1-NEXT: mov r0, r3 -; CHECK-T1-NEXT: cmp r4, #0 ; CHECK-T1-NEXT: bne .LBB1_2 ; CHECK-T1-NEXT: .LBB1_4: ; CHECK-T1-NEXT: mov r1, r2 @@ -77,7 +76,7 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; CHECK-T2-NEXT: subs r0, r0, r2 ; CHECK-T2-NEXT: 
sbcs r1, r3 ; CHECK-T2-NEXT: adc r2, r12, #0 -; CHECK-T2-NEXT: eors r2, r2, #1 +; CHECK-T2-NEXT: teq.w r2, #1 ; CHECK-T2-NEXT: itt ne ; CHECK-T2-NEXT: movne r0, #0 ; CHECK-T2-NEXT: movne r1, #0 @@ -91,7 +90,7 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind { ; CHECK-ARM-NEXT: subs r0, r0, r2 ; CHECK-ARM-NEXT: sbcs r1, r1, r3 ; CHECK-ARM-NEXT: adc r2, r12, #0 -; CHECK-ARM-NEXT: eors r2, r2, #1 +; CHECK-ARM-NEXT: teq r2, #1 ; CHECK-ARM-NEXT: movwne r0, #0 ; CHECK-ARM-NEXT: movwne r1, #0 ; CHECK-ARM-NEXT: bx lr diff --git a/llvm/test/CodeGen/ARM/vselect_imax.ll b/llvm/test/CodeGen/ARM/vselect_imax.ll index 9f0edb7117bd1..bd5e3061f0d18 100644 --- a/llvm/test/CodeGen/ARM/vselect_imax.ll +++ b/llvm/test/CodeGen/ARM/vselect_imax.ll @@ -111,8 +111,8 @@ define void @func_blend15(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea define void @func_blend18(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) { ; CHECK-LABEL: func_blend18: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]! ; CHECK-NEXT: vld1.64 {d22, d23}, [r0:128]! 
; CHECK-NEXT: vmov r4, r6, d16 @@ -122,7 +122,6 @@ define void @func_blend18(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: vmov r2, r1, d20 ; CHECK-NEXT: subs r2, r2, lr -; CHECK-NEXT: vmov r7, lr, d17 ; CHECK-NEXT: vmov r2, r5, d22 ; CHECK-NEXT: sbcs r1, r1, r12 ; CHECK-NEXT: mov r1, #0 @@ -131,33 +130,34 @@ define void @func_blend18(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: mvnne r1, #0 ; CHECK-NEXT: subs r2, r2, r4 ; CHECK-NEXT: sbcs r6, r5, r6 -; CHECK-NEXT: vmov r2, r12, d19 -; CHECK-NEXT: vmov r5, r4, d21 +; CHECK-NEXT: vmov r2, r12, d17 +; CHECK-NEXT: vmov r5, r4, d23 ; CHECK-NEXT: mov r6, #0 ; CHECK-NEXT: movlt r6, #1 ; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: mvnne r6, #0 ; CHECK-NEXT: subs r2, r5, r2 -; CHECK-NEXT: sbcs r4, r4, r12 +; CHECK-NEXT: sbcs r2, r4, r12 +; CHECK-NEXT: vmov lr, r12, d19 +; CHECK-NEXT: vmov r4, r5, d21 ; CHECK-NEXT: mov r2, #0 -; CHECK-NEXT: vmov r4, r5, d23 ; CHECK-NEXT: movlt r2, #1 -; CHECK-NEXT: subs r7, r4, r7 -; CHECK-NEXT: sbcs r7, r5, lr -; CHECK-NEXT: movlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mvnne r0, #0 ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: vdup.32 d25, r0 ; CHECK-NEXT: mvnne r2, #0 +; CHECK-NEXT: vdup.32 d25, r2 ; CHECK-NEXT: vdup.32 d24, r6 -; CHECK-NEXT: vdup.32 d27, r2 ; CHECK-NEXT: vbit q8, q11, q12 +; CHECK-NEXT: subs r4, r4, lr +; CHECK-NEXT: sbcs r5, r5, r12 +; CHECK-NEXT: movlt r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mvnne r0, #0 +; CHECK-NEXT: vdup.32 d27, r0 ; CHECK-NEXT: vdup.32 d26, r1 ; CHECK-NEXT: vbit q9, q10, q13 ; CHECK-NEXT: vst1.64 {d16, d17}, [r3:128]! 
; CHECK-NEXT: vst1.64 {d18, d19}, [r3:128] -; CHECK-NEXT: pop {r4, r5, r6, r7, r11, lr} +; CHECK-NEXT: pop {r4, r5, r6, lr} ; CHECK-NEXT: mov pc, lr ; COST: func_blend18 ; COST: cost of 0 {{.*}} icmp @@ -198,12 +198,21 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: mvnne r12, #0 ; CHECK-NEXT: subs r1, r1, r2 ; CHECK-NEXT: sbcs r0, r4, r0 -; CHECK-NEXT: vmov r2, r4, d26 +; CHECK-NEXT: vmov r2, r4, d24 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: movlt r0, #1 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mvnne r0, #0 ; CHECK-NEXT: vdup.32 d1, r0 +; CHECK-NEXT: vmov r0, r1, d20 +; CHECK-NEXT: subs r0, r2, r0 +; CHECK-NEXT: sbcs r0, r4, r1 +; CHECK-NEXT: vmov r2, r4, d26 +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: movlt r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mvnne r0, #0 +; CHECK-NEXT: vdup.32 d0, r0 ; CHECK-NEXT: vmov r0, r1, d22 ; CHECK-NEXT: subs r0, r2, r0 ; CHECK-NEXT: mov r2, #0 @@ -224,14 +233,15 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: vmov r0, r1, d28 ; CHECK-NEXT: subs r0, r4, r0 ; CHECK-NEXT: sbcs r0, r5, r1 -; CHECK-NEXT: vmov r4, r5, d24 +; CHECK-NEXT: vmov r4, r5, d27 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: movlt r0, #1 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mvnne r0, #0 ; CHECK-NEXT: vdup.32 d2, r0 -; CHECK-NEXT: vmov r0, r1, d20 +; CHECK-NEXT: vmov r0, r1, d23 ; CHECK-NEXT: vbit q14, q15, q1 +; CHECK-NEXT: vbit q10, q12, q0 ; CHECK-NEXT: subs r0, r4, r0 ; CHECK-NEXT: sbcs r0, r5, r1 ; CHECK-NEXT: vmov r1, r4, d17 @@ -240,27 +250,17 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea ; CHECK-NEXT: movlt r0, #1 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: vdup.32 d0, r0 -; CHECK-NEXT: vbit q10, q12, q0 +; CHECK-NEXT: vdup.32 d31, r0 +; CHECK-NEXT: vdup.32 d30, r2 +; CHECK-NEXT: vbit q11, q13, q15 +; CHECK-NEXT: vst1.64 {d28, d29}, [r3:128]! 
; CHECK-NEXT: subs r1, r5, r1 ; CHECK-NEXT: sbcs r1, r6, r4 -; CHECK-NEXT: vmov r4, r5, d27 -; CHECK-NEXT: vmov r0, r1, d23 -; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: movlt r6, #1 -; CHECK-NEXT: subs r0, r4, r0 -; CHECK-NEXT: sbcs r0, r5, r1 ; CHECK-NEXT: movlt lr, #1 ; CHECK-NEXT: cmp lr, #0 ; CHECK-NEXT: mvnne lr, #0 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: vdup.32 d31, lr -; CHECK-NEXT: mvnne r6, #0 -; CHECK-NEXT: vdup.32 d30, r2 -; CHECK-NEXT: vdup.32 d3, r6 -; CHECK-NEXT: vbit q11, q13, q15 +; CHECK-NEXT: vdup.32 d3, lr ; CHECK-NEXT: vdup.32 d2, r12 -; CHECK-NEXT: vst1.64 {d28, d29}, [r3:128]! ; CHECK-NEXT: vbit q8, q9, q1 ; CHECK-NEXT: vst1.64 {d20, d21}, [r3:128]! ; CHECK-NEXT: vst1.64 {d22, d23}, [r3:128]! @@ -283,198 +283,194 @@ define void @func_blend19(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storea define void @func_blend20(ptr %loadaddr, ptr %loadaddr2, ptr %blend, ptr %storeaddr) { ; CHECK-LABEL: func_blend20: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: .vsave {d8, d9, d10, d11} -; CHECK-NEXT: vpush {d8, d9, d10, d11} -; CHECK-NEXT: mov r8, r1 -; CHECK-NEXT: mov lr, r0 -; CHECK-NEXT: vld1.64 {d16, d17}, [r8:128]! -; CHECK-NEXT: add r9, r0, #64 -; CHECK-NEXT: add r10, r1, #64 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: add r8, r1, #64 +; CHECK-NEXT: add lr, r0, #64 +; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]! ; CHECK-NEXT: mov r12, #0 -; CHECK-NEXT: vld1.64 {d22, d23}, [lr:128]! -; CHECK-NEXT: vld1.64 {d18, d19}, [r8:128]! -; CHECK-NEXT: vld1.64 {d20, d21}, [lr:128]! -; CHECK-NEXT: vmov r6, r4, d19 -; CHECK-NEXT: vmov r5, r7, d21 -; CHECK-NEXT: vld1.64 {d4, d5}, [r9:128]! -; CHECK-NEXT: vld1.64 {d6, d7}, [r10:128]! -; CHECK-NEXT: vld1.64 {d0, d1}, [r10:128]! -; CHECK-NEXT: vld1.64 {d2, d3}, [r9:128]! 
-; CHECK-NEXT: subs r6, r5, r6 -; CHECK-NEXT: sbcs r4, r7, r4 -; CHECK-NEXT: vmov r5, r6, d18 -; CHECK-NEXT: vmov r7, r2, d20 +; CHECK-NEXT: vld1.64 {d24, d25}, [r0:128]! +; CHECK-NEXT: vmov r4, r5, d17 +; CHECK-NEXT: vmov r6, r7, d25 +; CHECK-NEXT: vld1.64 {d18, d19}, [lr:128]! +; CHECK-NEXT: vld1.64 {d20, d21}, [r8:128]! +; CHECK-NEXT: vld1.64 {d22, d23}, [r8:128]! +; CHECK-NEXT: vld1.64 {d0, d1}, [lr:128]! +; CHECK-NEXT: subs r4, r6, r4 +; CHECK-NEXT: sbcs r4, r7, r5 +; CHECK-NEXT: vmov r5, r6, d16 +; CHECK-NEXT: vmov r7, r2, d24 ; CHECK-NEXT: mov r4, #0 ; CHECK-NEXT: movlt r4, #1 ; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: mvnne r4, #0 -; CHECK-NEXT: vdup.32 d31, r4 +; CHECK-NEXT: vdup.32 d27, r4 ; CHECK-NEXT: subs r5, r7, r5 ; CHECK-NEXT: sbcs r2, r2, r6 -; CHECK-NEXT: vmov r4, r5, d3 +; CHECK-NEXT: vmov r5, r6, d1 ; CHECK-NEXT: mov r2, #0 ; CHECK-NEXT: movlt r2, #1 ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: mvnne r2, #0 -; CHECK-NEXT: vdup.32 d30, r2 -; CHECK-NEXT: vmov r0, r2, d1 -; CHECK-NEXT: subs r0, r4, r0 -; CHECK-NEXT: sbcs r0, r5, r2 -; CHECK-NEXT: vmov r4, r5, d2 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: movlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: vdup.32 d9, r0 -; CHECK-NEXT: vmov r0, r2, d0 -; CHECK-NEXT: subs r0, r4, r0 -; CHECK-NEXT: sbcs r0, r5, r2 -; CHECK-NEXT: vmov r4, r5, d5 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: movlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: vdup.32 d8, r0 -; CHECK-NEXT: vmov r0, r2, d7 -; CHECK-NEXT: subs r0, r4, r0 -; CHECK-NEXT: sbcs r0, r5, r2 -; CHECK-NEXT: vmov r4, r5, d4 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: movlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: vdup.32 d11, r0 -; CHECK-NEXT: vmov r0, r2, d6 -; CHECK-NEXT: subs r0, r4, r0 -; CHECK-NEXT: sbcs r0, r5, r2 -; CHECK-NEXT: vmov r4, r5, d23 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: movlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: 
vdup.32 d10, r0 -; CHECK-NEXT: vmov r0, r2, d17 -; CHECK-NEXT: subs r0, r4, r0 -; CHECK-NEXT: sbcs r0, r5, r2 +; CHECK-NEXT: vdup.32 d26, r2 +; CHECK-NEXT: vmov r2, r4, d23 +; CHECK-NEXT: vbit q8, q12, q13 +; CHECK-NEXT: vld1.64 {d24, d25}, [r0:128]! +; CHECK-NEXT: vld1.64 {d26, d27}, [r1:128]! +; CHECK-NEXT: vld1.64 {d28, d29}, [lr:128]! +; CHECK-NEXT: subs r2, r5, r2 +; CHECK-NEXT: sbcs r2, r6, r4 ; CHECK-NEXT: vmov r4, r5, d22 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: movlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: vdup.32 d25, r0 -; CHECK-NEXT: vmov r0, r2, d16 -; CHECK-NEXT: subs r0, r4, r0 -; CHECK-NEXT: sbcs r0, r5, r2 -; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: movlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: vdup.32 d24, r0 -; CHECK-NEXT: vorr q13, q12, q12 -; CHECK-NEXT: vbsl q13, q11, q8 -; CHECK-NEXT: vld1.64 {d24, d25}, [r9:128]! -; CHECK-NEXT: vorr q8, q5, q5 -; CHECK-NEXT: vld1.64 {d28, d29}, [r10:128]! -; CHECK-NEXT: vbsl q8, q2, q3 -; CHECK-NEXT: vld1.64 {d6, d7}, [r8:128]! -; CHECK-NEXT: vld1.64 {d22, d23}, [r8:128] -; CHECK-NEXT: vld1.64 {d4, d5}, [lr:128]! 
-; CHECK-NEXT: vbif q10, q9, q15 -; CHECK-NEXT: vorr q9, q4, q4 -; CHECK-NEXT: vmov r0, r2, d22 -; CHECK-NEXT: vbsl q9, q1, q0 -; CHECK-NEXT: vld1.64 {d30, d31}, [lr:128] +; CHECK-NEXT: vmov r6, r7, d0 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movlt r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: mvnne r2, #0 +; CHECK-NEXT: vdup.32 d3, r2 +; CHECK-NEXT: subs r4, r6, r4 +; CHECK-NEXT: sbcs r4, r7, r5 +; CHECK-NEXT: vmov r2, r5, d27 +; CHECK-NEXT: vmov r6, r7, d25 +; CHECK-NEXT: mov r4, #0 +; CHECK-NEXT: movlt r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: mvnne r4, #0 +; CHECK-NEXT: vdup.32 d2, r4 +; CHECK-NEXT: subs r2, r6, r2 +; CHECK-NEXT: sbcs r2, r7, r5 +; CHECK-NEXT: vmov r6, r7, d24 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movlt r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: mvnne r2, #0 +; CHECK-NEXT: vdup.32 d5, r2 +; CHECK-NEXT: vmov r2, r5, d26 +; CHECK-NEXT: subs r2, r6, r2 +; CHECK-NEXT: sbcs r2, r7, r5 +; CHECK-NEXT: vmov r6, r7, d19 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movlt r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: mvnne r2, #0 +; CHECK-NEXT: vdup.32 d4, r2 +; CHECK-NEXT: vmov r2, r5, d21 +; CHECK-NEXT: subs r2, r6, r2 +; CHECK-NEXT: sbcs r2, r7, r5 +; CHECK-NEXT: vmov r6, r7, d18 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movlt r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: mvnne r2, #0 +; CHECK-NEXT: vdup.32 d31, r2 +; CHECK-NEXT: vmov r2, r5, d20 +; CHECK-NEXT: subs r2, r6, r2 +; CHECK-NEXT: sbcs r2, r7, r5 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movlt r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: mvnne r2, #0 +; CHECK-NEXT: vdup.32 d30, r2 +; CHECK-NEXT: vbif q9, q10, q15 +; CHECK-NEXT: vld1.64 {d30, d31}, [r8:128]! 
+; CHECK-NEXT: vld1.64 {d20, d21}, [r8:128] +; CHECK-NEXT: vbit q13, q12, q2 +; CHECK-NEXT: vld1.64 {d24, d25}, [lr:128] +; CHECK-NEXT: vmov r2, r7, d21 +; CHECK-NEXT: vbit q11, q0, q1 ; CHECK-NEXT: mov lr, #0 -; CHECK-NEXT: vmov r7, r5, d30 -; CHECK-NEXT: vld1.64 {d0, d1}, [r9:128] -; CHECK-NEXT: vld1.64 {d2, d3}, [r10:128] -; CHECK-NEXT: subs r0, r7, r0 -; CHECK-NEXT: sbcs r0, r5, r2 -; CHECK-NEXT: vmov r5, r4, d24 -; CHECK-NEXT: vmov r0, r7, d28 +; CHECK-NEXT: vmov r6, r5, d25 +; CHECK-NEXT: vld1.64 {d4, d5}, [r1:128]! +; CHECK-NEXT: vld1.64 {d6, d7}, [r0:128]! +; CHECK-NEXT: vld1.64 {d0, d1}, [r1:128] +; CHECK-NEXT: vld1.64 {d2, d3}, [r0:128] +; CHECK-NEXT: subs r1, r6, r2 +; CHECK-NEXT: vmov r0, r6, d2 +; CHECK-NEXT: sbcs r1, r5, r7 +; CHECK-NEXT: vmov r2, r7, d0 ; CHECK-NEXT: movlt lr, #1 ; CHECK-NEXT: cmp lr, #0 ; CHECK-NEXT: mvnne lr, #0 -; CHECK-NEXT: subs r0, r5, r0 -; CHECK-NEXT: sbcs r0, r4, r7 -; CHECK-NEXT: vmov r7, r5, d29 -; CHECK-NEXT: vmov r4, r6, d25 +; CHECK-NEXT: subs r0, r0, r2 +; CHECK-NEXT: sbcs r0, r6, r7 +; CHECK-NEXT: vmov r2, r7, d30 +; CHECK-NEXT: vmov r6, r5, d28 ; CHECK-NEXT: mov r0, #0 ; CHECK-NEXT: movlt r0, #1 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mvnne r0, #0 -; CHECK-NEXT: subs r7, r4, r7 +; CHECK-NEXT: subs r2, r6, r2 +; CHECK-NEXT: sbcs r2, r5, r7 +; CHECK-NEXT: vmov r7, r6, d31 +; CHECK-NEXT: vmov r5, r4, d29 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: movlt r2, #1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: mvnne r2, #0 +; CHECK-NEXT: subs r7, r5, r7 +; CHECK-NEXT: vmov r5, r1, d7 +; CHECK-NEXT: sbcs r7, r4, r6 ; CHECK-NEXT: mov r4, #0 -; CHECK-NEXT: sbcs r7, r6, r5 -; CHECK-NEXT: vmov r5, r1, d31 -; CHECK-NEXT: vmov r7, r6, d23 +; CHECK-NEXT: vmov r7, r6, d5 ; CHECK-NEXT: movlt r4, #1 ; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: mvnne r4, #0 -; CHECK-NEXT: subs r7, r5, r7 -; CHECK-NEXT: mov r5, #0 +; CHECK-NEXT: subs r5, r5, r7 ; CHECK-NEXT: sbcs r1, r1, r6 -; CHECK-NEXT: vmov r6, r2, d5 -; CHECK-NEXT: vmov r1, r7, d7 -; CHECK-NEXT: 
movlt r5, #1 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: mvnne r5, #0 -; CHECK-NEXT: subs r1, r6, r1 -; CHECK-NEXT: sbcs r1, r2, r7 -; CHECK-NEXT: vmov r6, r7, d4 +; CHECK-NEXT: vmov r6, r7, d6 ; CHECK-NEXT: mov r1, #0 ; CHECK-NEXT: movlt r1, #1 ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: mvnne r1, #0 ; CHECK-NEXT: vdup.32 d9, r1 -; CHECK-NEXT: vmov r1, r2, d6 +; CHECK-NEXT: vmov r1, r5, d4 ; CHECK-NEXT: subs r1, r6, r1 -; CHECK-NEXT: sbcs r1, r7, r2 -; CHECK-NEXT: vmov r6, r7, d0 +; CHECK-NEXT: sbcs r1, r7, r5 +; CHECK-NEXT: vmov r6, r7, d3 ; CHECK-NEXT: mov r1, #0 ; CHECK-NEXT: movlt r1, #1 ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: mvnne r1, #0 ; CHECK-NEXT: vdup.32 d8, r1 -; CHECK-NEXT: vmov r1, r2, d2 -; CHECK-NEXT: vbif q2, q3, q4 -; CHECK-NEXT: vdup.32 d7, r5 +; CHECK-NEXT: vmov r1, r5, d1 +; CHECK-NEXT: vbit q2, q3, q4 ; CHECK-NEXT: vdup.32 d9, r4 -; CHECK-NEXT: vmov r4, r5, d1 -; CHECK-NEXT: vdup.32 d8, r0 -; CHECK-NEXT: mov r0, r3 -; CHECK-NEXT: vst1.64 {d26, d27}, [r0:128]! -; CHECK-NEXT: vbif q12, q14, q4 -; CHECK-NEXT: vdup.32 d6, lr -; CHECK-NEXT: vbit q11, q15, q3 -; CHECK-NEXT: vst1.64 {d20, d21}, [r0:128]! +; CHECK-NEXT: vdup.32 d8, r2 ; CHECK-NEXT: subs r1, r6, r1 -; CHECK-NEXT: mov r6, #0 -; CHECK-NEXT: sbcs r1, r7, r2 -; CHECK-NEXT: vmov r1, r2, d3 -; CHECK-NEXT: movlt r6, #1 -; CHECK-NEXT: subs r1, r4, r1 -; CHECK-NEXT: sbcs r1, r5, r2 +; CHECK-NEXT: sbcs r1, r7, r5 +; CHECK-NEXT: vmov r5, r6, d24 +; CHECK-NEXT: mov r1, #0 +; CHECK-NEXT: movlt r1, #1 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: mvnne r1, #0 +; CHECK-NEXT: vdup.32 d7, r1 +; CHECK-NEXT: vmov r1, r4, d20 +; CHECK-NEXT: vdup.32 d6, r0 +; CHECK-NEXT: subs r1, r5, r1 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: sbcs r0, r6, r4 +; CHECK-NEXT: vst1.64 {d16, d17}, [r1:128]! 
+; CHECK-NEXT: vorr q8, q4, q4 ; CHECK-NEXT: movlt r12, #1 ; CHECK-NEXT: cmp r12, #0 +; CHECK-NEXT: vbsl q8, q14, q15 +; CHECK-NEXT: vdup.32 d29, lr +; CHECK-NEXT: vorr q15, q3, q3 ; CHECK-NEXT: mvnne r12, #0 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: vdup.32 d27, r12 -; CHECK-NEXT: mvnne r6, #0 -; CHECK-NEXT: vdup.32 d26, r6 -; CHECK-NEXT: vorr q10, q13, q13 -; CHECK-NEXT: vbsl q10, q0, q1 -; CHECK-NEXT: vst1.64 {d4, d5}, [r0:128]! -; CHECK-NEXT: vst1.64 {d22, d23}, [r0:128] +; CHECK-NEXT: vdup.32 d28, r12 ; CHECK-NEXT: add r0, r3, #64 -; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]! +; CHECK-NEXT: vbsl q15, q1, q0 +; CHECK-NEXT: vst1.64 {d26, d27}, [r1:128]! +; CHECK-NEXT: vbit q10, q12, q14 ; CHECK-NEXT: vst1.64 {d18, d19}, [r0:128]! -; CHECK-NEXT: vst1.64 {d24, d25}, [r0:128]! +; CHECK-NEXT: vst1.64 {d22, d23}, [r0:128]! +; CHECK-NEXT: vst1.64 {d4, d5}, [r1:128]! +; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]! +; CHECK-NEXT: vst1.64 {d30, d31}, [r1:128] ; CHECK-NEXT: vst1.64 {d20, d21}, [r0:128] -; CHECK-NEXT: vpop {d8, d9, d10, d11} -; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: mov pc, lr ; COST: func_blend20 ; COST: cost of 0 {{.*}} icmp diff --git a/llvm/test/CodeGen/ARM/wide-compares.ll b/llvm/test/CodeGen/ARM/wide-compares.ll index 09e3592b6d420..9acf8d249ddf1 100644 --- a/llvm/test/CodeGen/ARM/wide-compares.ll +++ b/llvm/test/CodeGen/ARM/wide-compares.ll @@ -129,19 +129,16 @@ declare void @g() define i64 @test_slt_select(i64 %c, i64 %d, i64 %a, i64 %b) { ; CHECK-ARM-LABEL: test_slt_select: ; CHECK-ARM: @ %bb.0: @ %entry -; CHECK-ARM-NEXT: push {r4, r5, r6, r7, r11, lr} -; CHECK-ARM-NEXT: ldr r12, [sp, #32] -; CHECK-ARM-NEXT: mov r6, #0 -; CHECK-ARM-NEXT: ldr lr, [sp, #24] -; CHECK-ARM-NEXT: ldr r7, [sp, #36] -; CHECK-ARM-NEXT: ldr r5, [sp, #28] +; CHECK-ARM-NEXT: push {r4, r5, r6, lr} +; CHECK-ARM-NEXT: ldr r12, [sp, #24] +; CHECK-ARM-NEXT: ldr lr, [sp, #16] +; 
CHECK-ARM-NEXT: ldr r6, [sp, #28] +; CHECK-ARM-NEXT: ldr r5, [sp, #20] ; CHECK-ARM-NEXT: subs r4, lr, r12 -; CHECK-ARM-NEXT: sbcs r7, r5, r7 -; CHECK-ARM-NEXT: movwlo r6, #1 -; CHECK-ARM-NEXT: cmp r6, #0 -; CHECK-ARM-NEXT: moveq r0, r2 -; CHECK-ARM-NEXT: moveq r1, r3 -; CHECK-ARM-NEXT: pop {r4, r5, r6, r7, r11, pc} +; CHECK-ARM-NEXT: sbcs r6, r5, r6 +; CHECK-ARM-NEXT: movhs r0, r2 +; CHECK-ARM-NEXT: movhs r1, r3 +; CHECK-ARM-NEXT: pop {r4, r5, r6, pc} ; ; CHECK-THUMB1-NOMOV-LABEL: test_slt_select: ; CHECK-THUMB1-NOMOV: @ %bb.0: @ %entry @@ -157,22 +154,13 @@ define i64 @test_slt_select(i64 %c, i64 %d, i64 %a, i64 %b) { ; CHECK-THUMB1-NOMOV-NEXT: sbcs r5, r4 ; CHECK-THUMB1-NOMOV-NEXT: blo .LBB2_2 ; CHECK-THUMB1-NOMOV-NEXT: @ %bb.1: @ %entry -; CHECK-THUMB1-NOMOV-NEXT: movs r4, #0 -; CHECK-THUMB1-NOMOV-NEXT: cmp r4, #0 -; CHECK-THUMB1-NOMOV-NEXT: beq .LBB2_3 -; CHECK-THUMB1-NOMOV-NEXT: b .LBB2_4 -; CHECK-THUMB1-NOMOV-NEXT: .LBB2_2: -; CHECK-THUMB1-NOMOV-NEXT: movs r4, #1 -; CHECK-THUMB1-NOMOV-NEXT: cmp r4, #0 -; CHECK-THUMB1-NOMOV-NEXT: bne .LBB2_4 -; CHECK-THUMB1-NOMOV-NEXT: .LBB2_3: @ %entry -; CHECK-THUMB1-NOMOV-NEXT: movs r0, r2 -; CHECK-THUMB1-NOMOV-NEXT: .LBB2_4: @ %entry -; CHECK-THUMB1-NOMOV-NEXT: cmp r4, #0 -; CHECK-THUMB1-NOMOV-NEXT: bne .LBB2_6 -; CHECK-THUMB1-NOMOV-NEXT: @ %bb.5: @ %entry +; CHECK-THUMB1-NOMOV-NEXT: mov r12, r2 +; CHECK-THUMB1-NOMOV-NEXT: mov r0, r12 +; CHECK-THUMB1-NOMOV-NEXT: .LBB2_2: @ %entry +; CHECK-THUMB1-NOMOV-NEXT: blo .LBB2_4 +; CHECK-THUMB1-NOMOV-NEXT: @ %bb.3: @ %entry ; CHECK-THUMB1-NOMOV-NEXT: movs r1, r3 -; CHECK-THUMB1-NOMOV-NEXT: .LBB2_6: @ %entry +; CHECK-THUMB1-NOMOV-NEXT: .LBB2_4: @ %entry ; CHECK-THUMB1-NOMOV-NEXT: add sp, #4 ; CHECK-THUMB1-NOMOV-NEXT: pop {r4, r5, r6, r7} ; CHECK-THUMB1-NOMOV-NEXT: pop {r2} @@ -188,46 +176,31 @@ define i64 @test_slt_select(i64 %c, i64 %d, i64 %a, i64 %b) { ; CHECK-THUMB1-NEXT: ldr r7, [sp, #24] ; CHECK-THUMB1-NEXT: subs r6, r7, r6 ; CHECK-THUMB1-NEXT: sbcs r5, r4 -; CHECK-THUMB1-NEXT: 
blo .LBB2_2 +; CHECK-THUMB1-NEXT: bhs .LBB2_3 ; CHECK-THUMB1-NEXT: @ %bb.1: @ %entry -; CHECK-THUMB1-NEXT: movs r4, #0 -; CHECK-THUMB1-NEXT: cmp r4, #0 -; CHECK-THUMB1-NEXT: beq .LBB2_3 -; CHECK-THUMB1-NEXT: b .LBB2_4 -; CHECK-THUMB1-NEXT: .LBB2_2: -; CHECK-THUMB1-NEXT: movs r4, #1 -; CHECK-THUMB1-NEXT: cmp r4, #0 -; CHECK-THUMB1-NEXT: bne .LBB2_4 +; CHECK-THUMB1-NEXT: bhs .LBB2_4 +; CHECK-THUMB1-NEXT: .LBB2_2: @ %entry +; CHECK-THUMB1-NEXT: add sp, #4 +; CHECK-THUMB1-NEXT: pop {r4, r5, r6, r7, pc} ; CHECK-THUMB1-NEXT: .LBB2_3: @ %entry ; CHECK-THUMB1-NEXT: mov r0, r2 +; CHECK-THUMB1-NEXT: blo .LBB2_2 ; CHECK-THUMB1-NEXT: .LBB2_4: @ %entry -; CHECK-THUMB1-NEXT: cmp r4, #0 -; CHECK-THUMB1-NEXT: beq .LBB2_6 -; CHECK-THUMB1-NEXT: @ %bb.5: @ %entry -; CHECK-THUMB1-NEXT: add sp, #4 -; CHECK-THUMB1-NEXT: pop {r4, r5, r6, r7, pc} -; CHECK-THUMB1-NEXT: .LBB2_6: @ %entry ; CHECK-THUMB1-NEXT: mov r1, r3 ; CHECK-THUMB1-NEXT: add sp, #4 ; CHECK-THUMB1-NEXT: pop {r4, r5, r6, r7, pc} ; ; CHECK-THUMB2-LABEL: test_slt_select: ; CHECK-THUMB2: @ %bb.0: @ %entry -; CHECK-THUMB2-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-THUMB2-NEXT: sub sp, #4 -; CHECK-THUMB2-NEXT: ldrd r12, r7, [sp, #32] -; CHECK-THUMB2-NEXT: movs r6, #0 -; CHECK-THUMB2-NEXT: ldrd lr, r5, [sp, #24] +; CHECK-THUMB2-NEXT: push {r4, r5, r6, lr} +; CHECK-THUMB2-NEXT: ldrd r12, r6, [sp, #24] +; CHECK-THUMB2-NEXT: ldrd lr, r5, [sp, #16] ; CHECK-THUMB2-NEXT: subs.w r4, lr, r12 -; CHECK-THUMB2-NEXT: sbcs.w r7, r5, r7 -; CHECK-THUMB2-NEXT: it lo -; CHECK-THUMB2-NEXT: movlo r6, #1 -; CHECK-THUMB2-NEXT: cmp r6, #0 -; CHECK-THUMB2-NEXT: itt eq -; CHECK-THUMB2-NEXT: moveq r0, r2 -; CHECK-THUMB2-NEXT: moveq r1, r3 -; CHECK-THUMB2-NEXT: add sp, #4 -; CHECK-THUMB2-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-THUMB2-NEXT: sbcs.w r6, r5, r6 +; CHECK-THUMB2-NEXT: itt hs +; CHECK-THUMB2-NEXT: movhs r0, r2 +; CHECK-THUMB2-NEXT: movhs r1, r3 +; CHECK-THUMB2-NEXT: pop {r4, r5, r6, pc} entry: %cmp = icmp ult i64 %a, %b %r1 = select i1 %cmp, i64 
%c, i64 %d diff --git a/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll b/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll index fdfbf3393098e..9d69417e8f6b5 100644 --- a/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll +++ b/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll @@ -501,9 +501,8 @@ define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr n ; CHECK-NEXT: lsls r2, r2, #16 ; CHECK-NEXT: str r2, [r1, #48] ; CHECK-NEXT: adds r1, #64 -; CHECK-NEXT: subs r3, r3, #4 ; CHECK-NEXT: adds r0, #32 -; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: subs r3, r3, #4 ; CHECK-NEXT: bne .LBB1_5 ; CHECK-NEXT: .LBB1_6: @ %while.end ; CHECK-NEXT: movs r2, #3 diff --git a/llvm/test/CodeGen/Thumb/select.ll b/llvm/test/CodeGen/Thumb/select.ll index 0065616c4646e..89cc4c10fe51d 100644 --- a/llvm/test/CodeGen/Thumb/select.ll +++ b/llvm/test/CodeGen/Thumb/select.ll @@ -94,8 +94,8 @@ entry: ; CHECK-LABEL: f8: ; CHECK: cmp r0, r1 ; CHECK: blt -; CHECK: movs -; CHECK: cmp r0, r1 +; CHECK: mov +; CHECK: mov ; CHECK: blt ; CHECK: movs ; CHECK: movs diff --git a/llvm/test/CodeGen/Thumb/smul_fix_sat.ll b/llvm/test/CodeGen/Thumb/smul_fix_sat.ll index f8557419c4199..8bc39ea0370a6 100644 --- a/llvm/test/CodeGen/Thumb/smul_fix_sat.ll +++ b/llvm/test/CodeGen/Thumb/smul_fix_sat.ll @@ -49,25 +49,24 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; ARM-NEXT: sub sp, #36 ; ARM-NEXT: str r3, [sp, #28] @ 4-byte Spill ; ARM-NEXT: mov r6, r1 -; ARM-NEXT: str r1, [sp, #8] @ 4-byte Spill +; ARM-NEXT: str r1, [sp, #4] @ 4-byte Spill ; ARM-NEXT: movs r4, #0 -; ARM-NEXT: str r4, [sp, #32] @ 4-byte Spill ; ARM-NEXT: mov r5, r0 -; ARM-NEXT: str r0, [sp, #20] @ 4-byte Spill +; ARM-NEXT: str r0, [sp, #16] @ 4-byte Spill ; ARM-NEXT: mov r1, r4 ; ARM-NEXT: mov r7, r2 -; ARM-NEXT: str r2, [sp, #16] @ 4-byte Spill +; ARM-NEXT: str r2, [sp, #12] @ 4-byte Spill ; ARM-NEXT: mov r3, r4 ; ARM-NEXT: bl __aeabi_lmul -; ARM-NEXT: str r0, [sp, #4] @ 4-byte Spill -; ARM-NEXT: str r1, [sp, #24] @ 4-byte Spill +; ARM-NEXT: str r0, [sp, 
#24] @ 4-byte Spill +; ARM-NEXT: str r1, [sp, #32] @ 4-byte Spill ; ARM-NEXT: mov r0, r6 ; ARM-NEXT: mov r1, r4 ; ARM-NEXT: mov r2, r7 ; ARM-NEXT: mov r3, r4 ; ARM-NEXT: bl __aeabi_lmul ; ARM-NEXT: mov r6, r1 -; ARM-NEXT: ldr r1, [sp, #24] @ 4-byte Reload +; ARM-NEXT: ldr r1, [sp, #32] @ 4-byte Reload ; ARM-NEXT: adds r7, r0, r1 ; ARM-NEXT: adcs r6, r4 ; ARM-NEXT: mov r0, r5 @@ -77,118 +76,108 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; ARM-NEXT: mov r3, r4 ; ARM-NEXT: bl __aeabi_lmul ; ARM-NEXT: adds r0, r0, r7 -; ARM-NEXT: str r0, [sp, #24] @ 4-byte Spill +; ARM-NEXT: str r0, [sp, #20] @ 4-byte Spill ; ARM-NEXT: adcs r1, r4 ; ARM-NEXT: adds r0, r6, r1 -; ARM-NEXT: str r0, [sp, #12] @ 4-byte Spill +; ARM-NEXT: str r0, [sp, #8] @ 4-byte Spill ; ARM-NEXT: mov r6, r4 ; ARM-NEXT: adcs r6, r4 -; ARM-NEXT: ldr r7, [sp, #8] @ 4-byte Reload +; ARM-NEXT: ldr r7, [sp, #4] @ 4-byte Reload ; ARM-NEXT: mov r0, r7 ; ARM-NEXT: mov r1, r4 ; ARM-NEXT: mov r2, r5 +; ARM-NEXT: str r4, [sp, #32] @ 4-byte Spill ; ARM-NEXT: mov r3, r4 ; ARM-NEXT: bl __aeabi_lmul ; ARM-NEXT: mov r5, r1 -; ARM-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; ARM-NEXT: ldr r1, [sp, #8] @ 4-byte Reload ; ARM-NEXT: adds r0, r0, r1 -; ARM-NEXT: str r0, [sp, #12] @ 4-byte Spill +; ARM-NEXT: str r0, [sp, #8] @ 4-byte Spill ; ARM-NEXT: adcs r5, r6 +; ARM-NEXT: mov r4, r7 ; ARM-NEXT: asrs r2, r7, #31 -; ARM-NEXT: ldr r0, [sp, #16] @ 4-byte Reload -; ARM-NEXT: ldr r4, [sp, #28] @ 4-byte Reload -; ARM-NEXT: mov r1, r4 +; ARM-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; ARM-NEXT: ldr r7, [sp, #28] @ 4-byte Reload +; ARM-NEXT: mov r1, r7 ; ARM-NEXT: mov r3, r2 ; ARM-NEXT: bl __aeabi_lmul ; ARM-NEXT: mov r6, r0 -; ARM-NEXT: str r1, [sp, #16] @ 4-byte Spill -; ARM-NEXT: asrs r0, r4, #31 +; ARM-NEXT: str r1, [sp, #12] @ 4-byte Spill +; ARM-NEXT: asrs r0, r7, #31 ; ARM-NEXT: mov r1, r0 -; ARM-NEXT: ldr r2, [sp, #20] @ 4-byte Reload -; ARM-NEXT: mov r3, r7 +; ARM-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; ARM-NEXT: mov r3, 
r4 ; ARM-NEXT: bl __aeabi_lmul ; ARM-NEXT: adds r0, r0, r6 -; ARM-NEXT: ldr r2, [sp, #16] @ 4-byte Reload -; ARM-NEXT: adcs r1, r2 ; ARM-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; ARM-NEXT: adds r0, r2, r0 +; ARM-NEXT: adcs r1, r2 +; ARM-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; ARM-NEXT: adds r3, r2, r0 ; ARM-NEXT: adcs r1, r5 -; ARM-NEXT: rsbs r5, r1, #0 -; ARM-NEXT: adcs r5, r1 -; ARM-NEXT: movs r2, #1 -; ARM-NEXT: str r0, [sp, #28] @ 4-byte Spill -; ARM-NEXT: cmp r0, #1 -; ARM-NEXT: mov r3, r2 +; ARM-NEXT: rsbs r2, r1, #0 +; ARM-NEXT: adcs r2, r1 +; ARM-NEXT: movs r0, #1 +; ARM-NEXT: cmp r3, #1 +; ARM-NEXT: mov r5, r0 ; ARM-NEXT: bhi .LBB1_2 ; ARM-NEXT: @ %bb.1: -; ARM-NEXT: ldr r3, [sp, #32] @ 4-byte Reload +; ARM-NEXT: ldr r5, [sp, #32] @ 4-byte Reload ; ARM-NEXT: .LBB1_2: -; ARM-NEXT: ands r5, r3 +; ARM-NEXT: ands r2, r5 ; ARM-NEXT: cmp r1, #0 -; ARM-NEXT: mov r3, r2 +; ARM-NEXT: mov r5, r0 ; ARM-NEXT: bgt .LBB1_4 ; ARM-NEXT: @ %bb.3: -; ARM-NEXT: ldr r3, [sp, #32] @ 4-byte Reload +; ARM-NEXT: ldr r5, [sp, #32] @ 4-byte Reload ; ARM-NEXT: .LBB1_4: -; ARM-NEXT: orrs r3, r5 -; ARM-NEXT: ldr r0, [sp, #32] @ 4-byte Reload -; ARM-NEXT: mvns r6, r0 -; ARM-NEXT: cmp r3, #0 -; ARM-NEXT: str r6, [sp, #20] @ 4-byte Spill -; ARM-NEXT: bne .LBB1_6 +; ARM-NEXT: orrs r5, r2 +; ARM-NEXT: lsls r2, r3, #30 +; ARM-NEXT: ldr r6, [sp, #20] @ 4-byte Reload +; ARM-NEXT: lsrs r4, r6, #2 +; ARM-NEXT: adds r2, r2, r4 +; ARM-NEXT: lsls r4, r6, #30 +; ARM-NEXT: ldr r6, [sp, #24] @ 4-byte Reload +; ARM-NEXT: lsrs r6, r6, #2 +; ARM-NEXT: adds r7, r4, r6 +; ARM-NEXT: ldr r4, [sp, #32] @ 4-byte Reload +; ARM-NEXT: mvns r6, r4 +; ARM-NEXT: cmp r5, #0 +; ARM-NEXT: beq .LBB1_6 ; ARM-NEXT: @ %bb.5: -; ARM-NEXT: ldr r0, [sp, #24] @ 4-byte Reload -; ARM-NEXT: lsls r0, r0, #30 -; ARM-NEXT: ldr r4, [sp, #4] @ 4-byte Reload -; ARM-NEXT: lsrs r4, r4, #2 -; ARM-NEXT: adds r0, r0, r4 -; ARM-NEXT: str r0, [sp, #20] @ 4-byte Spill +; ARM-NEXT: ldr r2, .LCPI1_0 ; ARM-NEXT: .LBB1_6: -; ARM-NEXT: adds r0, 
r1, #1 -; ARM-NEXT: rsbs r7, r0, #0 -; ARM-NEXT: adcs r7, r0 -; ARM-NEXT: mvns r0, r2 -; ARM-NEXT: ldr r5, [sp, #28] @ 4-byte Reload -; ARM-NEXT: cmp r5, r0 -; ARM-NEXT: mov r0, r2 -; ARM-NEXT: blo .LBB1_8 +; ARM-NEXT: mov r5, r6 +; ARM-NEXT: bne .LBB1_8 ; ARM-NEXT: @ %bb.7: -; ARM-NEXT: ldr r0, [sp, #32] @ 4-byte Reload +; ARM-NEXT: mov r5, r7 ; ARM-NEXT: .LBB1_8: -; ARM-NEXT: ands r7, r0 -; ARM-NEXT: cmp r1, r6 -; ARM-NEXT: mov r6, r2 -; ARM-NEXT: bge .LBB1_12 +; ARM-NEXT: adds r4, r1, #1 +; ARM-NEXT: rsbs r7, r4, #0 +; ARM-NEXT: adcs r7, r4 +; ARM-NEXT: mvns r4, r0 +; ARM-NEXT: cmp r3, r4 +; ARM-NEXT: mov r3, r0 +; ARM-NEXT: blo .LBB1_10 ; ARM-NEXT: @ %bb.9: -; ARM-NEXT: orrs r6, r7 -; ARM-NEXT: beq .LBB1_13 +; ARM-NEXT: ldr r3, [sp, #32] @ 4-byte Reload ; ARM-NEXT: .LBB1_10: -; ARM-NEXT: cmp r3, #0 -; ARM-NEXT: bne .LBB1_14 -; ARM-NEXT: .LBB1_11: -; ARM-NEXT: ldr r0, [sp, #28] @ 4-byte Reload -; ARM-NEXT: lsls r0, r0, #30 -; ARM-NEXT: ldr r1, [sp, #24] @ 4-byte Reload -; ARM-NEXT: lsrs r1, r1, #2 -; ARM-NEXT: adds r1, r0, r1 -; ARM-NEXT: cmp r6, #0 -; ARM-NEXT: bne .LBB1_15 -; ARM-NEXT: b .LBB1_16 +; ARM-NEXT: ands r7, r3 +; ARM-NEXT: cmp r1, r6 +; ARM-NEXT: mov r3, r0 +; ARM-NEXT: blt .LBB1_12 +; ARM-NEXT: @ %bb.11: +; ARM-NEXT: ldr r3, [sp, #32] @ 4-byte Reload ; ARM-NEXT: .LBB1_12: -; ARM-NEXT: ldr r6, [sp, #32] @ 4-byte Reload -; ARM-NEXT: orrs r6, r7 -; ARM-NEXT: bne .LBB1_10 -; ARM-NEXT: .LBB1_13: -; ARM-NEXT: ldr r0, [sp, #20] @ 4-byte Reload -; ARM-NEXT: str r0, [sp, #32] @ 4-byte Spill +; ARM-NEXT: orrs r3, r7 +; ARM-NEXT: lsls r1, r0, #31 ; ARM-NEXT: cmp r3, #0 -; ARM-NEXT: beq .LBB1_11 +; ARM-NEXT: bne .LBB1_14 +; ARM-NEXT: @ %bb.13: +; ARM-NEXT: str r5, [sp, #32] @ 4-byte Spill ; ARM-NEXT: .LBB1_14: -; ARM-NEXT: ldr r1, .LCPI1_0 -; ARM-NEXT: cmp r6, #0 -; ARM-NEXT: beq .LBB1_16 -; ARM-NEXT: .LBB1_15: -; ARM-NEXT: lsls r1, r2, #31 +; ARM-NEXT: bne .LBB1_16 +; ARM-NEXT: @ %bb.15: +; ARM-NEXT: mov r1, r2 ; ARM-NEXT: .LBB1_16: ; ARM-NEXT: ldr r0, [sp, 
#32] @ 4-byte Reload ; ARM-NEXT: add sp, #36 @@ -282,44 +271,44 @@ define i64 @func5(i64 %x, i64 %y) { ; ARM-NEXT: push {r4, r5, r6, r7, lr} ; ARM-NEXT: .pad #28 ; ARM-NEXT: sub sp, #28 -; ARM-NEXT: str r3, [sp, #12] @ 4-byte Spill -; ARM-NEXT: mov r5, r2 +; ARM-NEXT: str r3, [sp, #24] @ 4-byte Spill +; ARM-NEXT: mov r4, r2 ; ARM-NEXT: str r2, [sp, #16] @ 4-byte Spill -; ARM-NEXT: mov r6, r1 +; ARM-NEXT: mov r5, r1 ; ARM-NEXT: movs r7, #0 -; ARM-NEXT: mov r4, r0 +; ARM-NEXT: mov r6, r0 ; ARM-NEXT: str r0, [sp, #20] @ 4-byte Spill ; ARM-NEXT: mov r1, r7 ; ARM-NEXT: mov r3, r7 ; ARM-NEXT: bl __aeabi_lmul ; ARM-NEXT: str r0, [sp, #4] @ 4-byte Spill -; ARM-NEXT: str r1, [sp, #24] @ 4-byte Spill -; ARM-NEXT: mov r0, r6 +; ARM-NEXT: str r1, [sp, #12] @ 4-byte Spill +; ARM-NEXT: mov r0, r5 ; ARM-NEXT: mov r1, r7 -; ARM-NEXT: mov r2, r5 +; ARM-NEXT: mov r2, r4 ; ARM-NEXT: mov r3, r7 ; ARM-NEXT: bl __aeabi_lmul -; ARM-NEXT: mov r5, r1 -; ARM-NEXT: ldr r1, [sp, #24] @ 4-byte Reload +; ARM-NEXT: mov r4, r1 +; ARM-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; ARM-NEXT: adds r0, r0, r1 -; ARM-NEXT: str r0, [sp, #24] @ 4-byte Spill -; ARM-NEXT: adcs r5, r7 -; ARM-NEXT: mov r0, r4 +; ARM-NEXT: str r0, [sp, #12] @ 4-byte Spill +; ARM-NEXT: adcs r4, r7 +; ARM-NEXT: mov r0, r6 ; ARM-NEXT: mov r1, r7 -; ARM-NEXT: ldr r4, [sp, #12] @ 4-byte Reload -; ARM-NEXT: mov r2, r4 +; ARM-NEXT: ldr r2, [sp, #24] @ 4-byte Reload ; ARM-NEXT: mov r3, r7 ; ARM-NEXT: bl __aeabi_lmul -; ARM-NEXT: ldr r2, [sp, #24] @ 4-byte Reload +; ARM-NEXT: ldr r2, [sp, #12] @ 4-byte Reload ; ARM-NEXT: adds r0, r0, r2 -; ARM-NEXT: str r0, [sp, #24] @ 4-byte Spill +; ARM-NEXT: str r0, [sp, #12] @ 4-byte Spill ; ARM-NEXT: adcs r1, r7 -; ARM-NEXT: adds r0, r5, r1 +; ARM-NEXT: adds r0, r4, r1 ; ARM-NEXT: str r0, [sp, #8] @ 4-byte Spill -; ARM-NEXT: mov r5, r7 -; ARM-NEXT: adcs r5, r7 -; ARM-NEXT: mov r0, r6 +; ARM-NEXT: mov r6, r7 +; ARM-NEXT: adcs r6, r7 +; ARM-NEXT: mov r0, r5 ; ARM-NEXT: mov r1, r7 +; ARM-NEXT: ldr r4, 
[sp, #24] @ 4-byte Reload ; ARM-NEXT: mov r2, r4 ; ARM-NEXT: mov r3, r7 ; ARM-NEXT: bl __aeabi_lmul @@ -327,50 +316,48 @@ define i64 @func5(i64 %x, i64 %y) { ; ARM-NEXT: ldr r1, [sp, #8] @ 4-byte Reload ; ARM-NEXT: adds r0, r0, r1 ; ARM-NEXT: str r0, [sp, #8] @ 4-byte Spill -; ARM-NEXT: adcs r7, r5 -; ARM-NEXT: asrs r2, r6, #31 +; ARM-NEXT: adcs r7, r6 +; ARM-NEXT: asrs r2, r5, #31 ; ARM-NEXT: ldr r0, [sp, #16] @ 4-byte Reload -; ARM-NEXT: mov r5, r4 +; ARM-NEXT: mov r6, r4 ; ARM-NEXT: mov r1, r4 ; ARM-NEXT: mov r3, r2 ; ARM-NEXT: bl __aeabi_lmul ; ARM-NEXT: mov r4, r0 ; ARM-NEXT: str r1, [sp, #16] @ 4-byte Spill -; ARM-NEXT: asrs r0, r5, #31 +; ARM-NEXT: asrs r0, r6, #31 ; ARM-NEXT: mov r1, r0 ; ARM-NEXT: ldr r2, [sp, #20] @ 4-byte Reload -; ARM-NEXT: mov r3, r6 +; ARM-NEXT: mov r3, r5 ; ARM-NEXT: bl __aeabi_lmul +; ARM-NEXT: ldr r3, [sp, #12] @ 4-byte Reload ; ARM-NEXT: adds r0, r0, r4 -; ARM-NEXT: ldr r4, [sp, #24] @ 4-byte Reload ; ARM-NEXT: ldr r2, [sp, #16] @ 4-byte Reload ; ARM-NEXT: adcs r1, r2 ; ARM-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; ARM-NEXT: adds r3, r2, r0 +; ARM-NEXT: adds r2, r2, r0 ; ARM-NEXT: adcs r1, r7 -; ARM-NEXT: asrs r2, r4, #31 -; ARM-NEXT: eors r1, r2 -; ARM-NEXT: eors r3, r2 -; ARM-NEXT: orrs r3, r1 -; ARM-NEXT: eors r6, r5 -; ARM-NEXT: asrs r1, r6, #31 -; ARM-NEXT: cmp r3, #0 -; ARM-NEXT: bne .LBB4_3 +; ARM-NEXT: asrs r0, r3, #31 +; ARM-NEXT: eors r1, r0 +; ARM-NEXT: eors r2, r0 +; ARM-NEXT: orrs r2, r1 +; ARM-NEXT: eors r5, r6 +; ARM-NEXT: asrs r0, r5, #31 +; ARM-NEXT: ldr r1, .LCPI4_0 +; ARM-NEXT: eors r1, r0 +; ARM-NEXT: mvns r0, r0 +; ARM-NEXT: cmp r2, #0 +; ARM-NEXT: beq .LBB4_3 ; ARM-NEXT: @ %bb.1: -; ARM-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; ARM-NEXT: cmp r3, #0 ; ARM-NEXT: beq .LBB4_4 ; ARM-NEXT: .LBB4_2: -; ARM-NEXT: ldr r2, .LCPI4_0 -; ARM-NEXT: eors r1, r2 ; ARM-NEXT: add sp, #28 ; ARM-NEXT: pop {r4, r5, r6, r7, pc} ; ARM-NEXT: .LBB4_3: -; ARM-NEXT: mvns r0, r1 -; ARM-NEXT: cmp r3, #0 +; ARM-NEXT: ldr r0, [sp, #4] @ 
4-byte Reload ; ARM-NEXT: bne .LBB4_2 ; ARM-NEXT: .LBB4_4: -; ARM-NEXT: mov r1, r4 +; ARM-NEXT: mov r1, r3 ; ARM-NEXT: add sp, #28 ; ARM-NEXT: pop {r4, r5, r6, r7, pc} ; ARM-NEXT: .p2align 2 @@ -483,79 +470,67 @@ define i64 @func7(i64 %x, i64 %y) nounwind { ; ARM-NEXT: ldr r2, [sp, #16] @ 4-byte Reload ; ARM-NEXT: ldr r3, [sp, #20] @ 4-byte Reload ; ARM-NEXT: bl __aeabi_lmul -; ARM-NEXT: mov r2, r1 ; ARM-NEXT: adds r0, r0, r4 -; ARM-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; ARM-NEXT: adcs r2, r1 -; ARM-NEXT: ldr r1, [sp, #8] @ 4-byte Reload -; ARM-NEXT: adds r0, r1, r0 -; ARM-NEXT: adcs r2, r7 -; ARM-NEXT: rsbs r5, r2, #0 -; ARM-NEXT: adcs r5, r2 -; ARM-NEXT: movs r4, #1 +; ARM-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; ARM-NEXT: adcs r1, r2 +; ARM-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; ARM-NEXT: adds r0, r2, r0 +; ARM-NEXT: adcs r1, r7 +; ARM-NEXT: rsbs r5, r1, #0 +; ARM-NEXT: adcs r5, r1 +; ARM-NEXT: movs r2, #1 ; ARM-NEXT: str r0, [sp, #24] @ 4-byte Spill ; ARM-NEXT: cmp r0, #0 -; ARM-NEXT: mov r3, r4 -; ARM-NEXT: bmi .LBB6_2 +; ARM-NEXT: mov r3, r2 +; ARM-NEXT: bge .LBB6_2 ; ARM-NEXT: @ %bb.1: ; ARM-NEXT: mov r3, r6 ; ARM-NEXT: .LBB6_2: -; ARM-NEXT: ands r5, r3 -; ARM-NEXT: cmp r2, #0 -; ARM-NEXT: mov r1, r4 -; ARM-NEXT: mov r3, r4 -; ARM-NEXT: bgt .LBB6_4 +; ARM-NEXT: mov r4, r2 +; ARM-NEXT: bmi .LBB6_4 ; ARM-NEXT: @ %bb.3: -; ARM-NEXT: mov r3, r6 +; ARM-NEXT: mov r4, r6 ; ARM-NEXT: .LBB6_4: -; ARM-NEXT: orrs r3, r5 -; ARM-NEXT: mvns r4, r6 -; ARM-NEXT: cmp r3, #0 -; ARM-NEXT: mov r5, r4 -; ARM-NEXT: bne .LBB6_6 +; ARM-NEXT: ands r5, r4 +; ARM-NEXT: cmp r1, #0 +; ARM-NEXT: mov r7, r2 +; ARM-NEXT: bgt .LBB6_6 ; ARM-NEXT: @ %bb.5: -; ARM-NEXT: ldr r5, [sp, #4] @ 4-byte Reload +; ARM-NEXT: mov r7, r6 ; ARM-NEXT: .LBB6_6: -; ARM-NEXT: adds r0, r2, #1 -; ARM-NEXT: rsbs r7, r0, #0 -; ARM-NEXT: adcs r7, r0 -; ARM-NEXT: ldr r0, [sp, #24] @ 4-byte Reload -; ARM-NEXT: cmp r0, #0 -; ARM-NEXT: mov r0, r1 -; ARM-NEXT: bge .LBB6_8 +; ARM-NEXT: orrs r7, r5 +; ARM-NEXT: 
mvns r4, r6 +; ARM-NEXT: cmp r7, #0 +; ARM-NEXT: beq .LBB6_8 ; ARM-NEXT: @ %bb.7: -; ARM-NEXT: mov r0, r6 +; ARM-NEXT: ldr r0, .LCPI6_0 +; ARM-NEXT: str r0, [sp, #24] @ 4-byte Spill ; ARM-NEXT: .LBB6_8: -; ARM-NEXT: ands r7, r0 -; ARM-NEXT: cmp r2, r4 -; ARM-NEXT: mov r0, r1 -; ARM-NEXT: mov r2, r1 -; ARM-NEXT: bge .LBB6_12 +; ARM-NEXT: mov r5, r4 +; ARM-NEXT: bne .LBB6_10 ; ARM-NEXT: @ %bb.9: -; ARM-NEXT: orrs r2, r7 -; ARM-NEXT: beq .LBB6_13 +; ARM-NEXT: ldr r5, [sp, #4] @ 4-byte Reload ; ARM-NEXT: .LBB6_10: +; ARM-NEXT: adds r0, r1, #1 +; ARM-NEXT: rsbs r7, r0, #0 +; ARM-NEXT: adcs r7, r0 +; ARM-NEXT: ands r7, r3 +; ARM-NEXT: cmp r1, r4 +; ARM-NEXT: mov r3, r2 +; ARM-NEXT: blt .LBB6_12 +; ARM-NEXT: @ %bb.11: +; ARM-NEXT: mov r3, r6 +; ARM-NEXT: .LBB6_12: +; ARM-NEXT: orrs r3, r7 +; ARM-NEXT: lsls r1, r2, #31 ; ARM-NEXT: cmp r3, #0 ; ARM-NEXT: bne .LBB6_14 -; ARM-NEXT: .LBB6_11: -; ARM-NEXT: ldr r1, [sp, #24] @ 4-byte Reload -; ARM-NEXT: cmp r2, #0 -; ARM-NEXT: bne .LBB6_15 -; ARM-NEXT: b .LBB6_16 -; ARM-NEXT: .LBB6_12: -; ARM-NEXT: mov r2, r6 -; ARM-NEXT: orrs r2, r7 -; ARM-NEXT: bne .LBB6_10 -; ARM-NEXT: .LBB6_13: +; ARM-NEXT: @ %bb.13: ; ARM-NEXT: mov r6, r5 -; ARM-NEXT: cmp r3, #0 -; ARM-NEXT: beq .LBB6_11 ; ARM-NEXT: .LBB6_14: -; ARM-NEXT: ldr r1, .LCPI6_0 -; ARM-NEXT: cmp r2, #0 -; ARM-NEXT: beq .LBB6_16 -; ARM-NEXT: .LBB6_15: -; ARM-NEXT: lsls r1, r0, #31 +; ARM-NEXT: bne .LBB6_16 +; ARM-NEXT: @ %bb.15: +; ARM-NEXT: ldr r1, [sp, #24] @ 4-byte Reload ; ARM-NEXT: .LBB6_16: ; ARM-NEXT: mov r0, r6 ; ARM-NEXT: add sp, #28 @@ -577,23 +552,23 @@ define i64 @func8(i64 %x, i64 %y) nounwind { ; ARM-NEXT: sub sp, #28 ; ARM-NEXT: str r3, [sp, #24] @ 4-byte Spill ; ARM-NEXT: mov r5, r2 -; ARM-NEXT: str r2, [sp, #12] @ 4-byte Spill +; ARM-NEXT: str r2, [sp, #8] @ 4-byte Spill ; ARM-NEXT: mov r4, r1 ; ARM-NEXT: str r1, [sp, #20] @ 4-byte Spill ; ARM-NEXT: movs r7, #0 ; ARM-NEXT: mov r6, r0 -; ARM-NEXT: str r0, [sp, #16] @ 4-byte Spill +; ARM-NEXT: str r0, [sp, #12] @ 
4-byte Spill ; ARM-NEXT: mov r1, r7 ; ARM-NEXT: mov r3, r7 ; ARM-NEXT: bl __aeabi_lmul -; ARM-NEXT: str r1, [sp, #8] @ 4-byte Spill +; ARM-NEXT: str r1, [sp, #16] @ 4-byte Spill ; ARM-NEXT: mov r0, r4 ; ARM-NEXT: mov r1, r7 ; ARM-NEXT: mov r2, r5 ; ARM-NEXT: mov r3, r7 ; ARM-NEXT: bl __aeabi_lmul ; ARM-NEXT: mov r5, r1 -; ARM-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; ARM-NEXT: ldr r1, [sp, #16] @ 4-byte Reload ; ARM-NEXT: adds r4, r0, r1 ; ARM-NEXT: adcs r5, r7 ; ARM-NEXT: mov r0, r6 @@ -603,10 +578,10 @@ define i64 @func8(i64 %x, i64 %y) nounwind { ; ARM-NEXT: mov r3, r7 ; ARM-NEXT: bl __aeabi_lmul ; ARM-NEXT: adds r0, r0, r4 -; ARM-NEXT: str r0, [sp, #4] @ 4-byte Spill +; ARM-NEXT: str r0, [sp, #16] @ 4-byte Spill ; ARM-NEXT: adcs r1, r7 ; ARM-NEXT: adds r0, r5, r1 -; ARM-NEXT: str r0, [sp, #8] @ 4-byte Spill +; ARM-NEXT: str r0, [sp, #4] @ 4-byte Spill ; ARM-NEXT: mov r4, r7 ; ARM-NEXT: adcs r4, r7 ; ARM-NEXT: ldr r5, [sp, #20] @ 4-byte Reload @@ -616,75 +591,70 @@ define i64 @func8(i64 %x, i64 %y) nounwind { ; ARM-NEXT: mov r3, r7 ; ARM-NEXT: bl __aeabi_lmul ; ARM-NEXT: mov r6, r1 -; ARM-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; ARM-NEXT: ldr r1, [sp, #4] @ 4-byte Reload ; ARM-NEXT: adds r0, r0, r1 -; ARM-NEXT: str r0, [sp, #8] @ 4-byte Spill +; ARM-NEXT: str r0, [sp, #4] @ 4-byte Spill ; ARM-NEXT: adcs r6, r4 ; ARM-NEXT: asrs r2, r5, #31 -; ARM-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; ARM-NEXT: ldr r0, [sp, #8] @ 4-byte Reload ; ARM-NEXT: ldr r4, [sp, #24] @ 4-byte Reload ; ARM-NEXT: mov r1, r4 ; ARM-NEXT: mov r3, r2 ; ARM-NEXT: bl __aeabi_lmul ; ARM-NEXT: mov r5, r0 -; ARM-NEXT: str r1, [sp, #12] @ 4-byte Spill +; ARM-NEXT: str r1, [sp, #8] @ 4-byte Spill ; ARM-NEXT: asrs r0, r4, #31 ; ARM-NEXT: mov r1, r0 -; ARM-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; ARM-NEXT: ldr r2, [sp, #12] @ 4-byte Reload ; ARM-NEXT: ldr r3, [sp, #20] @ 4-byte Reload ; ARM-NEXT: bl __aeabi_lmul ; ARM-NEXT: adds r0, r0, r5 -; ARM-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; ARM-NEXT: 
adcs r1, r2 ; ARM-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; ARM-NEXT: adds r0, r2, r0 +; ARM-NEXT: adcs r1, r2 +; ARM-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; ARM-NEXT: adds r2, r2, r0 ; ARM-NEXT: adcs r1, r6 -; ARM-NEXT: ldr r2, .LCPI7_0 -; ARM-NEXT: cmp r1, r2 -; ARM-NEXT: bgt .LBB7_2 +; ARM-NEXT: lsls r0, r1, #1 +; ARM-NEXT: lsrs r3, r2, #31 +; ARM-NEXT: adds r0, r0, r3 +; ARM-NEXT: lsls r2, r2, #1 +; ARM-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; ARM-NEXT: lsrs r3, r3, #31 +; ARM-NEXT: adds r2, r2, r3 +; ARM-NEXT: mvns r3, r7 +; ARM-NEXT: ldr r4, .LCPI7_1 +; ARM-NEXT: cmp r1, r4 +; ARM-NEXT: ble .LBB7_2 ; ARM-NEXT: @ %bb.1: -; ARM-NEXT: lsls r3, r0, #1 -; ARM-NEXT: ldr r4, [sp, #4] @ 4-byte Reload -; ARM-NEXT: lsrs r4, r4, #31 -; ARM-NEXT: adds r5, r3, r4 -; ARM-NEXT: b .LBB7_3 +; ARM-NEXT: ldr r0, .LCPI7_0 ; ARM-NEXT: .LBB7_2: -; ARM-NEXT: mvns r5, r7 -; ARM-NEXT: .LBB7_3: -; ARM-NEXT: movs r3, #3 -; ARM-NEXT: lsls r3, r3, #30 -; ARM-NEXT: cmp r1, r3 -; ARM-NEXT: blt .LBB7_5 -; ARM-NEXT: @ %bb.4: -; ARM-NEXT: mov r7, r5 -; ARM-NEXT: .LBB7_5: -; ARM-NEXT: cmp r1, r2 -; ARM-NEXT: bgt .LBB7_7 -; ARM-NEXT: @ %bb.6: -; ARM-NEXT: lsls r2, r1, #1 -; ARM-NEXT: lsrs r0, r0, #31 -; ARM-NEXT: adds r2, r2, r0 -; ARM-NEXT: cmp r1, r3 +; ARM-NEXT: bgt .LBB7_4 +; ARM-NEXT: @ %bb.3: +; ARM-NEXT: mov r3, r2 +; ARM-NEXT: .LBB7_4: +; ARM-NEXT: movs r2, #1 +; ARM-NEXT: lsls r2, r2, #31 +; ARM-NEXT: movs r4, #3 +; ARM-NEXT: lsls r4, r4, #30 +; ARM-NEXT: cmp r1, r4 +; ARM-NEXT: blt .LBB7_6 +; ARM-NEXT: @ %bb.5: +; ARM-NEXT: mov r7, r3 +; ARM-NEXT: .LBB7_6: ; ARM-NEXT: blt .LBB7_8 -; ARM-NEXT: b .LBB7_9 -; ARM-NEXT: .LBB7_7: -; ARM-NEXT: ldr r2, .LCPI7_1 -; ARM-NEXT: cmp r1, r3 -; ARM-NEXT: bge .LBB7_9 +; ARM-NEXT: @ %bb.7: +; ARM-NEXT: mov r2, r0 ; ARM-NEXT: .LBB7_8: -; ARM-NEXT: movs r0, #1 -; ARM-NEXT: lsls r2, r0, #31 -; ARM-NEXT: .LBB7_9: ; ARM-NEXT: mov r0, r7 ; ARM-NEXT: mov r1, r2 ; ARM-NEXT: add sp, #28 ; ARM-NEXT: pop {r4, r5, r6, r7, pc} ; ARM-NEXT: .p2align 2 -; 
ARM-NEXT: @ %bb.10: +; ARM-NEXT: @ %bb.9: ; ARM-NEXT: .LCPI7_0: -; ARM-NEXT: .long 1073741823 @ 0x3fffffff -; ARM-NEXT: .LCPI7_1: ; ARM-NEXT: .long 2147483647 @ 0x7fffffff +; ARM-NEXT: .LCPI7_1: +; ARM-NEXT: .long 1073741823 @ 0x3fffffff %tmp = call i64 @llvm.smul.fix.sat.i64(i64 %x, i64 %y, i32 63) ret i64 %tmp } diff --git a/llvm/test/CodeGen/Thumb/stack-guard-xo.ll b/llvm/test/CodeGen/Thumb/stack-guard-xo.ll index c344e4f7fc022..421583d07ac7f 100644 --- a/llvm/test/CodeGen/Thumb/stack-guard-xo.ll +++ b/llvm/test/CodeGen/Thumb/stack-guard-xo.ll @@ -33,9 +33,9 @@ define dso_local i32 @main() #0 { ; V6M-LABEL: main: ; V6M: @ %bb.0: @ %entry ; V6M-NEXT: push {r7, lr} -; V6M-NEXT: sub sp, # +; V6M-NEXT: sub sp, #24 ; V6M-NEXT: movs r0, #0 -; V6M-NEXT: str r0, [sp, # +; V6M-NEXT: str r0, [sp, #4] ; V6M-NEXT: mrs r12, apsr ; V6M-NEXT: movs r0, :upper8_15:__stack_chk_guard ; V6M-NEXT: lsls r0, r0, #8 @@ -46,10 +46,10 @@ define dso_local i32 @main() #0 { ; V6M-NEXT: adds r0, :lower0_7:__stack_chk_guard ; V6M-NEXT: msr apsr, r12 ; V6M-NEXT: ldr r0, [r0] -; V6M-NEXT: str r0, [sp, # -; V6M-NEXT: add r0, sp, # +; V6M-NEXT: str r0, [sp, #20] +; V6M-NEXT: add r0, sp, #8 ; V6M-NEXT: ldrb r0, [r0] -; V6M-NEXT: ldr r1, [sp, # +; V6M-NEXT: ldr r1, [sp, #20] ; V6M-NEXT: mrs r12, apsr ; V6M-NEXT: movs r2, :upper8_15:__stack_chk_guard ; V6M-NEXT: lsls r2, r2, #8 @@ -63,7 +63,7 @@ define dso_local i32 @main() #0 { ; V6M-NEXT: cmp r2, r1 ; V6M-NEXT: bne .LBB0_2 ; V6M-NEXT: @ %bb.1: @ %entry -; V6M-NEXT: add sp, # +; V6M-NEXT: add sp, #24 ; V6M-NEXT: pop {r7, pc} ; V6M-NEXT: .LBB0_2: @ %entry ; V6M-NEXT: bl __stack_chk_fail @@ -105,77 +105,177 @@ entry: @bb = hidden local_unnamed_addr global i64 0, align 8 define dso_local i64 @cc() local_unnamed_addr #1 { +; CHECK-LABEL: cc: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: movs r0, #1 +; CHECK-NEXT: lsls r3, r0, #31 +; CHECK-NEXT: ldr r0, .LCPI1_0 +; CHECK-NEXT: ldr r2, [r0] +; 
CHECK-NEXT: asrs r4, r2, #31 +; CHECK-NEXT: eors r3, r4 +; CHECK-NEXT: ldr r0, .LCPI1_1 +; CHECK-NEXT: ldm r0!, {r1, r5} +; CHECK-NEXT: subs r0, r2, r1 +; CHECK-NEXT: sbcs r3, r5 +; CHECK-NEXT: subs r0, r2, r1 +; CHECK-NEXT: ldr r1, .LCPI1_2 +; CHECK-NEXT: ldr r1, [r1] +; CHECK-NEXT: str r1, [sp, #4] +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: sbcs r1, r5 +; CHECK-NEXT: ands r3, r4 +; CHECK-NEXT: ands r2, r0 +; CHECK-NEXT: mov r4, r2 +; CHECK-NEXT: orrs r4, r3 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: beq .LBB1_2 +; CHECK-NEXT: @ %bb.1: @ %entry +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: .LBB1_2: @ %entry +; CHECK-NEXT: beq .LBB1_4 +; CHECK-NEXT: @ %bb.3: @ %entry +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: .LBB1_4: @ %entry +; CHECK-NEXT: ldr r2, [sp, #4] +; CHECK-NEXT: ldr r3, .LCPI1_2 +; CHECK-NEXT: ldr r3, [r3] +; CHECK-NEXT: cmp r3, r2 +; CHECK-NEXT: bne .LBB1_6 +; CHECK-NEXT: @ %bb.5: @ %entry +; CHECK-NEXT: add sp, #8 +; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK-NEXT: .LBB1_6: @ %entry +; CHECK-NEXT: bl __stack_chk_fail +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.7: +; CHECK-NEXT: .LCPI1_0: +; CHECK-NEXT: .long aa +; CHECK-NEXT: .LCPI1_1: +; CHECK-NEXT: .long bb +; CHECK-NEXT: .LCPI1_2: +; CHECK-NEXT: .long __stack_chk_guard +; ; V6M-LABEL: cc: ; V6M: @ %bb.0: @ %entry -; V6M-NEXT: push {r4, r5, r7, lr} -; V6M-NEXT: sub sp, #8 -; V6M-NEXT: movs r0, #1 -; V6M-NEXT: lsls r3, r0, #31 -; V6M-NEXT: movs r0, :upper8_15:aa -; V6M-NEXT: lsls r0, r0, #8 -; V6M-NEXT: adds r0, :upper0_7:aa -; V6M-NEXT: lsls r0, r0, #8 -; V6M-NEXT: adds r0, :lower8_15:aa -; V6M-NEXT: lsls r0, r0, #8 -; V6M-NEXT: adds r0, :lower0_7:aa -; V6M-NEXT: ldr r2, [r0] -; V6M-NEXT: asrs r4, r2, #31 -; V6M-NEXT: eors r3, r4 -; V6M-NEXT: movs r0, :upper8_15:bb -; V6M-NEXT: lsls r0, r0, #8 -; V6M-NEXT: adds r0, :upper0_7:bb -; V6M-NEXT: lsls r0, r0, #8 -; V6M-NEXT: adds r0, :lower8_15:bb -; V6M-NEXT: lsls r0, r0, #8 -; V6M-NEXT: adds r0, :lower0_7:bb -; V6M-NEXT: ldm r0!, {r1, r5} -; V6M-NEXT: subs r0, 
r2, r1 -; V6M-NEXT: sbcs r3, r5 -; V6M-NEXT: subs r0, r2, r1 -; V6M-NEXT: mrs r12, apsr -; V6M-NEXT: movs r1, :upper8_15:__stack_chk_guard -; V6M-NEXT: lsls r1, r1, #8 -; V6M-NEXT: adds r1, :upper0_7:__stack_chk_guard -; V6M-NEXT: lsls r1, r1, #8 -; V6M-NEXT: adds r1, :lower8_15:__stack_chk_guard -; V6M-NEXT: lsls r1, r1, #8 -; V6M-NEXT: adds r1, :lower0_7:__stack_chk_guard -; V6M-NEXT: msr apsr, r12 -; V6M-NEXT: ldr r1, [r1] -; V6M-NEXT: str r1, [sp, #4] -; V6M-NEXT: mov r1, r4 -; V6M-NEXT: sbcs r1, r5 -; V6M-NEXT: ands r3, r4 -; V6M-NEXT: ands r2, r0 -; V6M-NEXT: mov r4, r2 -; V6M-NEXT: orrs r4, r3 -; V6M-NEXT: beq .LBB1_2 -; V6M-NEXT: @ %bb.1: @ %entry -; V6M-NEXT: mov r1, r3 -; V6M-NEXT: .LBB1_2: @ %entry -; V6M-NEXT: cmp r4, #0 -; V6M-NEXT: beq .LBB1_4 -; V6M-NEXT: @ %bb.3: @ %entry -; V6M-NEXT: mov r0, r2 -; V6M-NEXT: .LBB1_4: @ %entry -; V6M-NEXT: ldr r2, [sp, #4] -; V6M-NEXT: mrs r12, apsr -; V6M-NEXT: movs r3, :upper8_15:__stack_chk_guard -; V6M-NEXT: lsls r3, r3, #8 -; V6M-NEXT: adds r3, :upper0_7:__stack_chk_guard -; V6M-NEXT: lsls r3, r3, #8 -; V6M-NEXT: adds r3, :lower8_15:__stack_chk_guard -; V6M-NEXT: lsls r3, r3, #8 -; V6M-NEXT: adds r3, :lower0_7:__stack_chk_guard -; V6M-NEXT: msr apsr, r12 -; V6M-NEXT: ldr r3, [r3] -; V6M-NEXT: cmp r3, r2 -; V6M-NEXT: bne .LBB1_6 -; V6M-NEXT: @ %bb.5: @ %entry -; V6M-NEXT: add sp, #8 -; V6M-NEXT: pop {r4, r5, r7, pc} -; V6M-NEXT: .LBB1_6: @ %entry -; V6M-NEXT: bl __stack_chk_fail +; V6M-NEXT: push {r4, r5, r7, lr} +; V6M-NEXT: sub sp, #8 +; V6M-NEXT: movs r0, #1 +; V6M-NEXT: lsls r3, r0, #31 +; V6M-NEXT: movs r0, :upper8_15:aa +; V6M-NEXT: lsls r0, r0, #8 +; V6M-NEXT: adds r0, :upper0_7:aa +; V6M-NEXT: lsls r0, r0, #8 +; V6M-NEXT: adds r0, :lower8_15:aa +; V6M-NEXT: lsls r0, r0, #8 +; V6M-NEXT: adds r0, :lower0_7:aa +; V6M-NEXT: ldr r2, [r0] +; V6M-NEXT: asrs r4, r2, #31 +; V6M-NEXT: eors r3, r4 +; V6M-NEXT: movs r0, :upper8_15:bb +; V6M-NEXT: lsls r0, r0, #8 +; V6M-NEXT: adds r0, :upper0_7:bb +; V6M-NEXT: lsls 
r0, r0, #8 +; V6M-NEXT: adds r0, :lower8_15:bb +; V6M-NEXT: lsls r0, r0, #8 +; V6M-NEXT: adds r0, :lower0_7:bb +; V6M-NEXT: ldm r0!, {r1, r5} +; V6M-NEXT: subs r0, r2, r1 +; V6M-NEXT: sbcs r3, r5 +; V6M-NEXT: subs r0, r2, r1 +; V6M-NEXT: mrs r12, apsr +; V6M-NEXT: movs r1, :upper8_15:__stack_chk_guard +; V6M-NEXT: lsls r1, r1, #8 +; V6M-NEXT: adds r1, :upper0_7:__stack_chk_guard +; V6M-NEXT: lsls r1, r1, #8 +; V6M-NEXT: adds r1, :lower8_15:__stack_chk_guard +; V6M-NEXT: lsls r1, r1, #8 +; V6M-NEXT: adds r1, :lower0_7:__stack_chk_guard +; V6M-NEXT: msr apsr, r12 +; V6M-NEXT: ldr r1, [r1] +; V6M-NEXT: str r1, [sp, #4] +; V6M-NEXT: mov r1, r4 +; V6M-NEXT: sbcs r1, r5 +; V6M-NEXT: ands r3, r4 +; V6M-NEXT: ands r2, r0 +; V6M-NEXT: mov r4, r2 +; V6M-NEXT: orrs r4, r3 +; V6M-NEXT: cmp r4, #0 +; V6M-NEXT: beq .LBB1_2 +; V6M-NEXT: @ %bb.1: @ %entry +; V6M-NEXT: mov r1, r3 +; V6M-NEXT: .LBB1_2: @ %entry +; V6M-NEXT: beq .LBB1_4 +; V6M-NEXT: @ %bb.3: @ %entry +; V6M-NEXT: mov r0, r2 +; V6M-NEXT: .LBB1_4: @ %entry +; V6M-NEXT: ldr r2, [sp, #4] +; V6M-NEXT: mrs r12, apsr +; V6M-NEXT: movs r3, :upper8_15:__stack_chk_guard +; V6M-NEXT: lsls r3, r3, #8 +; V6M-NEXT: adds r3, :upper0_7:__stack_chk_guard +; V6M-NEXT: lsls r3, r3, #8 +; V6M-NEXT: adds r3, :lower8_15:__stack_chk_guard +; V6M-NEXT: lsls r3, r3, #8 +; V6M-NEXT: adds r3, :lower0_7:__stack_chk_guard +; V6M-NEXT: msr apsr, r12 +; V6M-NEXT: ldr r3, [r3] +; V6M-NEXT: cmp r3, r2 +; V6M-NEXT: bne .LBB1_6 +; V6M-NEXT: @ %bb.5: @ %entry +; V6M-NEXT: add sp, #8 +; V6M-NEXT: pop {r4, r5, r7, pc} +; V6M-NEXT: .LBB1_6: @ %entry +; V6M-NEXT: bl __stack_chk_fail +; +; V8MBASE-LABEL: cc: +; V8MBASE: @ %bb.0: @ %entry +; V8MBASE-NEXT: push {r4, r5, r7, lr} +; V8MBASE-NEXT: sub sp, #8 +; V8MBASE-NEXT: movs r0, #1 +; V8MBASE-NEXT: lsls r3, r0, #31 +; V8MBASE-NEXT: movw r0, :lower16:aa +; V8MBASE-NEXT: movt r0, :upper16:aa +; V8MBASE-NEXT: ldr r2, [r0] +; V8MBASE-NEXT: asrs r4, r2, #31 +; V8MBASE-NEXT: eors r3, r4 +; V8MBASE-NEXT: movw r0, 
:lower16:bb +; V8MBASE-NEXT: movt r0, :upper16:bb +; V8MBASE-NEXT: ldm r0!, {r1, r5} +; V8MBASE-NEXT: subs r0, r2, r1 +; V8MBASE-NEXT: sbcs r3, r5 +; V8MBASE-NEXT: subs r0, r2, r1 +; V8MBASE-NEXT: movw r1, :lower16:__stack_chk_guard +; V8MBASE-NEXT: movt r1, :upper16:__stack_chk_guard +; V8MBASE-NEXT: ldr r1, [r1] +; V8MBASE-NEXT: str r1, [sp, #4] +; V8MBASE-NEXT: mov r1, r4 +; V8MBASE-NEXT: sbcs r1, r5 +; V8MBASE-NEXT: ands r3, r4 +; V8MBASE-NEXT: ands r2, r0 +; V8MBASE-NEXT: mov r4, r2 +; V8MBASE-NEXT: orrs r4, r3 +; V8MBASE-NEXT: cmp r4, #0 +; V8MBASE-NEXT: beq .LBB1_2 +; V8MBASE-NEXT: @ %bb.1: @ %entry +; V8MBASE-NEXT: mov r1, r3 +; V8MBASE-NEXT: .LBB1_2: @ %entry +; V8MBASE-NEXT: beq .LBB1_4 +; V8MBASE-NEXT: @ %bb.3: @ %entry +; V8MBASE-NEXT: mov r0, r2 +; V8MBASE-NEXT: .LBB1_4: @ %entry +; V8MBASE-NEXT: ldr r2, [sp, #4] +; V8MBASE-NEXT: movw r3, :lower16:__stack_chk_guard +; V8MBASE-NEXT: movt r3, :upper16:__stack_chk_guard +; V8MBASE-NEXT: ldr r3, [r3] +; V8MBASE-NEXT: cmp r3, r2 +; V8MBASE-NEXT: bne .LBB1_6 +; V8MBASE-NEXT: @ %bb.5: @ %entry +; V8MBASE-NEXT: add sp, #8 +; V8MBASE-NEXT: pop {r4, r5, r7, pc} +; V8MBASE-NEXT: .LBB1_6: @ %entry +; V8MBASE-NEXT: bl __stack_chk_fail entry: %0 = load i32, ptr @aa, align 4 diff --git a/llvm/test/CodeGen/Thumb/umul_fix_sat.ll b/llvm/test/CodeGen/Thumb/umul_fix_sat.ll index fa88024315211..a43b22102c24b 100644 --- a/llvm/test/CodeGen/Thumb/umul_fix_sat.ll +++ b/llvm/test/CodeGen/Thumb/umul_fix_sat.ll @@ -38,26 +38,26 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; ARM-NEXT: sub sp, #28 ; ARM-NEXT: str r3, [sp, #24] @ 4-byte Spill ; ARM-NEXT: mov r5, r1 -; ARM-NEXT: str r1, [sp, #8] @ 4-byte Spill +; ARM-NEXT: str r1, [sp, #4] @ 4-byte Spill ; ARM-NEXT: movs r4, #0 ; ARM-NEXT: mov r6, r0 -; ARM-NEXT: str r0, [sp, #12] @ 4-byte Spill +; ARM-NEXT: str r0, [sp, #8] @ 4-byte Spill ; ARM-NEXT: mov r1, r4 ; ARM-NEXT: mov r7, r2 -; ARM-NEXT: str r2, [sp, #16] @ 4-byte Spill +; ARM-NEXT: str r2, [sp, #12] @ 4-byte Spill ; 
ARM-NEXT: mov r3, r4 ; ARM-NEXT: bl __aeabi_lmul -; ARM-NEXT: str r0, [sp] @ 4-byte Spill -; ARM-NEXT: str r1, [sp, #20] @ 4-byte Spill +; ARM-NEXT: str r0, [sp, #20] @ 4-byte Spill +; ARM-NEXT: str r1, [sp, #16] @ 4-byte Spill ; ARM-NEXT: mov r0, r5 ; ARM-NEXT: mov r1, r4 ; ARM-NEXT: mov r2, r7 ; ARM-NEXT: mov r3, r4 ; ARM-NEXT: bl __aeabi_lmul ; ARM-NEXT: mov r5, r1 -; ARM-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; ARM-NEXT: ldr r1, [sp, #16] @ 4-byte Reload ; ARM-NEXT: adds r0, r0, r1 -; ARM-NEXT: str r0, [sp, #20] @ 4-byte Spill +; ARM-NEXT: str r0, [sp, #16] @ 4-byte Spill ; ARM-NEXT: adcs r5, r4 ; ARM-NEXT: mov r0, r6 ; ARM-NEXT: mov r1, r4 @@ -65,33 +65,33 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; ARM-NEXT: mov r2, r7 ; ARM-NEXT: mov r3, r4 ; ARM-NEXT: bl __aeabi_lmul -; ARM-NEXT: ldr r2, [sp, #20] @ 4-byte Reload +; ARM-NEXT: ldr r2, [sp, #16] @ 4-byte Reload ; ARM-NEXT: adds r0, r0, r2 -; ARM-NEXT: str r0, [sp, #20] @ 4-byte Spill +; ARM-NEXT: str r0, [sp, #16] @ 4-byte Spill ; ARM-NEXT: adcs r1, r4 ; ARM-NEXT: adds r0, r5, r1 -; ARM-NEXT: str r0, [sp, #4] @ 4-byte Spill +; ARM-NEXT: str r0, [sp] @ 4-byte Spill ; ARM-NEXT: mov r6, r4 ; ARM-NEXT: adcs r6, r4 -; ARM-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; ARM-NEXT: ldr r5, [sp, #4] @ 4-byte Reload ; ARM-NEXT: mov r0, r5 ; ARM-NEXT: mov r1, r4 ; ARM-NEXT: mov r2, r7 ; ARM-NEXT: mov r3, r4 ; ARM-NEXT: bl __aeabi_lmul ; ARM-NEXT: mov r7, r1 -; ARM-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; ARM-NEXT: ldr r1, [sp] @ 4-byte Reload ; ARM-NEXT: adds r0, r0, r1 -; ARM-NEXT: str r0, [sp, #4] @ 4-byte Spill +; ARM-NEXT: str r0, [sp] @ 4-byte Spill ; ARM-NEXT: adcs r7, r6 -; ARM-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; ARM-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; ARM-NEXT: ldr r1, [sp, #24] @ 4-byte Reload ; ARM-NEXT: mov r2, r4 ; ARM-NEXT: mov r3, r4 ; ARM-NEXT: bl __aeabi_lmul ; ARM-NEXT: mov r6, r0 ; ARM-NEXT: str r1, [sp, #24] @ 4-byte Spill -; ARM-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; ARM-NEXT: ldr 
r0, [sp, #8] @ 4-byte Reload ; ARM-NEXT: mov r1, r5 ; ARM-NEXT: mov r2, r4 ; ARM-NEXT: mov r3, r4 @@ -99,34 +99,33 @@ define i64 @func2(i64 %x, i64 %y) nounwind { ; ARM-NEXT: adds r0, r0, r6 ; ARM-NEXT: ldr r2, [sp, #24] @ 4-byte Reload ; ARM-NEXT: adcs r1, r2 -; ARM-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; ARM-NEXT: adds r2, r2, r0 +; ARM-NEXT: ldr r2, [sp] @ 4-byte Reload +; ARM-NEXT: adds r0, r2, r0 ; ARM-NEXT: adcs r1, r7 -; ARM-NEXT: lsrs r3, r2, #2 -; ARM-NEXT: orrs r3, r1 +; ARM-NEXT: lsrs r5, r0, #2 +; ARM-NEXT: orrs r5, r1 +; ARM-NEXT: lsls r0, r0, #30 +; ARM-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; ARM-NEXT: lsrs r1, r3, #2 +; ARM-NEXT: adds r2, r0, r1 +; ARM-NEXT: lsls r0, r3, #30 +; ARM-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; ARM-NEXT: lsrs r1, r1, #2 +; ARM-NEXT: adds r3, r0, r1 ; ARM-NEXT: mvns r1, r4 -; ARM-NEXT: cmp r3, #0 +; ARM-NEXT: cmp r5, #0 ; ARM-NEXT: mov r0, r1 ; ARM-NEXT: beq .LBB1_3 ; ARM-NEXT: @ %bb.1: -; ARM-NEXT: cmp r3, #0 ; ARM-NEXT: beq .LBB1_4 ; ARM-NEXT: .LBB1_2: ; ARM-NEXT: add sp, #28 ; ARM-NEXT: pop {r4, r5, r6, r7, pc} ; ARM-NEXT: .LBB1_3: -; ARM-NEXT: ldr r0, [sp, #20] @ 4-byte Reload -; ARM-NEXT: lsls r0, r0, #30 -; ARM-NEXT: ldr r4, [sp] @ 4-byte Reload -; ARM-NEXT: lsrs r4, r4, #2 -; ARM-NEXT: adds r0, r0, r4 -; ARM-NEXT: cmp r3, #0 +; ARM-NEXT: mov r0, r3 ; ARM-NEXT: bne .LBB1_2 ; ARM-NEXT: .LBB1_4: -; ARM-NEXT: lsls r1, r2, #30 -; ARM-NEXT: ldr r2, [sp, #20] @ 4-byte Reload -; ARM-NEXT: lsrs r2, r2, #2 -; ARM-NEXT: adds r1, r1, r2 +; ARM-NEXT: mov r1, r2 ; ARM-NEXT: add sp, #28 ; ARM-NEXT: pop {r4, r5, r6, r7, pc} %tmp = call i64 @llvm.umul.fix.sat.i64(i64 %x, i64 %y, i32 2) @@ -225,17 +224,16 @@ define i64 @func5(i64 %x, i64 %y) { ; ARM-NEXT: mov r3, r5 ; ARM-NEXT: bl __aeabi_lmul ; ARM-NEXT: adds r3, r1, r6 -; ARM-NEXT: mov r6, r5 -; ARM-NEXT: adcs r6, r5 -; ARM-NEXT: orrs r6, r4 +; ARM-NEXT: mov r2, r5 +; ARM-NEXT: adcs r2, r5 +; ARM-NEXT: orrs r2, r4 ; ARM-NEXT: mvns r1, r5 -; ARM-NEXT: cmp r6, #0 +; ARM-NEXT: 
cmp r2, #0 ; ARM-NEXT: mov r2, r1 ; ARM-NEXT: bne .LBB4_2 ; ARM-NEXT: @ %bb.1: ; ARM-NEXT: mov r2, r0 ; ARM-NEXT: .LBB4_2: -; ARM-NEXT: cmp r6, #0 ; ARM-NEXT: bne .LBB4_4 ; ARM-NEXT: @ %bb.3: ; ARM-NEXT: mov r1, r3 @@ -399,25 +397,27 @@ define i64 @func7(i64 %x, i64 %y) nounwind { ; ARM-NEXT: mov r2, r4 ; ARM-NEXT: mov r3, r4 ; ARM-NEXT: bl __aeabi_lmul +; ARM-NEXT: mov r2, r1 ; ARM-NEXT: adds r0, r0, r5 -; ARM-NEXT: ldr r2, [sp, #24] @ 4-byte Reload -; ARM-NEXT: adcs r1, r2 -; ARM-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; ARM-NEXT: adds r3, r2, r0 -; ARM-NEXT: adcs r1, r6 -; ARM-NEXT: mvns r2, r4 -; ARM-NEXT: cmp r1, #0 -; ARM-NEXT: mov r0, r2 -; ARM-NEXT: bne .LBB7_2 +; ARM-NEXT: ldr r1, [sp, #24] @ 4-byte Reload +; ARM-NEXT: adcs r2, r1 +; ARM-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; ARM-NEXT: adds r3, r1, r0 +; ARM-NEXT: adcs r2, r6 +; ARM-NEXT: mvns r1, r4 +; ARM-NEXT: cmp r2, #0 +; ARM-NEXT: mov r0, r1 +; ARM-NEXT: beq .LBB7_3 ; ARM-NEXT: @ %bb.1: -; ARM-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; ARM-NEXT: beq .LBB7_4 ; ARM-NEXT: .LBB7_2: -; ARM-NEXT: cmp r1, #0 -; ARM-NEXT: bne .LBB7_4 -; ARM-NEXT: @ %bb.3: -; ARM-NEXT: mov r2, r3 +; ARM-NEXT: add sp, #28 +; ARM-NEXT: pop {r4, r5, r6, r7, pc} +; ARM-NEXT: .LBB7_3: +; ARM-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; ARM-NEXT: bne .LBB7_2 ; ARM-NEXT: .LBB7_4: -; ARM-NEXT: mov r1, r2 +; ARM-NEXT: mov r1, r3 ; ARM-NEXT: add sp, #28 ; ARM-NEXT: pop {r4, r5, r6, r7, pc} %tmp = call i64 @llvm.umul.fix.sat.i64(i64 %x, i64 %y, i32 32) @@ -433,23 +433,23 @@ define i64 @func8(i64 %x, i64 %y) nounwind { ; ARM-NEXT: sub sp, #28 ; ARM-NEXT: str r3, [sp, #24] @ 4-byte Spill ; ARM-NEXT: mov r7, r2 -; ARM-NEXT: str r2, [sp, #20] @ 4-byte Spill +; ARM-NEXT: str r2, [sp, #16] @ 4-byte Spill ; ARM-NEXT: mov r5, r1 -; ARM-NEXT: str r1, [sp, #12] @ 4-byte Spill +; ARM-NEXT: str r1, [sp, #8] @ 4-byte Spill ; ARM-NEXT: movs r4, #0 ; ARM-NEXT: mov r6, r0 -; ARM-NEXT: str r0, [sp, #16] @ 4-byte Spill +; ARM-NEXT: str r0, [sp, #12] @ 
4-byte Spill ; ARM-NEXT: mov r1, r4 ; ARM-NEXT: mov r3, r4 ; ARM-NEXT: bl __aeabi_lmul -; ARM-NEXT: str r1, [sp, #8] @ 4-byte Spill +; ARM-NEXT: str r1, [sp, #20] @ 4-byte Spill ; ARM-NEXT: mov r0, r5 ; ARM-NEXT: mov r1, r4 ; ARM-NEXT: mov r2, r7 ; ARM-NEXT: mov r3, r4 ; ARM-NEXT: bl __aeabi_lmul ; ARM-NEXT: mov r7, r1 -; ARM-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; ARM-NEXT: ldr r1, [sp, #20] @ 4-byte Reload ; ARM-NEXT: adds r5, r0, r1 ; ARM-NEXT: adcs r7, r4 ; ARM-NEXT: mov r0, r6 @@ -459,31 +459,31 @@ define i64 @func8(i64 %x, i64 %y) nounwind { ; ARM-NEXT: mov r3, r4 ; ARM-NEXT: bl __aeabi_lmul ; ARM-NEXT: adds r0, r0, r5 -; ARM-NEXT: str r0, [sp, #4] @ 4-byte Spill +; ARM-NEXT: str r0, [sp, #20] @ 4-byte Spill ; ARM-NEXT: adcs r1, r4 ; ARM-NEXT: adds r0, r7, r1 -; ARM-NEXT: str r0, [sp, #8] @ 4-byte Spill +; ARM-NEXT: str r0, [sp, #4] @ 4-byte Spill ; ARM-NEXT: mov r5, r4 ; ARM-NEXT: adcs r5, r4 -; ARM-NEXT: ldr r7, [sp, #12] @ 4-byte Reload +; ARM-NEXT: ldr r7, [sp, #8] @ 4-byte Reload ; ARM-NEXT: mov r0, r7 ; ARM-NEXT: mov r1, r4 ; ARM-NEXT: mov r2, r6 ; ARM-NEXT: mov r3, r4 ; ARM-NEXT: bl __aeabi_lmul ; ARM-NEXT: mov r6, r1 -; ARM-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; ARM-NEXT: ldr r1, [sp, #4] @ 4-byte Reload ; ARM-NEXT: adds r0, r0, r1 -; ARM-NEXT: str r0, [sp, #8] @ 4-byte Spill +; ARM-NEXT: str r0, [sp, #4] @ 4-byte Spill ; ARM-NEXT: adcs r6, r5 -; ARM-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; ARM-NEXT: ldr r0, [sp, #16] @ 4-byte Reload ; ARM-NEXT: ldr r1, [sp, #24] @ 4-byte Reload ; ARM-NEXT: mov r2, r4 ; ARM-NEXT: mov r3, r4 ; ARM-NEXT: bl __aeabi_lmul ; ARM-NEXT: mov r5, r0 ; ARM-NEXT: str r1, [sp, #24] @ 4-byte Spill -; ARM-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; ARM-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; ARM-NEXT: mov r1, r7 ; ARM-NEXT: mov r2, r4 ; ARM-NEXT: mov r3, r4 @@ -491,25 +491,28 @@ define i64 @func8(i64 %x, i64 %y) nounwind { ; ARM-NEXT: adds r0, r0, r5 ; ARM-NEXT: ldr r2, [sp, #24] @ 4-byte Reload ; ARM-NEXT: adcs r1, r2 -; 
ARM-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; ARM-NEXT: adds r5, r2, r0 +; ARM-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; ARM-NEXT: adds r0, r2, r0 ; ARM-NEXT: adcs r1, r6 -; ARM-NEXT: lsrs r3, r5, #31 -; ARM-NEXT: mvns r2, r4 -; ARM-NEXT: cmp r3, #0 -; ARM-NEXT: mov r0, r2 -; ARM-NEXT: bne .LBB8_2 +; ARM-NEXT: lsls r1, r1, #1 +; ARM-NEXT: lsrs r5, r0, #31 +; ARM-NEXT: adds r2, r1, r5 +; ARM-NEXT: lsls r0, r0, #1 +; ARM-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; ARM-NEXT: lsrs r1, r1, #31 +; ARM-NEXT: adds r3, r0, r1 +; ARM-NEXT: mvns r1, r4 +; ARM-NEXT: cmp r5, #0 +; ARM-NEXT: mov r0, r1 +; ARM-NEXT: beq .LBB8_3 ; ARM-NEXT: @ %bb.1: -; ARM-NEXT: lsls r0, r5, #1 -; ARM-NEXT: ldr r4, [sp, #4] @ 4-byte Reload -; ARM-NEXT: lsrs r4, r4, #31 -; ARM-NEXT: adds r0, r0, r4 +; ARM-NEXT: beq .LBB8_4 ; ARM-NEXT: .LBB8_2: -; ARM-NEXT: cmp r3, #0 -; ARM-NEXT: bne .LBB8_4 -; ARM-NEXT: @ %bb.3: -; ARM-NEXT: lsls r1, r1, #1 -; ARM-NEXT: adds r2, r1, r3 +; ARM-NEXT: add sp, #28 +; ARM-NEXT: pop {r4, r5, r6, r7, pc} +; ARM-NEXT: .LBB8_3: +; ARM-NEXT: mov r0, r3 +; ARM-NEXT: bne .LBB8_2 ; ARM-NEXT: .LBB8_4: ; ARM-NEXT: mov r1, r2 ; ARM-NEXT: add sp, #28 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll index 13080fcfa1357..a87d363fa61ee 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/arm_cmplx_dot_prod_f32.ll @@ -15,13 +15,13 @@ define void @arm_cmplx_dot_prod_f32(ptr %pSrcA, ptr %pSrcB, i32 %numSamples, ptr ; CHECK-NEXT: lsrs r4, r2, #2 ; CHECK-NEXT: mov.w lr, #2 ; CHECK-NEXT: cmp r4, #2 +; CHECK-NEXT: vldrw.u32 q2, [r1], #32 +; CHECK-NEXT: vldrw.u32 q1, [r0], #32 ; CHECK-NEXT: it lt ; CHECK-NEXT: lsrlt.w lr, r2, #2 ; CHECK-NEXT: rsb r4, lr, r2, lsr #2 -; CHECK-NEXT: vldrw.u32 q2, [r1], #32 -; CHECK-NEXT: add.w lr, r4, #1 -; CHECK-NEXT: vldrw.u32 q1, [r0], #32 ; CHECK-NEXT: vmov.i32 q0, #0x0 +; 
CHECK-NEXT: add.w lr, r4, #1 ; CHECK-NEXT: .LBB0_2: @ %while.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vcmla.f32 q0, q1, q2, #0 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll index b9a80af649f29..3c1510623e5c4 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/exitcount.ll @@ -7,26 +7,26 @@ define void @foo(ptr nocapture readonly %st, ptr %x) { ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: ldrd r12, r2, [r0] -; CHECK-NEXT: ldrd r4, r3, [r0, #8] -; CHECK-NEXT: rsb r12, r12, r2, lsl #1 +; CHECK-NEXT: ldrd r12, r3, [r0] +; CHECK-NEXT: ldrd r4, r2, [r0, #8] +; CHECK-NEXT: rsb r12, r12, r3, lsl #1 ; CHECK-NEXT: dlstp.16 lr, r12 ; CHECK-NEXT: .LBB0_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrh.u16 q0, [r3], #16 +; CHECK-NEXT: vldrh.u16 q0, [r2], #16 ; CHECK-NEXT: vstrh.16 q0, [r4], #16 ; CHECK-NEXT: letp lr, .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %do.end -; CHECK-NEXT: ldr r2, [r0] +; CHECK-NEXT: ldr r3, [r0] ; CHECK-NEXT: ldr r0, [r0, #8] ; CHECK-NEXT: add.w r0, r0, r12, lsl #1 -; CHECK-NEXT: mov.w r3, #6144 -; CHECK-NEXT: dlstp.16 lr, r2 +; CHECK-NEXT: mov.w r2, #6144 +; CHECK-NEXT: dlstp.16 lr, r3 ; CHECK-NEXT: .LBB0_3: @ %do.body6 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u16 q0, [r1], #16 ; CHECK-NEXT: vcvt.f16.s16 q0, q0 -; CHECK-NEXT: vmul.f16 q0, q0, r3 +; CHECK-NEXT: vmul.f16 q0, q0, r2 ; CHECK-NEXT: vstrh.16 q0, [r0], #16 ; CHECK-NEXT: letp lr, .LBB0_3 ; CHECK-NEXT: @ %bb.4: @ %do.end13 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll index 2fdf534d52656..6cb98557c9bc1 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll +++ 
b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll @@ -9,12 +9,12 @@ define arm_aapcs_vfpcc void @fast_float_mul(ptr nocapture %a, ptr nocapture read ; CHECK-NEXT: beq.w .LBB0_11 ; CHECK-NEXT: @ %bb.1: @ %vector.memcheck ; CHECK-NEXT: add.w r4, r2, r3, lsl #2 -; CHECK-NEXT: add.w lr, r0, r3, lsl #2 ; CHECK-NEXT: cmp r4, r0 -; CHECK-NEXT: cset r4, hi -; CHECK-NEXT: cmp lr, r2 -; CHECK-NEXT: csel r12, zr, r4, ls -; CHECK-NEXT: cmp lr, r1 +; CHECK-NEXT: add.w r4, r0, r3, lsl #2 +; CHECK-NEXT: cset r12, hi +; CHECK-NEXT: cmp r4, r2 +; CHECK-NEXT: csel r12, zr, r12, ls +; CHECK-NEXT: cmp r4, r1 ; CHECK-NEXT: add.w r4, r1, r3, lsl #2 ; CHECK-NEXT: cset lr, hi ; CHECK-NEXT: cmp r4, r0 diff --git a/llvm/test/CodeGen/Thumb2/float-ops.ll b/llvm/test/CodeGen/Thumb2/float-ops.ll index d2b1dd6f05a3f..191c775be3420 100644 --- a/llvm/test/CodeGen/Thumb2/float-ops.ll +++ b/llvm/test/CodeGen/Thumb2/float-ops.ll @@ -289,15 +289,15 @@ define float @select_f(float %a, float %b, i1 %c) { define double @select_d(double %a, double %b, i1 %c) { ; CHECK-LABEL: select_d: ; NOREGS: ldr{{(.w)?}} [[REG:r[0-9]+]], [sp] -; NOREGS: ands [[REG]], [[REG]], #1 -; ONLYREGS: ands r0, r0, #1 +; NOREGS: lsls.w [[REG]], [[REG]], #31 +; ONLYREGS: lsls r0, r0, #31 ; NOREGS-DAG: moveq r0, r2 ; NOREGS-DAG: moveq r1, r3 -; ONLYREGS-DAG: csel r0, r0, r2 -; ONLYREGS-DAG: csel r1, r1, r3 -; SP: ands r0, r0, #1 +; ONLYREGS-DAG: csel r0, r2, r1 +; ONLYREGS-DAG: csel r1, r12, r3 ; SP-DAG: vmov [[ALO:r[0-9]+]], [[AHI:r[0-9]+]], d0 ; SP-DAG: vmov [[BLO:r[0-9]+]], [[BHI:r[0-9]+]], d1 +; SP: lsls r0, r0, #31 ; SP: itt ne ; SP-DAG: movne [[BLO]], [[ALO]] ; SP-DAG: movne [[BHI]], [[AHI]] diff --git a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll index 7087041e8dace..d076cb00ad7e0 100644 --- a/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll +++ b/llvm/test/CodeGen/Thumb2/mve-blockplacement.ll @@ -489,10 +489,10 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) { ; 
CHECK-NEXT: @ Parent Loop BB1_10 Depth=2 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=3 ; CHECK-NEXT: vqadd.u32 q2, q5, r1 -; CHECK-NEXT: subs r2, #4 +; CHECK-NEXT: adds r1, #4 ; CHECK-NEXT: vcmp.u32 hi, q7, q2 ; CHECK-NEXT: vshl.i32 q2, q1, #2 -; CHECK-NEXT: add.w r1, r1, #4 +; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vadd.i32 q2, q2, r8 ; CHECK-NEXT: vadd.i32 q1, q1, r9 ; CHECK-NEXT: vpst @@ -508,10 +508,10 @@ define i32 @d(i64 %e, i32 %f, i64 %g, i32 %h) { ; CHECK-NEXT: @ Parent Loop BB1_10 Depth=2 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=3 ; CHECK-NEXT: vqadd.u32 q2, q5, r1 -; CHECK-NEXT: subs r2, #4 +; CHECK-NEXT: adds r1, #4 ; CHECK-NEXT: vcmp.u32 hi, q6, q2 ; CHECK-NEXT: vshl.i32 q2, q1, #2 -; CHECK-NEXT: add.w r1, r1, #4 +; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vadd.i32 q2, q2, r8 ; CHECK-NEXT: vadd.i32 q1, q1, r9 ; CHECK-NEXT: vpst diff --git a/llvm/test/CodeGen/Thumb2/mve-doublereduct.ll b/llvm/test/CodeGen/Thumb2/mve-doublereduct.ll index 67723e8aa41ad..9ee6ec345d964 100644 --- a/llvm/test/CodeGen/Thumb2/mve-doublereduct.ll +++ b/llvm/test/CodeGen/Thumb2/mve-doublereduct.ll @@ -411,12 +411,12 @@ define i32 @nested_smin_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) { ; CHECK: @ %bb.0: ; CHECK-NEXT: mvn r3, #-2147483648 ; CHECK-NEXT: mvn r2, #-2147483648 -; CHECK-NEXT: vminv.s32 r3, q0 -; CHECK-NEXT: vminv.s32 r2, q1 -; CHECK-NEXT: cmp r3, r0 -; CHECK-NEXT: csel r0, r3, r0, lt -; CHECK-NEXT: cmp r2, r1 -; CHECK-NEXT: csel r1, r2, r1, lt +; CHECK-NEXT: vminv.s32 r3, q1 +; CHECK-NEXT: vminv.s32 r2, q0 +; CHECK-NEXT: cmp r3, r1 +; CHECK-NEXT: csel r1, r3, r1, lt +; CHECK-NEXT: cmp r2, r0 +; CHECK-NEXT: csel r0, r2, r0, lt ; CHECK-NEXT: cmp r0, r1 ; CHECK-NEXT: csel r0, r0, r1, lt ; CHECK-NEXT: bx lr @@ -433,12 +433,12 @@ define i32 @nested_smax_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) { ; CHECK: @ %bb.0: ; CHECK-NEXT: mov.w r3, #-2147483648 ; CHECK-NEXT: mov.w r2, #-2147483648 -; CHECK-NEXT: vmaxv.s32 r3, q0 -; CHECK-NEXT: vmaxv.s32 r2, q1 
-; CHECK-NEXT: cmp r3, r0 -; CHECK-NEXT: csel r0, r3, r0, gt -; CHECK-NEXT: cmp r2, r1 -; CHECK-NEXT: csel r1, r2, r1, gt +; CHECK-NEXT: vmaxv.s32 r3, q1 +; CHECK-NEXT: vmaxv.s32 r2, q0 +; CHECK-NEXT: cmp r3, r1 +; CHECK-NEXT: csel r1, r3, r1, gt +; CHECK-NEXT: cmp r2, r0 +; CHECK-NEXT: csel r0, r2, r0, gt ; CHECK-NEXT: cmp r0, r1 ; CHECK-NEXT: csel r0, r0, r1, gt ; CHECK-NEXT: bx lr @@ -455,12 +455,12 @@ define i32 @nested_umin_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) { ; CHECK: @ %bb.0: ; CHECK-NEXT: mov.w r3, #-1 ; CHECK-NEXT: mov.w r2, #-1 -; CHECK-NEXT: vminv.u32 r3, q0 -; CHECK-NEXT: vminv.u32 r2, q1 -; CHECK-NEXT: cmp r3, r0 -; CHECK-NEXT: csel r0, r3, r0, lo -; CHECK-NEXT: cmp r2, r1 -; CHECK-NEXT: csel r1, r2, r1, lo +; CHECK-NEXT: vminv.u32 r3, q1 +; CHECK-NEXT: vminv.u32 r2, q0 +; CHECK-NEXT: cmp r3, r1 +; CHECK-NEXT: csel r1, r3, r1, lo +; CHECK-NEXT: cmp r2, r0 +; CHECK-NEXT: csel r0, r2, r0, lo ; CHECK-NEXT: cmp r0, r1 ; CHECK-NEXT: csel r0, r0, r1, lo ; CHECK-NEXT: bx lr @@ -477,12 +477,12 @@ define i32 @nested_umax_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) { ; CHECK: @ %bb.0: ; CHECK-NEXT: movs r3, #0 ; CHECK-NEXT: movs r2, #0 -; CHECK-NEXT: vmaxv.u32 r3, q0 -; CHECK-NEXT: vmaxv.u32 r2, q1 -; CHECK-NEXT: cmp r3, r0 -; CHECK-NEXT: csel r0, r3, r0, hi -; CHECK-NEXT: cmp r2, r1 -; CHECK-NEXT: csel r1, r2, r1, hi +; CHECK-NEXT: vmaxv.u32 r3, q1 +; CHECK-NEXT: vmaxv.u32 r2, q0 +; CHECK-NEXT: cmp r3, r1 +; CHECK-NEXT: csel r1, r3, r1, hi +; CHECK-NEXT: cmp r2, r0 +; CHECK-NEXT: csel r0, r2, r0, hi ; CHECK-NEXT: cmp r0, r1 ; CHECK-NEXT: csel r0, r0, r1, hi ; CHECK-NEXT: bx lr diff --git a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll index 1c95d28b5eed1..c8dd949ca9d88 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll @@ -1365,8 +1365,8 @@ define void @arm_biquad_cascade_df2T_f16(ptr nocapture readonly %S, ptr nocaptur ; 
CHECK-NEXT: @ in Loop: Header=BB17_3 Depth=1 ; CHECK-NEXT: vstr.16 s5, [r6, #2] ; CHECK-NEXT: add.w r12, r12, #10 +; CHECK-NEXT: adds r6, #4 ; CHECK-NEXT: subs.w r9, r9, #1 -; CHECK-NEXT: add.w r6, r6, #4 ; CHECK-NEXT: mov r1, r2 ; CHECK-NEXT: beq .LBB17_8 ; CHECK-NEXT: .LBB17_3: @ %do.body diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll index 808626d9a0aeb..495ffe809f70f 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -1375,8 +1375,8 @@ define arm_aapcs_vfpcc void @arm_biquad_cascade_stereo_df2T_f32(ptr nocapture re ; CHECK-NEXT: le lr, .LBB17_3 ; CHECK-NEXT: @ %bb.4: @ %bb75 ; CHECK-NEXT: @ in Loop: Header=BB17_2 Depth=1 +; CHECK-NEXT: adds r3, #20 ; CHECK-NEXT: subs.w r12, r12, #1 -; CHECK-NEXT: add.w r3, r3, #20 ; CHECK-NEXT: vstrb.8 q3, [r0], #16 ; CHECK-NEXT: mov r1, r2 ; CHECK-NEXT: bne .LBB17_2 @@ -1514,8 +1514,8 @@ define arm_aapcs_vfpcc void @fms(ptr nocapture readonly %pSrc1, ptr nocapture re ; CHECK-NEXT: le lr, .LBB18_3 ; CHECK-NEXT: @ %bb.4: @ %while.end ; CHECK-NEXT: @ in Loop: Header=BB18_2 Depth=1 +; CHECK-NEXT: adds r2, #4 ; CHECK-NEXT: subs.w r12, r12, #1 -; CHECK-NEXT: add.w r2, r2, #4 ; CHECK-NEXT: bne .LBB18_2 ; CHECK-NEXT: .LBB18_5: @ %do.end ; CHECK-NEXT: pop {r4, r5, r7, pc} @@ -1918,8 +1918,8 @@ define void @arm_biquad_cascade_df2T_f32(ptr nocapture readonly %S, ptr nocaptur ; CHECK-NEXT: @ in Loop: Header=BB20_3 Depth=1 ; CHECK-NEXT: vstr s6, [r6, #4] ; CHECK-NEXT: add.w r12, r12, #20 +; CHECK-NEXT: adds r6, #8 ; CHECK-NEXT: subs r0, #1 -; CHECK-NEXT: add.w r6, r6, #8 ; CHECK-NEXT: mov r1, r2 ; CHECK-NEXT: beq .LBB20_8 ; CHECK-NEXT: .LBB20_3: @ %do.body diff --git a/llvm/test/CodeGen/Thumb2/mve-fmas.ll b/llvm/test/CodeGen/Thumb2/mve-fmas.ll index 377440e1bbc93..94921c78ad912 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fmas.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fmas.ll @@ -893,19 +893,19 @@ define 
arm_aapcs_vfpcc <4 x float> @vfma32_v1_pred(<4 x float> %src1, <4 x float ; ; CHECK-MVE-LABEL: vfma32_v1_pred: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s5, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, #0 -; CHECK-MVE-NEXT: vmov.f32 s14, s2 -; CHECK-MVE-NEXT: vmov.f32 s12, s3 -; CHECK-MVE-NEXT: vmla.f32 s14, s6, s10 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s7, #0 +; CHECK-MVE-NEXT: vmov.f32 s12, s2 +; CHECK-MVE-NEXT: vmov.f32 s14, s3 +; CHECK-MVE-NEXT: vmla.f32 s12, s6, s10 ; CHECK-MVE-NEXT: vmov.f32 s10, s1 -; CHECK-MVE-NEXT: vmla.f32 s12, s7, s11 +; CHECK-MVE-NEXT: vmla.f32 s14, s7, s11 ; CHECK-MVE-NEXT: vmla.f32 s10, s5, s9 ; CHECK-MVE-NEXT: vmov.f32 s9, s0 ; CHECK-MVE-NEXT: cset r0, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s7, #0 +; CHECK-MVE-NEXT: vcmp.f32 s5, #0 ; CHECK-MVE-NEXT: vmla.f32 s9, s4, s8 ; CHECK-MVE-NEXT: cset r1, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr @@ -913,13 +913,13 @@ define arm_aapcs_vfpcc <4 x float> @vfma32_v1_pred(<4 x float> %src1, <4 x float ; CHECK-MVE-NEXT: cset r2, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, mi -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s12 ; CHECK-MVE-NEXT: cmp r3, #0 -; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s14 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s12 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s10 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s14 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s9 ; CHECK-MVE-NEXT: bx lr entry: @@ -946,19 +946,19 @@ define arm_aapcs_vfpcc <4 x float> @vfma32_v2_pred(<4 x float> %src1, <4 x float ; ; CHECK-MVE-LABEL: vfma32_v2_pred: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s5, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, #0 -; CHECK-MVE-NEXT: vmov.f32 s14, s2 -; CHECK-MVE-NEXT: vmov.f32 
s12, s3 -; CHECK-MVE-NEXT: vmla.f32 s14, s6, s10 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s7, #0 +; CHECK-MVE-NEXT: vmov.f32 s12, s2 +; CHECK-MVE-NEXT: vmov.f32 s14, s3 +; CHECK-MVE-NEXT: vmla.f32 s12, s6, s10 ; CHECK-MVE-NEXT: vmov.f32 s10, s1 -; CHECK-MVE-NEXT: vmla.f32 s12, s7, s11 +; CHECK-MVE-NEXT: vmla.f32 s14, s7, s11 ; CHECK-MVE-NEXT: vmla.f32 s10, s5, s9 ; CHECK-MVE-NEXT: vmov.f32 s9, s0 ; CHECK-MVE-NEXT: cset r0, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s7, #0 +; CHECK-MVE-NEXT: vcmp.f32 s5, #0 ; CHECK-MVE-NEXT: vmla.f32 s9, s4, s8 ; CHECK-MVE-NEXT: cset r1, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr @@ -966,13 +966,13 @@ define arm_aapcs_vfpcc <4 x float> @vfma32_v2_pred(<4 x float> %src1, <4 x float ; CHECK-MVE-NEXT: cset r2, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, mi -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s12 ; CHECK-MVE-NEXT: cmp r3, #0 -; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s14 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s12 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s10 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s14 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s9 ; CHECK-MVE-NEXT: bx lr entry: @@ -999,19 +999,19 @@ define arm_aapcs_vfpcc <4 x float> @vfms32_pred(<4 x float> %src1, <4 x float> % ; ; CHECK-MVE-LABEL: vfms32_pred: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s5, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, #0 -; CHECK-MVE-NEXT: vmov.f32 s14, s2 -; CHECK-MVE-NEXT: vmov.f32 s12, s3 -; CHECK-MVE-NEXT: vmls.f32 s14, s6, s10 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s7, #0 +; CHECK-MVE-NEXT: vmov.f32 s12, s2 +; CHECK-MVE-NEXT: vmov.f32 s14, s3 +; CHECK-MVE-NEXT: vmls.f32 s12, s6, s10 ; CHECK-MVE-NEXT: vmov.f32 s10, s1 -; CHECK-MVE-NEXT: vmls.f32 s12, s7, s11 +; 
CHECK-MVE-NEXT: vmls.f32 s14, s7, s11 ; CHECK-MVE-NEXT: vmls.f32 s10, s5, s9 ; CHECK-MVE-NEXT: vmov.f32 s9, s0 ; CHECK-MVE-NEXT: cset r0, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s7, #0 +; CHECK-MVE-NEXT: vcmp.f32 s5, #0 ; CHECK-MVE-NEXT: vmls.f32 s9, s4, s8 ; CHECK-MVE-NEXT: cset r1, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr @@ -1019,13 +1019,13 @@ define arm_aapcs_vfpcc <4 x float> @vfms32_pred(<4 x float> %src1, <4 x float> % ; CHECK-MVE-NEXT: cset r2, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, mi -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s12 ; CHECK-MVE-NEXT: cmp r3, #0 -; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s14 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s12 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s10 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s14 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s9 ; CHECK-MVE-NEXT: bx lr entry: @@ -1055,33 +1055,33 @@ define arm_aapcs_vfpcc <4 x float> @vfmar32_pred(<4 x float> %src1, <4 x float> ; ; CHECK-MVE-LABEL: vfmar32_pred: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s5, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, #0 -; CHECK-MVE-NEXT: vmov.f32 s10, s3 -; CHECK-MVE-NEXT: vmov.f32 s12, s2 -; CHECK-MVE-NEXT: vmov.f32 s14, s1 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s7, #0 +; CHECK-MVE-NEXT: vmov.f32 s10, s2 +; CHECK-MVE-NEXT: vmov.f32 s12, s1 +; CHECK-MVE-NEXT: vmov.f32 s14, s3 ; CHECK-MVE-NEXT: vmov.f32 s9, s0 -; CHECK-MVE-NEXT: vmla.f32 s10, s7, s8 -; CHECK-MVE-NEXT: vmla.f32 s12, s6, s8 -; CHECK-MVE-NEXT: vmla.f32 s14, s5, s8 +; CHECK-MVE-NEXT: vmla.f32 s10, s6, s8 +; CHECK-MVE-NEXT: vmla.f32 s12, s5, s8 +; CHECK-MVE-NEXT: vmla.f32 s14, s7, s8 ; CHECK-MVE-NEXT: vmla.f32 s9, s4, s8 ; CHECK-MVE-NEXT: cset r0, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: 
vcmp.f32 s7, #0 +; CHECK-MVE-NEXT: vcmp.f32 s5, #0 ; CHECK-MVE-NEXT: cset r1, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s6, #0 ; CHECK-MVE-NEXT: cset r2, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, mi -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s10 ; CHECK-MVE-NEXT: cmp r3, #0 -; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s12 -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s14 +; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s10 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s12 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s14 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s9 ; CHECK-MVE-NEXT: bx lr entry: @@ -1112,32 +1112,32 @@ define arm_aapcs_vfpcc <4 x float> @vfmas32_pred(<4 x float> %src1, <4 x float> ; ; CHECK-MVE-LABEL: vfmas32_pred: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s5, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s7, #0 ; CHECK-MVE-NEXT: vmov.f32 s10, s8 ; CHECK-MVE-NEXT: vmov.f32 s12, s8 ; CHECK-MVE-NEXT: vmov.f32 s14, s8 ; CHECK-MVE-NEXT: vmla.f32 s8, s0, s4 -; CHECK-MVE-NEXT: vmla.f32 s10, s3, s7 -; CHECK-MVE-NEXT: vmla.f32 s12, s2, s6 -; CHECK-MVE-NEXT: vmla.f32 s14, s1, s5 +; CHECK-MVE-NEXT: vmla.f32 s10, s2, s6 +; CHECK-MVE-NEXT: vmla.f32 s12, s1, s5 +; CHECK-MVE-NEXT: vmla.f32 s14, s3, s7 ; CHECK-MVE-NEXT: cset r0, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s7, #0 +; CHECK-MVE-NEXT: vcmp.f32 s5, #0 ; CHECK-MVE-NEXT: cset r1, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s6, #0 ; CHECK-MVE-NEXT: cset r2, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, mi -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s10 ; CHECK-MVE-NEXT: cmp r3, #0 -; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s12 -; 
CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s14 +; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s10 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s12 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s14 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s8 ; CHECK-MVE-NEXT: bx lr entry: diff --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll index f2ac526892180..742f2a75a1aa8 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll @@ -668,62 +668,63 @@ entry: define arm_aapcs_vfpcc <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-LABEL: stest_f64i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: vmov r12, lr, d8 -; CHECK-NEXT: subs.w r4, r0, #-1 -; CHECK-NEXT: mvn r9, #-2147483648 -; CHECK-NEXT: sbcs.w r4, r1, r9 -; CHECK-NEXT: sbcs r4, r2, #0 -; CHECK-NEXT: mov.w r7, #-1 -; CHECK-NEXT: sbcs r4, r3, #0 -; CHECK-NEXT: mov.w r10, #-2147483648 -; CHECK-NEXT: cset r4, lt -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r3, r3, r4, ne -; CHECK-NEXT: csel r2, r2, r4, ne -; CHECK-NEXT: csel r4, r0, r7, ne -; CHECK-NEXT: csel r1, r1, r9, ne -; CHECK-NEXT: rsbs r0, r4, #0 -; CHECK-NEXT: sbcs.w r0, r10, r1 -; CHECK-NEXT: sbcs.w r0, r7, r2 -; CHECK-NEXT: sbcs.w r0, r7, r3 +; CHECK-NEXT: vmov r12, lr, d9 +; CHECK-NEXT: subs.w r5, r0, #-1 +; CHECK-NEXT: mvn r4, #-2147483648 +; CHECK-NEXT: sbcs.w r5, r1, r4 +; CHECK-NEXT: sbcs r5, r2, #0 +; CHECK-NEXT: mov.w r7, #-2147483648 
+; CHECK-NEXT: sbcs r5, r3, #0 ; CHECK-NEXT: cset r5, lt ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r8, r1, r10, ne +; CHECK-NEXT: csel r3, r3, r5, ne +; CHECK-NEXT: csel r2, r2, r5, ne +; CHECK-NEXT: mov.w r5, #-1 +; CHECK-NEXT: csel r1, r1, r4, ne +; CHECK-NEXT: csel r0, r0, r5, ne +; CHECK-NEXT: rsbs r6, r0, #0 +; CHECK-NEXT: sbcs.w r6, r7, r1 +; CHECK-NEXT: sbcs.w r2, r5, r2 +; CHECK-NEXT: sbcs.w r2, r5, r3 +; CHECK-NEXT: csel r8, r1, r7, lt +; CHECK-NEXT: cset r1, lt +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: csel r9, r0, r1, ne ; CHECK-NEXT: mov r0, r12 ; CHECK-NEXT: mov r1, lr ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: subs.w r6, r0, #-1 -; CHECK-NEXT: sbcs.w r6, r1, r9 +; CHECK-NEXT: sbcs.w r6, r1, r4 ; CHECK-NEXT: sbcs r6, r2, #0 ; CHECK-NEXT: sbcs r6, r3, #0 ; CHECK-NEXT: cset r6, lt ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r0, r0, r7, ne -; CHECK-NEXT: csel r1, r1, r9, ne +; CHECK-NEXT: csel r0, r0, r5, ne ; CHECK-NEXT: csel r3, r3, r6, ne ; CHECK-NEXT: csel r2, r2, r6, ne +; CHECK-NEXT: csel r1, r1, r4, ne ; CHECK-NEXT: rsbs r6, r0, #0 -; CHECK-NEXT: sbcs.w r6, r10, r1 -; CHECK-NEXT: sbcs.w r2, r7, r2 -; CHECK-NEXT: sbcs.w r2, r7, r3 +; CHECK-NEXT: sbcs.w r6, r7, r1 +; CHECK-NEXT: sbcs.w r2, r5, r2 +; CHECK-NEXT: sbcs.w r2, r5, r3 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r1, r1, r10, ne -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r3, r4, r5, ne +; CHECK-NEXT: csel r1, r1, r7, lt ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r3 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r8 +; CHECK-NEXT: vmov q0[2], q0[0], r9, r0 +; CHECK-NEXT: vmov q0[3], q0[1], r8, r1 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} entry: %conv = fptosi <2 x double> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -737,38 +738,33 @@ entry: define arm_aapcs_vfpcc <2 x i64> @utest_f64i64(<2 x 
double> %x) { ; CHECK-LABEL: utest_f64i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: vmov r4, r1, d8 +; CHECK-NEXT: vmov r12, lr, d8 ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: cset r6, lo -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r7, r0, r6, ne -; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: cset r2, lo +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: csel r4, r1, r2, ne +; CHECK-NEXT: csel r5, r0, r2, ne +; CHECK-NEXT: mov r0, r12 +; CHECK-NEXT: mov r1, lr ; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lo ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r3, r5, r6, ne -; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r7 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r3 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: pop {r4, r5, r7, pc} entry: %conv = fptoui <2 x double> %x to <2 x i128> %0 = icmp ult <2 x i128> %conv, @@ -780,10 +776,8 @@ entry: define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-LABEL: ustest_f64i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vmov 
q4, q0 @@ -793,47 +787,44 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64(<2 x double> %x) { ; CHECK-NEXT: subs r4, r2, #1 ; CHECK-NEXT: sbcs r4, r3, #0 ; CHECK-NEXT: mov.w r8, #1 -; CHECK-NEXT: cset r5, lt -; CHECK-NEXT: movs r7, #0 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r0, r0, r5, ne -; CHECK-NEXT: csel r3, r3, r5, ne +; CHECK-NEXT: cset r4, lt +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r0, r0, r4, ne +; CHECK-NEXT: csel r3, r3, r4, ne +; CHECK-NEXT: csel r1, r1, r4, ne ; CHECK-NEXT: csel r2, r2, r8, ne -; CHECK-NEXT: csel r4, r1, r5, ne -; CHECK-NEXT: rsbs r1, r0, #0 -; CHECK-NEXT: sbcs.w r1, r7, r4 -; CHECK-NEXT: sbcs.w r1, r7, r2 -; CHECK-NEXT: sbcs.w r1, r7, r3 -; CHECK-NEXT: cset r6, lt -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r9, r0, r6, ne +; CHECK-NEXT: rsbs r5, r0, #0 +; CHECK-NEXT: mov.w r4, #0 +; CHECK-NEXT: sbcs.w r5, r4, r1 +; CHECK-NEXT: sbcs.w r2, r4, r2 +; CHECK-NEXT: sbcs.w r2, r4, r3 +; CHECK-NEXT: cset r2, lt +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: csel r5, r1, r2, ne +; CHECK-NEXT: csel r7, r0, r2, ne ; CHECK-NEXT: mov r0, r12 ; CHECK-NEXT: mov r1, lr ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs r5, r2, #1 -; CHECK-NEXT: sbcs r5, r3, #0 -; CHECK-NEXT: cset r5, lt -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r0, r0, r5, ne +; CHECK-NEXT: subs r6, r2, #1 +; CHECK-NEXT: sbcs r6, r3, #0 +; CHECK-NEXT: cset r6, lt +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: csel r0, r0, r6, ne +; CHECK-NEXT: csel r3, r3, r6, ne +; CHECK-NEXT: csel r1, r1, r6, ne ; CHECK-NEXT: csel r2, r2, r8, ne -; CHECK-NEXT: csel r3, r3, r5, ne -; CHECK-NEXT: csel r1, r1, r5, ne -; CHECK-NEXT: rsbs r5, r0, #0 -; CHECK-NEXT: sbcs.w r5, r7, r1 -; CHECK-NEXT: sbcs.w r2, r7, r2 -; CHECK-NEXT: sbcs.w r2, r7, r3 +; CHECK-NEXT: rsbs r6, r0, #0 +; CHECK-NEXT: sbcs.w r6, r4, r1 +; CHECK-NEXT: sbcs.w r2, r4, r2 +; CHECK-NEXT: sbcs.w r2, r4, r3 ; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: 
csel r3, r4, r6, ne -; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r9 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r3 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r7 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} entry: %conv = fptosi <2 x double> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -847,59 +838,54 @@ entry: define arm_aapcs_vfpcc <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-LABEL: stest_f32i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: vmov r9, r0, d0 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: vmov r0, r9, d0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: subs.w r7, r0, #-1 -; CHECK-NEXT: mvn r10, #-2147483648 -; CHECK-NEXT: sbcs.w r7, r1, r10 -; CHECK-NEXT: mov.w r4, #-1 +; CHECK-NEXT: mvn r5, #-2147483648 +; CHECK-NEXT: sbcs.w r7, r1, r5 +; CHECK-NEXT: mov.w r6, #-1 ; CHECK-NEXT: sbcs r7, r2, #0 -; CHECK-NEXT: mov.w r11, #-2147483648 ; CHECK-NEXT: sbcs r7, r3, #0 ; CHECK-NEXT: cset r7, lt ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r5, r0, r4, ne +; CHECK-NEXT: csel r0, r0, r6, ne ; CHECK-NEXT: csel r3, r3, r7, ne ; CHECK-NEXT: csel r2, r2, r7, ne -; CHECK-NEXT: csel r1, r1, r10, ne -; CHECK-NEXT: rsbs r0, r5, #0 -; CHECK-NEXT: sbcs.w r0, r11, r1 -; CHECK-NEXT: sbcs.w r0, r4, r2 -; CHECK-NEXT: sbcs.w r0, r4, r3 -; CHECK-NEXT: cset r6, lt +; CHECK-NEXT: csel r1, r1, r5, ne +; CHECK-NEXT: rsbs r4, r0, #0 +; CHECK-NEXT: mov.w r7, #-2147483648 +; CHECK-NEXT: sbcs.w r4, r7, r1 +; CHECK-NEXT: sbcs.w r2, r6, r2 +; CHECK-NEXT: sbcs.w r2, r6, r3 +; CHECK-NEXT: csel r8, r1, r7, lt +; CHECK-NEXT: cset r1, lt +; CHECK-NEXT: cmp r1, 
#0 +; CHECK-NEXT: csel r10, r0, r1, ne ; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r8, r1, r11, ne ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs.w r7, r0, #-1 -; CHECK-NEXT: sbcs.w r7, r1, r10 -; CHECK-NEXT: sbcs r7, r2, #0 -; CHECK-NEXT: sbcs r7, r3, #0 -; CHECK-NEXT: cset r7, lt -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r0, r0, r4, ne -; CHECK-NEXT: csel r1, r1, r10, ne -; CHECK-NEXT: csel r3, r3, r7, ne -; CHECK-NEXT: csel r2, r2, r7, ne -; CHECK-NEXT: rsbs r7, r0, #0 -; CHECK-NEXT: sbcs.w r7, r11, r1 -; CHECK-NEXT: sbcs.w r2, r4, r2 -; CHECK-NEXT: sbcs.w r2, r4, r3 +; CHECK-NEXT: subs.w r4, r0, #-1 +; CHECK-NEXT: sbcs.w r4, r1, r5 +; CHECK-NEXT: sbcs r4, r2, #0 +; CHECK-NEXT: sbcs r4, r3, #0 +; CHECK-NEXT: cset r4, lt +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r0, r0, r6, ne +; CHECK-NEXT: csel r3, r3, r4, ne +; CHECK-NEXT: csel r2, r2, r4, ne +; CHECK-NEXT: csel r1, r1, r5, ne +; CHECK-NEXT: rsbs r5, r0, #0 +; CHECK-NEXT: sbcs.w r5, r7, r1 +; CHECK-NEXT: sbcs.w r2, r6, r2 +; CHECK-NEXT: sbcs.w r2, r6, r3 ; CHECK-NEXT: cset r2, lt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r1, r1, r11, ne -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r3, r5, r6, ne +; CHECK-NEXT: csel r1, r1, r7, lt ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r3 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r8 -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEXT: vmov q0[2], q0[0], r10, r0 +; CHECK-NEXT: vmov q0[3], q0[1], r8, r1 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -913,33 +899,27 @@ entry: define arm_aapcs_vfpcc <2 x i64> @utest_f32i64(<2 x float> %x) { ; CHECK-LABEL: utest_f32i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .save {r4, 
r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: vmov r4, r0, d0 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: subs r1, r2, #1 -; CHECK-NEXT: sbcs r1, r3, #0 -; CHECK-NEXT: cset r6, lo -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r7, r0, r6, ne +; CHECK-NEXT: subs r2, #1 +; CHECK-NEXT: sbcs r2, r3, #0 +; CHECK-NEXT: cset r2, lo +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: csel r6, r0, r2, ne ; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: csel r5, r1, r2, ne ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lo ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r3, r5, r6, ne -; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r7 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r3 -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: vmov q0[2], q0[0], r0, r6 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 +; CHECK-NEXT: pop {r4, r5, r6, pc} entry: %conv = fptoui <2 x float> %x to <2 x i128> %0 = icmp ult <2 x i128> %conv, @@ -951,54 +931,49 @@ entry: define arm_aapcs_vfpcc <2 x i64> @ustest_f32i64(<2 x float> %x) { ; CHECK-LABEL: ustest_f32i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: vmov r6, r0, d0 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: vmov r5, r0, d0 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs r5, r2, #1 +; CHECK-NEXT: subs r4, r2, #1 ; CHECK-NEXT: mov.w r8, #1 -; CHECK-NEXT: sbcs r5, r3, #0 +; CHECK-NEXT: sbcs r4, r3, #0 +; CHECK-NEXT: mov.w r6, #0 ; CHECK-NEXT: cset r4, lt ; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: csel r0, r0, r4, ne ; CHECK-NEXT: csel r3, r3, r4, ne -; CHECK-NEXT: csel r5, r1, r4, ne +; CHECK-NEXT: csel r1, r1, r4, ne ; CHECK-NEXT: 
csel r2, r2, r8, ne -; CHECK-NEXT: rsbs r1, r0, #0 -; CHECK-NEXT: mov.w r4, #0 -; CHECK-NEXT: sbcs.w r1, r4, r5 -; CHECK-NEXT: sbcs.w r1, r4, r2 -; CHECK-NEXT: sbcs.w r1, r4, r3 -; CHECK-NEXT: cset r7, lt -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r9, r0, r7, ne -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: rsbs r4, r0, #0 +; CHECK-NEXT: sbcs.w r4, r6, r1 +; CHECK-NEXT: sbcs.w r2, r6, r2 +; CHECK-NEXT: sbcs.w r2, r6, r3 +; CHECK-NEXT: cset r2, lt +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: csel r7, r0, r2, ne +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: csel r4, r1, r2, ne ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs r6, r2, #1 -; CHECK-NEXT: sbcs r6, r3, #0 -; CHECK-NEXT: cset r6, lt -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r0, r0, r6, ne +; CHECK-NEXT: subs r5, r2, #1 +; CHECK-NEXT: sbcs r5, r3, #0 +; CHECK-NEXT: cset r5, lt +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: csel r0, r0, r5, ne +; CHECK-NEXT: csel r3, r3, r5, ne +; CHECK-NEXT: csel r1, r1, r5, ne ; CHECK-NEXT: csel r2, r2, r8, ne -; CHECK-NEXT: csel r3, r3, r6, ne -; CHECK-NEXT: csel r1, r1, r6, ne -; CHECK-NEXT: rsbs r6, r0, #0 -; CHECK-NEXT: sbcs.w r6, r4, r1 -; CHECK-NEXT: sbcs.w r2, r4, r2 -; CHECK-NEXT: sbcs.w r2, r4, r3 +; CHECK-NEXT: rsbs r5, r0, #0 +; CHECK-NEXT: sbcs.w r5, r6, r1 +; CHECK-NEXT: sbcs.w r2, r6, r2 +; CHECK-NEXT: sbcs.w r2, r6, r3 ; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r3, r5, r7, ne -; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r9 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r3 -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; CHECK-NEXT: vmov q0[2], q0[0], r0, r7 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -1066,39 +1041,39 @@ entry: define arm_aapcs_vfpcc <2 x i64> @ustest_f16i64(<2 x half> 
%x) { ; CHECK-LABEL: ustest_f16i64: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vmov.u16 r0, q0[1] ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: rsbs r1, r0, #0 +; CHECK-NEXT: rsbs r4, r0, #0 ; CHECK-NEXT: mov.w r5, #0 -; CHECK-NEXT: sbcs.w r1, r5, r4 -; CHECK-NEXT: sbcs.w r1, r5, r2 -; CHECK-NEXT: sbcs.w r1, r5, r3 -; CHECK-NEXT: cset r6, lt -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r8, r0, r6, ne +; CHECK-NEXT: sbcs.w r4, r5, r1 +; CHECK-NEXT: sbcs.w r2, r5, r2 +; CHECK-NEXT: sbcs.w r2, r5, r3 +; CHECK-NEXT: cset r2, lt +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: csel r6, r0, r2, ne ; CHECK-NEXT: vmov.u16 r0, q4[0] +; CHECK-NEXT: csel r7, r1, r2, ne ; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: rsbs r7, r0, #0 -; CHECK-NEXT: sbcs.w r7, r5, r1 +; CHECK-NEXT: rsbs r4, r0, #0 +; CHECK-NEXT: sbcs.w r4, r5, r1 ; CHECK-NEXT: sbcs.w r2, r5, r2 ; CHECK-NEXT: sbcs.w r2, r5, r3 ; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r3, r4, r6, ne -; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r8 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r3 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r6 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r7 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} entry: %conv = fptosi <2 x half> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -1748,62 +1723,63 @@ entry: define arm_aapcs_vfpcc <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-LABEL: stest_f64i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, 
r8, r9, r10, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: vmov r12, lr, d8 -; CHECK-NEXT: subs.w r4, r0, #-1 -; CHECK-NEXT: mvn r9, #-2147483648 -; CHECK-NEXT: sbcs.w r4, r1, r9 -; CHECK-NEXT: sbcs r4, r2, #0 -; CHECK-NEXT: mov.w r7, #-1 -; CHECK-NEXT: sbcs r4, r3, #0 -; CHECK-NEXT: mov.w r10, #-2147483648 -; CHECK-NEXT: cset r4, lt -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r3, r3, r4, ne -; CHECK-NEXT: csel r2, r2, r4, ne -; CHECK-NEXT: csel r4, r0, r7, ne -; CHECK-NEXT: csel r1, r1, r9, ne -; CHECK-NEXT: rsbs r0, r4, #0 -; CHECK-NEXT: sbcs.w r0, r10, r1 -; CHECK-NEXT: sbcs.w r0, r7, r2 -; CHECK-NEXT: sbcs.w r0, r7, r3 +; CHECK-NEXT: subs.w r5, r0, #-1 +; CHECK-NEXT: mvn r4, #-2147483648 +; CHECK-NEXT: sbcs.w r5, r1, r4 +; CHECK-NEXT: sbcs r5, r2, #0 +; CHECK-NEXT: mov.w r6, #-1 +; CHECK-NEXT: sbcs r5, r3, #0 ; CHECK-NEXT: cset r5, lt ; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r8, r1, r10, ne +; CHECK-NEXT: csel r0, r0, r6, ne +; CHECK-NEXT: csel r3, r3, r5, ne +; CHECK-NEXT: csel r2, r2, r5, ne +; CHECK-NEXT: csel r1, r1, r4, ne +; CHECK-NEXT: rsbs r7, r0, #0 +; CHECK-NEXT: mov.w r5, #-2147483648 +; CHECK-NEXT: sbcs.w r7, r5, r1 +; CHECK-NEXT: sbcs.w r2, r6, r2 +; CHECK-NEXT: sbcs.w r2, r6, r3 +; CHECK-NEXT: cset r2, lt +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: csel r9, r0, r2, ne +; CHECK-NEXT: csel r8, r1, r5, ne ; CHECK-NEXT: mov r0, r12 ; CHECK-NEXT: mov r1, lr ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: subs.w r6, r0, #-1 -; CHECK-NEXT: sbcs.w r6, r1, r9 -; CHECK-NEXT: sbcs r6, r2, #0 -; CHECK-NEXT: sbcs r6, r3, #0 -; CHECK-NEXT: cset r6, lt -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r0, r0, r7, ne -; CHECK-NEXT: csel r1, r1, r9, ne -; 
CHECK-NEXT: csel r3, r3, r6, ne -; CHECK-NEXT: csel r2, r2, r6, ne -; CHECK-NEXT: rsbs r6, r0, #0 -; CHECK-NEXT: sbcs.w r6, r10, r1 -; CHECK-NEXT: sbcs.w r2, r7, r2 -; CHECK-NEXT: sbcs.w r2, r7, r3 +; CHECK-NEXT: subs.w r7, r0, #-1 +; CHECK-NEXT: sbcs.w r7, r1, r4 +; CHECK-NEXT: sbcs r7, r2, #0 +; CHECK-NEXT: sbcs r7, r3, #0 +; CHECK-NEXT: cset r7, lt +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: csel r0, r0, r6, ne +; CHECK-NEXT: csel r3, r3, r7, ne +; CHECK-NEXT: csel r2, r2, r7, ne +; CHECK-NEXT: csel r1, r1, r4, ne +; CHECK-NEXT: rsbs r7, r0, #0 +; CHECK-NEXT: sbcs.w r7, r5, r1 +; CHECK-NEXT: sbcs.w r2, r6, r2 +; CHECK-NEXT: sbcs.w r2, r6, r3 ; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r1, r1, r10, ne -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r3, r4, r5, ne -; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r3 +; CHECK-NEXT: csel r1, r1, r5, ne +; CHECK-NEXT: vmov q0[2], q0[0], r0, r9 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r8 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} entry: %conv = fptosi <2 x double> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -1815,38 +1791,33 @@ entry: define arm_aapcs_vfpcc <2 x i64> @utest_f64i64_mm(<2 x double> %x) { ; CHECK-LABEL: utest_f64i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: vmov r4, r1, d8 +; CHECK-NEXT: vmov r12, lr, d8 ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: cset r6, lo -; 
CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r7, r0, r6, ne -; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: cset r2, lo +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: csel r4, r1, r2, ne +; CHECK-NEXT: csel r5, r0, r2, ne +; CHECK-NEXT: mov r0, r12 +; CHECK-NEXT: mov r1, lr ; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lo ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r3, r5, r6, ne -; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r7 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r3 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: pop {r4, r5, r7, pc} entry: %conv = fptoui <2 x double> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -1857,49 +1828,43 @@ entry: define arm_aapcs_vfpcc <2 x i64> @ustest_f64i64_mm(<2 x double> %x) { ; CHECK-LABEL: ustest_f64i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: mov r8, r1 -; CHECK-NEXT: vmov r4, r1, d8 +; CHECK-NEXT: vmov r12, lr, d8 ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 -; CHECK-NEXT: cset r7, lt -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r6, r0, r7, ne -; CHECK-NEXT: csel r5, r3, r7, ne -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: it mi -; CHECK-NEXT: movmi r6, #0 -; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: cset r2, lt +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: csel r5, r0, r2, ne +; CHECK-NEXT: csel r0, r3, r2, ne +; CHECK-NEXT: csel r4, r1, r2, ne +; CHECK-NEXT: 
cmp r0, #0 +; CHECK-NEXT: itt mi +; CHECK-NEXT: movmi r4, #0 +; CHECK-NEXT: movmi r5, #0 +; CHECK-NEXT: mov r0, r12 +; CHECK-NEXT: mov r1, lr ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r3, r3, r2, ne +; CHECK-NEXT: csel r1, r1, r2, ne ; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: it mi -; CHECK-NEXT: movmi r0, #0 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r6 -; CHECK-NEXT: csel r7, r8, r7, ne -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: it mi -; CHECK-NEXT: movmi r7, #0 +; CHECK-NEXT: csel r2, r3, r2, ne ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: it mi +; CHECK-NEXT: itt mi +; CHECK-NEXT: movmi r0, #0 ; CHECK-NEXT: movmi r1, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r7 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r4 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: pop {r4, r5, r7, pc} entry: %conv = fptosi <2 x double> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -1911,59 +1876,54 @@ entry: define arm_aapcs_vfpcc <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-LABEL: stest_f32i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: vmov r9, r0, d0 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: vmov r8, r0, d0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: subs.w r7, r0, #-1 -; CHECK-NEXT: mvn r10, #-2147483648 -; CHECK-NEXT: sbcs.w r7, r1, r10 -; CHECK-NEXT: mov.w r4, #-1 +; CHECK-NEXT: mvn r5, #-2147483648 +; CHECK-NEXT: sbcs.w r7, r1, r5 +; CHECK-NEXT: mov.w r6, #-2147483648 ; 
CHECK-NEXT: sbcs r7, r2, #0 -; CHECK-NEXT: mov.w r11, #-2147483648 ; CHECK-NEXT: sbcs r7, r3, #0 ; CHECK-NEXT: cset r7, lt ; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r5, r0, r4, ne ; CHECK-NEXT: csel r3, r3, r7, ne ; CHECK-NEXT: csel r2, r2, r7, ne -; CHECK-NEXT: csel r1, r1, r10, ne -; CHECK-NEXT: rsbs r0, r5, #0 -; CHECK-NEXT: sbcs.w r0, r11, r1 -; CHECK-NEXT: sbcs.w r0, r4, r2 -; CHECK-NEXT: sbcs.w r0, r4, r3 -; CHECK-NEXT: cset r6, lt -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r8, r1, r11, ne -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: subs.w r7, r0, #-1 -; CHECK-NEXT: sbcs.w r7, r1, r10 -; CHECK-NEXT: sbcs r7, r2, #0 -; CHECK-NEXT: sbcs r7, r3, #0 -; CHECK-NEXT: cset r7, lt -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r0, r0, r4, ne -; CHECK-NEXT: csel r1, r1, r10, ne -; CHECK-NEXT: csel r3, r3, r7, ne -; CHECK-NEXT: csel r2, r2, r7, ne -; CHECK-NEXT: rsbs r7, r0, #0 -; CHECK-NEXT: sbcs.w r7, r11, r1 -; CHECK-NEXT: sbcs.w r2, r4, r2 -; CHECK-NEXT: sbcs.w r2, r4, r3 +; CHECK-NEXT: mov.w r7, #-1 +; CHECK-NEXT: csel r1, r1, r5, ne +; CHECK-NEXT: csel r0, r0, r7, ne +; CHECK-NEXT: rsbs r4, r0, #0 +; CHECK-NEXT: sbcs.w r4, r6, r1 +; CHECK-NEXT: sbcs.w r2, r7, r2 +; CHECK-NEXT: sbcs.w r2, r7, r3 ; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r1, r1, r11, ne -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r3, r5, r6, ne +; CHECK-NEXT: csel r10, r0, r2, ne +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: csel r9, r1, r6, ne +; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: subs.w r4, r0, #-1 +; CHECK-NEXT: sbcs.w r4, r1, r5 +; CHECK-NEXT: sbcs r4, r2, #0 +; CHECK-NEXT: sbcs r4, r3, #0 +; CHECK-NEXT: cset r4, lt +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: csel r0, r0, r7, ne +; CHECK-NEXT: csel r3, r3, r4, ne +; CHECK-NEXT: csel r2, r2, r4, ne +; CHECK-NEXT: csel r1, r1, r5, ne +; CHECK-NEXT: rsbs r5, r0, #0 +; CHECK-NEXT: sbcs.w r5, r6, r1 +; CHECK-NEXT: sbcs.w r2, r7, r2 +; CHECK-NEXT: sbcs.w r2, r7, r3 +; CHECK-NEXT: cset r2, lt ; 
CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r3 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r8 -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEXT: csel r1, r1, r6, ne +; CHECK-NEXT: vmov q0[2], q0[0], r0, r10 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r9 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -1975,33 +1935,27 @@ entry: define arm_aapcs_vfpcc <2 x i64> @utest_f32i64_mm(<2 x float> %x) { ; CHECK-LABEL: utest_f32i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: vmov r4, r0, d0 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: subs r1, r2, #1 -; CHECK-NEXT: sbcs r1, r3, #0 -; CHECK-NEXT: cset r6, lo -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r7, r0, r6, ne +; CHECK-NEXT: subs r2, #1 +; CHECK-NEXT: sbcs r2, r3, #0 +; CHECK-NEXT: cset r2, lo +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: csel r6, r0, r2, ne ; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: csel r5, r1, r2, ne ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lo ; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: csel r3, r5, r6, ne -; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: vmov q0[2], q0[0], r0, r7 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r3 -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: vmov q0[2], q0[0], r0, r6 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 +; CHECK-NEXT: pop {r4, r5, r6, pc} entry: %conv = fptoui <2 x float> %x to <2 x i128> %spec.store.select = call <2 x i128> 
@llvm.umin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -2012,44 +1966,37 @@ entry: define arm_aapcs_vfpcc <2 x i64> @ustest_f32i64_mm(<2 x float> %x) { ; CHECK-LABEL: ustest_f32i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: vmov r5, r0, d0 +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: vmov r4, r0, d0 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov r8, r1 -; CHECK-NEXT: subs r1, r2, #1 -; CHECK-NEXT: sbcs r1, r3, #0 -; CHECK-NEXT: cset r7, lt -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r6, r0, r7, ne -; CHECK-NEXT: csel r4, r3, r7, ne -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it mi +; CHECK-NEXT: subs r2, #1 +; CHECK-NEXT: sbcs r2, r3, #0 +; CHECK-NEXT: cset r2, lt +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: csel r6, r0, r2, ne +; CHECK-NEXT: csel r0, r3, r2, ne +; CHECK-NEXT: csel r5, r1, r2, ne +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: itt mi +; CHECK-NEXT: movmi r5, #0 ; CHECK-NEXT: movmi r6, #0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: subs r2, #1 ; CHECK-NEXT: sbcs r2, r3, #0 ; CHECK-NEXT: cset r2, lt ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r3, r3, r2, ne +; CHECK-NEXT: csel r1, r1, r2, ne ; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: it mi -; CHECK-NEXT: movmi r0, #0 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r6 -; CHECK-NEXT: csel r7, r8, r7, ne -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it mi -; CHECK-NEXT: movmi r7, #0 +; CHECK-NEXT: csel r2, r3, r2, ne ; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r1, r1, r2, ne -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: it mi +; CHECK-NEXT: itt mi +; CHECK-NEXT: movmi r0, #0 ; CHECK-NEXT: movmi r1, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r7 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: vmov q0[2], q0[0], r0, r6 +; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 +; CHECK-NEXT: pop {r4, 
r5, r6, pc} entry: %conv = fptosi <2 x float> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) @@ -2112,8 +2059,8 @@ entry: define arm_aapcs_vfpcc <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-LABEL: ustest_f16i64_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: vmov.u16 r0, q0[1] @@ -2122,24 +2069,19 @@ define arm_aapcs_vfpcc <2 x i64> @ustest_f16i64_mm(<2 x half> %x) { ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: vmov.u16 r0, q4[0] ; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: mov r6, r3 -; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it mi -; CHECK-NEXT: movmi r4, #0 ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: it mi -; CHECK-NEXT: movmi r0, #0 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r4 -; CHECK-NEXT: it mi +; CHECK-NEXT: itt mi ; CHECK-NEXT: movmi r5, #0 +; CHECK-NEXT: movmi r4, #0 +; CHECK-NEXT: bl __fixhfti ; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: it mi +; CHECK-NEXT: itt mi +; CHECK-NEXT: movmi r0, #0 ; CHECK-NEXT: movmi r1, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r0, r4 ; CHECK-NEXT: vmov q0[3], q0[1], r1, r5 ; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r6, pc} +; CHECK-NEXT: pop {r4, r5, r7, pc} entry: %conv = fptosi <2 x half> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) diff --git a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll index 75b6cb3e1272b..77548b49d77f2 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll @@ -281,49 +281,51 @@ declare <6 x i32> @llvm.fptosi.sat.v6f64.v6i32 (<6 x double>) define arm_aapcs_vfpcc <1 x i32> @test_signed_v1f64_v1i32(<1 x double> %f) { ; 
CHECK-LABEL: test_signed_v1f64_v1i32: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: vldr d1, .LCPI8_0 ; CHECK-NEXT: vmov r5, r4, d0 ; CHECK-NEXT: vmov r2, r3, d1 ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: vldr d0, .LCPI8_1 -; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: bl __aeabi_d2iz +; CHECK-NEXT: vldr d0, .LCPI8_1 ; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: bl __aeabi_d2iz -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: it eq -; CHECK-NEXT: moveq.w r6, #-2147483648 +; CHECK-NEXT: moveq.w r7, #-2147483648 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: mov r2, r5 ; CHECK-NEXT: mov r3, r4 -; CHECK-NEXT: cmp.w r8, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r6, #-2147483648 +; CHECK-NEXT: mvnne r7, #-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r6, #0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: movne r7, #0 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} ; CHECK-NEXT: .p2align 3 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI8_0: -; CHECK-NEXT: .long 4290772992 @ double 2147483647 -; CHECK-NEXT: .long 1105199103 -; CHECK-NEXT: .LCPI8_1: ; CHECK-NEXT: .long 0 @ double -2147483648 ; CHECK-NEXT: .long 3252682752 +; CHECK-NEXT: .LCPI8_1: +; CHECK-NEXT: .long 4290772992 @ double 2147483647 +; CHECK-NEXT: .long 
1105199103 %x = call <1 x i32> @llvm.fptosi.sat.v1f64.v1i32(<1 x double> %f) ret <1 x i32> %x } @@ -337,115 +339,82 @@ define arm_aapcs_vfpcc <2 x i32> @test_signed_v2f64_v2i32(<2 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #24 -; CHECK-NEXT: sub sp, #24 +; CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI9_0 -; CHECK-NEXT: vmov r8, r7, d9 -; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: strd r2, r3, [sp, #8] @ 8-byte Folded Spill +; CHECK-NEXT: vmov r9, r8, d9 +; CHECK-NEXT: vmov r7, r3, d0 +; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: vldr d0, .LCPI9_1 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: lsrs r4, r0, #5 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: vmov r6, r5, d0 -; CHECK-NEXT: str r4, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r11, #-2147483648 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #1 -; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r11, #-2147483648 -; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: vmov r10, r7, d8 -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r8, #1 -; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r11, #0 -; 
CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r6, #1 -; CHECK-NEXT: ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: lsr.w r9, r0, #5 -; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: bl __aeabi_d2lz +; CHECK-NEXT: vldr d0, .LCPI9_1 ; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r5, #-2147483648 -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: vmov r11, r10, d8 ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne +; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill +; CHECK-NEXT: itt eq +; CHECK-NEXT: moveq.w r4, #-1 +; CHECK-NEXT: moveq.w r5, #-2147483648 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: itt ne ; CHECK-NEXT: mvnne r5, #-2147483648 +; CHECK-NEXT: movne r4, #0 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne r4, #0 ; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: vmov q0[2], q0[0], r5, r11 -; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r2, #-1 -; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r2, #0 +; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: 
mov r0, r11 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: bl __aeabi_d2lz +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r2, #0 -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 +; CHECK-NEXT: itt eq +; CHECK-NEXT: moveq.w r7, #-2147483648 +; CHECK-NEXT: moveq.w r6, #-1 +; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r4, r2 -; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne r6, #0 +; CHECK-NEXT: mvnne r7, #-2147483648 +; CHECK-NEXT: bl __aeabi_dcmpun +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne r7, #0 +; CHECK-NEXT: movne r6, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r7, r5 +; CHECK-NEXT: vmov q0[3], q0[1], r6, r4 +; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -472,90 +441,53 @@ define arm_aapcs_vfpcc <3 x i32> @test_signed_v3f64_v3i32(<3 x double> %f) { ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: .pad #24 ; CHECK-NEXT: sub sp, #24 -; CHECK-NEXT: vmov.f32 s16, s0 -; CHECK-NEXT: vmov.f32 s17, s1 +; CHECK-NEXT: vmov.f32 s18, s0 +; CHECK-NEXT: vmov.f32 s19, s1 ; CHECK-NEXT: vldr d0, .LCPI10_0 -; CHECK-NEXT: vmov r4, r6, d1 -; CHECK-NEXT: vmov r2, r11, d0 -; CHECK-NEXT: vmov.f32 s18, s4 -; CHECK-NEXT: vmov.f32 s19, s5 -; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r1, r6 -; 
CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: str.w r11, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: vldr d0, .LCPI10_1 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: vmov r2, r8, d0 -; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: str.w r8, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: it eq -; CHECK-NEXT: moveq.w r10, #-2147483648 -; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r4 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: vmov r5, r7, d9 -; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r10, #-2147483648 -; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r10, #0 -; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: vmov r10, r7, d1 +; CHECK-NEXT: vmov r6, r3, d0 +; CHECK-NEXT: vmov.f32 s16, s4 +; CHECK-NEXT: vmov.f32 s17, s5 +; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: mov r0, r10 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r0, r10 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: bl __aeabi_d2lz +; CHECK-NEXT: vldr d0, .LCPI10_1 ; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: vmov r1, r0, d9 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: vmov r9, r8, d8 +; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: strd r1, r0, [sp, #12] @ 8-byte 
Folded Spill +; CHECK-NEXT: mov r0, r10 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: cmp.w r11, #0 ; CHECK-NEXT: it eq -; CHECK-NEXT: moveq.w r6, #-2147483648 -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: moveq.w r11, #-2147483648 +; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r10 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r2, r10 ; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: vmov r9, r8, d8 ; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r6, #-2147483648 +; CHECK-NEXT: mvnne r11, #-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r6, #0 -; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: movne.w r11, #0 +; CHECK-NEXT: ldr.w r10, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r10 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: mov r0, r9 @@ -565,31 +497,67 @@ define arm_aapcs_vfpcc <3 x i32> @test_signed_v3f64_v3i32(<3 x double> %f) { ; CHECK-NEXT: cmp r5, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: moveq.w r7, #-2147483648 +; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r3, r4 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: mov r2, r9 ; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: mvnne r7, #-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: vmov.32 q0[1], r10 ; 
CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne r7, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r7, r6 -; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: ldr r4, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: bl __aeabi_d2lz +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: cmp.w r8, #0 +; CHECK-NEXT: it eq +; CHECK-NEXT: moveq.w r6, #-2147483648 +; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r4 +; CHECK-NEXT: it ne +; CHECK-NEXT: mvnne r6, #-2147483648 +; CHECK-NEXT: bl __aeabi_dcmpun +; CHECK-NEXT: vmov.32 q0[1], r11 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r6, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r6, r7 +; CHECK-NEXT: add sp, #24 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 3 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI10_0: -; CHECK-NEXT: .long 4290772992 @ double 2147483647 -; CHECK-NEXT: .long 1105199103 -; CHECK-NEXT: .LCPI10_1: ; CHECK-NEXT: .long 0 @ double -2147483648 ; CHECK-NEXT: .long 3252682752 +; CHECK-NEXT: .LCPI10_1: +; CHECK-NEXT: .long 4290772992 @ double 2147483647 +; CHECK-NEXT: .long 1105199103 %x = call <3 x i32> @llvm.fptosi.sat.v3f64.v3i32(<3 x double> %f) ret <3 x i32> %x } @@ -603,95 +571,86 @@ define arm_aapcs_vfpcc <4 x i32> @test_signed_v4f64_v4i32(<4 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} -; CHECK-NEXT: .pad #32 -; CHECK-NEXT: sub sp, #32 +; 
CHECK-NEXT: .pad #24 +; CHECK-NEXT: sub sp, #24 +; CHECK-NEXT: vmov q5, q1 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI11_0 -; CHECK-NEXT: vmov q5, q1 ; CHECK-NEXT: vmov r5, r6, d10 -; CHECK-NEXT: vmov r9, r3, d0 -; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: vldr d0, .LCPI11_1 -; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: vmov r2, r3, d0 ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: str r3, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: mov r9, r3 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: vmov r11, r1, d11 +; CHECK-NEXT: vldr d0, .LCPI11_1 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: vmov r11, r0, d11 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: vmov r2, r3, d0 ; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r6 ; CHECK-NEXT: vmov r7, r10, d8 -; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: it eq -; CHECK-NEXT: moveq.w r0, #-2147483648 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r0, #-2147483648 -; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: moveq.w r4, #-2147483648 +; CHECK-NEXT: str r3, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: mov r8, r3 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: it ne +; CHECK-NEXT: mvnne r4, #-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: ldr.w r8, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: 
ldr r6, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: mov r1, r10 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: str r4, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: str.w r9, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldr r4, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: mov r1, r10 -; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r3, r4 +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: str r4, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: mov r4, r9 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: str.w r9, [sp] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: mov r1, r10 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r9, r0 ; CHECK-NEXT: cmp r5, #0 ; CHECK-NEXT: it eq -; CHECK-NEXT: moveq.w r6, #-2147483648 -; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: moveq.w r9, #-2147483648 +; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: mov r1, r10 -; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: mov r3, r10 ; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r6, #-2147483648 +; CHECK-NEXT: mvnne r9, #-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r6, #0 -; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldr.w r9, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: mov r10, r0 +; CHECK-NEXT: movne.w r9, #0 +; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r3, r4 -; 
CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r0, r11 @@ -701,11 +660,17 @@ define arm_aapcs_vfpcc <4 x i32> @test_signed_v4f64_v4i32(<4 x double> %f) { ; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: moveq.w r8, #-2147483648 +; CHECK-NEXT: ldr.w r10, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r11 ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: mov r2, r11 ; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: cmp.w r10, #0 ; CHECK-NEXT: vmov r7, r4, d9 ; CHECK-NEXT: it ne ; CHECK-NEXT: mvnne r8, #-2147483648 @@ -713,51 +678,50 @@ define arm_aapcs_vfpcc <4 x i32> @test_signed_v4f64_v4i32(<4 x double> %f) { ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne.w r8, #0 -; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: ldr r3, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: mov r10, r0 +; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload ; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r11, r0 +; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: cmp.w r11, #0 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: cmp r5, #0 ; CHECK-NEXT: it eq -; CHECK-NEXT: moveq.w r5, #-2147483648 +; CHECK-NEXT: moveq.w r6, #-2147483648 +; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: 
mov r3, r4 -; CHECK-NEXT: cmp.w r10, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r5, #-2147483648 +; CHECK-NEXT: mvnne r6, #-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: vmov q0[2], q0[0], r6, r0 -; CHECK-NEXT: vmov q0[3], q0[1], r5, r8 -; CHECK-NEXT: add sp, #32 +; CHECK-NEXT: movne r6, #0 +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: vmov q0[2], q0[0], r9, r0 +; CHECK-NEXT: vmov q0[3], q0[1], r6, r8 +; CHECK-NEXT: add sp, #24 ; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 3 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI11_0: -; CHECK-NEXT: .long 4290772992 @ double 2147483647 -; CHECK-NEXT: .long 1105199103 -; CHECK-NEXT: .LCPI11_1: ; CHECK-NEXT: .long 0 @ double -2147483648 ; CHECK-NEXT: .long 3252682752 +; CHECK-NEXT: .LCPI11_1: +; CHECK-NEXT: .long 4290772992 @ double 2147483647 +; CHECK-NEXT: .long 1105199103 %x = call <4 x i32> @llvm.fptosi.sat.v4f64.v4i32(<4 x double> %f) ret <4 x i32> %x } @@ -774,186 +738,186 @@ define arm_aapcs_vfpcc <5 x i32> @test_signed_v5f64_v5i32(<5 x double> %f) { ; CHECK-NEXT: .pad #32 ; CHECK-NEXT: sub sp, #32 ; CHECK-NEXT: vmov.f32 s16, s0 -; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill ; CHECK-NEXT: vmov.f32 s17, s1 ; CHECK-NEXT: vldr d0, .LCPI12_0 -; CHECK-NEXT: vmov r5, r4, d4 -; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: vmov r7, r5, d4 ; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: vmov.f32 s20, s6 -; CHECK-NEXT: vmov.f32 s18, s4 +; CHECK-NEXT: vmov.f32 s18, s6 +; CHECK-NEXT: vmov.f32 s20, s4 ; CHECK-NEXT: vmov.f32 s22, s2 -; CHECK-NEXT: vmov.f32 s21, s7 -; CHECK-NEXT: vmov.f32 s19, s5 +; CHECK-NEXT: vmov.f32 s19, s7 +; CHECK-NEXT: vmov.f32 s21, s5 ; CHECK-NEXT: vmov.f32 s23, s3 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r4 -; 
CHECK-NEXT: strd r2, r3, [sp, #20] @ 8-byte Folded Spill -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: vldr d0, .LCPI12_1 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: str r3, [sp, #20] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: vmov r8, r0, d11 -; CHECK-NEXT: cmp.w r10, #0 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r4 -; CHECK-NEXT: vmov r9, r6, d10 -; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: bl __aeabi_d2lz +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: vmov r1, r0, d10 +; CHECK-NEXT: vldr d0, .LCPI12_1 +; CHECK-NEXT: vmov r6, r8, d9 +; CHECK-NEXT: cmp.w r11, #0 +; CHECK-NEXT: vmov r10, r3, d0 +; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill +; CHECK-NEXT: vmov r9, r0, d11 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r11, r3 +; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: it eq -; CHECK-NEXT: moveq.w r11, #-2147483648 -; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: moveq.w r4, #-2147483648 +; CHECK-NEXT: str.w r10, [sp] @ 4-byte Spill +; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r11, #-2147483648 +; CHECK-NEXT: mvnne r4, #-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r11, #0 -; CHECK-NEXT: str.w r11, [r7, #16] -; CHECK-NEXT: mov r0, r9 -; 
CHECK-NEXT: ldr.w r10, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: ldr r7, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldr r4, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: ldr.w r11, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r2, r4 -; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: movne r4, #0 +; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: str r4, [r0, #16] +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: ldr r7, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: ldr r5, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: moveq.w r0, #-2147483648 -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: mov r10, r11 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r8 ; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r0, #-2147483648 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mvnne r4, #-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: str r5, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: 
mov r3, r7 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r4 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: mov r6, r11 +; CHECK-NEXT: movne r4, #0 +; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: str r4, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: it eq -; CHECK-NEXT: moveq.w r10, #-2147483648 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: vmov r11, r4, d9 +; CHECK-NEXT: moveq.w r8, #-2147483648 +; CHECK-NEXT: ldr.w r11, [sp] @ 4-byte Reload +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r6 ; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r10, #-2147483648 +; CHECK-NEXT: mvnne r8, #-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r10, #0 -; CHECK-NEXT: ldrd r2, r3, [sp, #20] @ 8-byte Folded Reload -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: movne.w r8, #0 +; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: ldr.w r9, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: mov r0, r11 -; 
CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r3, r9 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: it eq -; CHECK-NEXT: moveq.w r7, #-2147483648 -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: mov r3, r4 -; CHECK-NEXT: cmp.w r8, #0 +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: it eq +; CHECK-NEXT: moveq.w r4, #-2147483648 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r7, #-2147483648 +; CHECK-NEXT: mvnne r4, #-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: vmov r5, r4, d8 +; CHECK-NEXT: vmov r7, r6, d8 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #0 -; CHECK-NEXT: ldrd r2, r3, [sp, #20] @ 8-byte Folded Reload -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: movne r4, #0 ; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r10 ; CHECK-NEXT: cmp.w r9, #0 ; 
CHECK-NEXT: it eq -; CHECK-NEXT: moveq.w r6, #-2147483648 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r4 -; CHECK-NEXT: cmp.w r8, #0 +; CHECK-NEXT: moveq.w r5, #-2147483648 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r3, r6 ; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r6, #-2147483648 +; CHECK-NEXT: mvnne r5, #-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r6, #0 -; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: vmov q0[2], q0[0], r6, r7 -; CHECK-NEXT: vmov q0[3], q0[1], r10, r0 +; CHECK-NEXT: movne r5, #0 ; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: vmov q0[2], q0[0], r5, r4 +; CHECK-NEXT: vmov q0[3], q0[1], r8, r0 +; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload ; CHECK-NEXT: vstrw.32 q0, [r0] ; CHECK-NEXT: add sp, #32 ; CHECK-NEXT: vpop {d8, d9, d10, d11} @@ -962,11 +926,11 @@ define arm_aapcs_vfpcc <5 x i32> @test_signed_v5f64_v5i32(<5 x double> %f) { ; CHECK-NEXT: .p2align 3 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI12_0: -; CHECK-NEXT: .long 4290772992 @ double 2147483647 -; CHECK-NEXT: .long 1105199103 -; CHECK-NEXT: .LCPI12_1: ; CHECK-NEXT: .long 0 @ double -2147483648 ; CHECK-NEXT: .long 3252682752 +; CHECK-NEXT: .LCPI12_1: +; CHECK-NEXT: .long 4290772992 @ double 2147483647 +; CHECK-NEXT: .long 1105199103 %x = call <5 x i32> @llvm.fptosi.sat.v5f64.v5i32(<5 x double> %f) ret <5 x i32> %x } @@ -983,182 +947,180 @@ define arm_aapcs_vfpcc <6 x i32> @test_signed_v6f64_v6i32(<6 x double> %f) { ; CHECK-NEXT: .pad #40 ; CHECK-NEXT: sub sp, #40 ; CHECK-NEXT: vmov.f32 s16, s0 -; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: str r0, [sp, #28] @ 4-byte Spill ; CHECK-NEXT: vmov.f32 s17, s1 ; CHECK-NEXT: vldr d0, .LCPI13_0 -; CHECK-NEXT: vmov r9, r4, d5 -; CHECK-NEXT: vmov r2, r6, d0 -; CHECK-NEXT: 
vmov.f32 s22, s8 -; CHECK-NEXT: vmov.f32 s20, s6 +; CHECK-NEXT: vmov r6, r4, d5 +; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: vmov.f32 s20, s8 +; CHECK-NEXT: vmov.f32 s22, s6 ; CHECK-NEXT: vmov.f32 s18, s4 ; CHECK-NEXT: vmov.f32 s24, s2 -; CHECK-NEXT: vmov.f32 s23, s9 -; CHECK-NEXT: vmov.f32 s21, s7 +; CHECK-NEXT: vmov.f32 s21, s9 +; CHECK-NEXT: vmov.f32 s23, s7 ; CHECK-NEXT: vmov.f32 s19, s5 ; CHECK-NEXT: vmov.f32 s25, s3 -; CHECK-NEXT: str r2, [sp, #24] @ 4-byte Spill -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: str r6, [sp, #28] @ 4-byte Spill -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: vldr d0, .LCPI13_1 +; CHECK-NEXT: str r2, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: strd r2, r3, [sp, #32] @ 8-byte Folded Spill +; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: vmov r8, r0, d10 -; CHECK-NEXT: cmp.w r11, #0 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r4 -; CHECK-NEXT: vmov r7, r5, d11 -; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: vldr d0, .LCPI13_1 +; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: vmov r1, r0, d12 -; CHECK-NEXT: strd r1, r0, [sp, #12] @ 8-byte Folded Spill -; CHECK-NEXT: it eq -; CHECK-NEXT: moveq.w r10, #-2147483648 -; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: cmp.w r9, #0 +; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: vmov r7, r8, d10 +; CHECK-NEXT: vmov r11, r10, d11 +; CHECK-NEXT: str r3, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: it eq +; CHECK-NEXT: moveq.w r5, #-2147483648 +; 
CHECK-NEXT: mov r9, r2 +; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r4 ; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r10, #-2147483648 +; CHECK-NEXT: mvnne r5, #-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r10, #0 -; CHECK-NEXT: ldr.w r11, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: str.w r10, [r11, #20] -; CHECK-NEXT: ldr.w r10, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldrd r2, r3, [sp, #32] @ 8-byte Folded Reload -; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: movne r5, #0 +; CHECK-NEXT: ldr r6, [sp, #28] @ 4-byte Reload ; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: str r5, [r6, #20] +; CHECK-NEXT: ldr r2, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: ldr r3, [sp, #24] @ 4-byte Reload ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: it eq -; CHECK-NEXT: moveq.w r6, #-2147483648 +; CHECK-NEXT: moveq.w r5, #-2147483648 +; CHECK-NEXT: ldr r3, [sp, #32] @ 4-byte Reload ; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: vmov r2, r1, d9 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: strd r2, r1, [sp] @ 8-byte Folded Spill +; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: cmp.w r9, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r6, #-2147483648 +; CHECK-NEXT: mvnne r5, 
#-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r6, #0 -; CHECK-NEXT: str.w r6, [r11, #16] -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: ldr r4, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: ldr.w r11, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldr r7, [sp, #32] @ 4-byte Reload -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: ldr r5, [sp, #36] @ 4-byte Reload -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: movne r5, #0 +; CHECK-NEXT: str r5, [r6, #16] +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: ldr.w r8, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: ldr.w r9, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: mov r3, r9 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: it eq -; CHECK-NEXT: moveq.w r10, #-2147483648 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r4 -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r10, #-2147483648 -; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r10, #0 -; CHECK-NEXT: ldr r4, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: ldr r2, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: moveq.w r0, #-2147483648 +; CHECK-NEXT: ldr r7, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 +; 
CHECK-NEXT: mov r3, r7 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: it ne +; CHECK-NEXT: mvnne r4, #-2147483648 +; CHECK-NEXT: bl __aeabi_dcmpun +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r4, #0 +; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: str r4, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r5, r6 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: cmp.w r11, #0 +; CHECK-NEXT: mov r10, r0 +; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: it eq -; CHECK-NEXT: moveq.w r8, #-2147483648 -; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: moveq.w r10, #-2147483648 +; CHECK-NEXT: ldr.w r11, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r4 +; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: vmov r7, r6, d9 ; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r8, #-2147483648 +; CHECK-NEXT: mvnne r10, #-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r8, #0 -; CHECK-NEXT: ldr.w r11, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: ldr r3, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, 
r11 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldrd r2, r3, [sp, #32] @ 8-byte Folded Reload -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: movne.w r10, #0 +; CHECK-NEXT: ldr r5, [sp] @ 4-byte Reload +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: moveq.w r4, #-2147483648 -; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: ldr.w r8, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r2, r5 ; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: cmp.w r9, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: mvnne r4, #-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpun @@ -1166,38 +1128,39 @@ define arm_aapcs_vfpcc <6 x i32> @test_signed_v6f64_v6i32(<6 x double> %f) { ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: ldr r3, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldrd r2, r3, [sp, #32] @ 8-byte Folded Reload -; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: ldr r2, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: mov r3, r9 ; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r11, r0 +; CHECK-NEXT: mov r9, r0 ; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: cmp.w r11, #0 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov 
r1, r6 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: cmp.w r9, #0 ; CHECK-NEXT: it eq ; CHECK-NEXT: moveq.w r5, #-2147483648 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: cmp.w r9, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: mvnne r5, #-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne r5, #0 +; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: vmov q0[2], q0[0], r5, r4 -; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: vmov q0[3], q0[1], r8, r10 +; CHECK-NEXT: vmov q0[3], q0[1], r10, r0 +; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload ; CHECK-NEXT: vstrw.32 q0, [r0] ; CHECK-NEXT: add sp, #40 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12} @@ -1206,11 +1169,11 @@ define arm_aapcs_vfpcc <6 x i32> @test_signed_v6f64_v6i32(<6 x double> %f) { ; CHECK-NEXT: .p2align 3 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI13_0: -; CHECK-NEXT: .long 4290772992 @ double 2147483647 -; CHECK-NEXT: .long 1105199103 -; CHECK-NEXT: .LCPI13_1: ; CHECK-NEXT: .long 0 @ double -2147483648 ; CHECK-NEXT: .long 3252682752 +; CHECK-NEXT: .LCPI13_1: +; CHECK-NEXT: .long 4290772992 @ double 2147483647 +; CHECK-NEXT: .long 1105199103 %x = call <6 x i32> @llvm.fptosi.sat.v6f64.v6i32(<6 x double> %f) ret <6 x i32> %x } @@ -1791,18 +1754,10 @@ define arm_aapcs_vfpcc <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) { ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: vmov r0, s19 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: mov r7, r1 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: vmov r0, s17 -; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: vldr s22, 
.LCPI28_0 ; CHECK-NEXT: mov r11, r1 ; CHECK-NEXT: vldr s20, .LCPI28_1 @@ -1811,48 +1766,60 @@ define arm_aapcs_vfpcc <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) { ; CHECK-NEXT: itt lt ; CHECK-NEXT: movwlt r11, #0 ; CHECK-NEXT: movtlt r11, #65534 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s20 ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt.w r9, #0 +; CHECK-NEXT: vcmp.f32 s17, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt.w r9, #-1 ; CHECK-NEXT: vcmp.f32 s17, s17 -; CHECK-NEXT: ittt gt +; CHECK-NEXT: itt gt ; CHECK-NEXT: movwgt r11, #65535 ; CHECK-NEXT: movtgt r11, #1 -; CHECK-NEXT: movgt.w r9, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s22 ; CHECK-NEXT: itt vs ; CHECK-NEXT: movvs.w r9, #0 ; CHECK-NEXT: movvs.w r11, #0 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: vmov r0, s19 +; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: ittt lt +; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r5, #0 ; CHECK-NEXT: movtlt r5, #65534 +; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r6, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: ittt gt +; CHECK-NEXT: itt gt ; CHECK-NEXT: movwgt r5, #65535 ; CHECK-NEXT: movtgt r5, #1 +; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r6, #-1 +; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s22 ; CHECK-NEXT: itt vs ; CHECK-NEXT: movvs r6, #0 ; CHECK-NEXT: movvs r5, #0 +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: vcmp.f32 s19, s22 +; CHECK-NEXT: mov r7, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt lt +; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: movtlt r7, #65534 -; CHECK-NEXT: movlt r4, #0 ; CHECK-NEXT: vcmp.f32 s19, s20 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt 
r4, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt gt +; CHECK-NEXT: itt gt ; CHECK-NEXT: movwgt r7, #65535 ; CHECK-NEXT: movtgt r7, #1 +; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r4, #-1 ; CHECK-NEXT: vcmp.f32 s19, s19 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr @@ -1864,9 +1831,9 @@ define arm_aapcs_vfpcc <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) { ; CHECK-NEXT: bfc r5, #18, #14 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: ittt lt +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movtlt r1, #65534 -; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr @@ -1880,11 +1847,10 @@ define arm_aapcs_vfpcc <4 x i50> @test_signed_v4f32_v4i50(<4 x float> %f) { ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: str.w r0, [r8] -; CHECK-NEXT: lsrs r0, r7, #10 +; CHECK-NEXT: lsr.w r0, r7, #10 ; CHECK-NEXT: bfc r7, #18, #14 ; CHECK-NEXT: bfc r11, #18, #14 ; CHECK-NEXT: lsll r4, r7, #22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: orr.w r3, r5, r7 ; CHECK-NEXT: str.w r3, [r8, #20] ; CHECK-NEXT: orr.w r2, r2, r4 @@ -2029,52 +1995,23 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) { ; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov r0, s17 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: vmov r6, s17 ; CHECK-NEXT: vldr s22, .LCPI30_0 -; CHECK-NEXT: mov r11, r1 ; CHECK-NEXT: vldr s20, .LCPI30_1 -; CHECK-NEXT: mov r10, r2 -; CHECK-NEXT: vcmp.f32 s17, s22 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: mvnlt r3, #7 -; CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: movlt.w r11, #0 -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r11, #-1 -; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: movgt.w r10, #-1 -; 
CHECK-NEXT: movgt r3, #7 -; CHECK-NEXT: vcmp.f32 s17, s17 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs.w r10, #0 -; CHECK-NEXT: movvs r7, #0 -; CHECK-NEXT: str r7, [sp] @ 4-byte Spill -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r11, #0 -; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: vmov r7, s19 ; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: mov r5, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: mvnlt r5, #7 +; CHECK-NEXT: mvnlt r3, #7 ; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt r5, #7 +; CHECK-NEXT: movgt r3, #7 ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: movgt.w r2, #-1 @@ -2086,40 +2023,67 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) { ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: str.w r1, [r4, #29] -; CHECK-NEXT: vmov r1, s19 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: str.w r0, [r4, #25] ; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: mov r10, r0 +; CHECK-NEXT: vcmp.f32 s17, s22 +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: mov r6, r2 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: itttt lt +; CHECK-NEXT: mvnlt r3, #7 +; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: movlt.w r10, #0 +; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: vcmp.f32 s17, s20 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: movgt.w r10, #-1 +; CHECK-NEXT: movgt.w r6, #-1 +; CHECK-NEXT: movgt r3, #7 +; CHECK-NEXT: vcmp.f32 s17, s17 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; 
CHECK-NEXT: it vs +; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: str r3, [sp] @ 4-byte Spill +; CHECK-NEXT: ittt vs +; CHECK-NEXT: movvs r6, #0 +; CHECK-NEXT: movvs.w r10, #0 ; CHECK-NEXT: movvs r5, #0 -; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: vcmp.f32 s19, s22 -; CHECK-NEXT: mov r9, r1 -; CHECK-NEXT: mov r6, r2 -; CHECK-NEXT: mov r8, r3 +; CHECK-NEXT: mov r11, r1 +; CHECK-NEXT: mov r8, r2 +; CHECK-NEXT: mov r9, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt lt -; CHECK-NEXT: mvnlt r8, #7 -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: movlt.w r9, #0 +; CHECK-NEXT: mvnlt r9, #7 +; CHECK-NEXT: movlt.w r8, #0 +; CHECK-NEXT: movlt.w r11, #0 ; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: vcmp.f32 s19, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt gt ; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: movgt.w r9, #-1 -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: movgt.w r8, #7 +; CHECK-NEXT: movgt.w r11, #-1 +; CHECK-NEXT: movgt.w r8, #-1 +; CHECK-NEXT: movgt.w r9, #7 ; CHECK-NEXT: vcmp.f32 s19, s19 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt vs -; CHECK-NEXT: movvs.w r8, #0 -; CHECK-NEXT: movvs r6, #0 ; CHECK-NEXT: movvs.w r9, #0 +; CHECK-NEXT: movvs.w r8, #0 +; CHECK-NEXT: movvs.w r11, #0 ; CHECK-NEXT: movvs r7, #0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcmp.f32 s16, s22 @@ -2148,30 +2112,30 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) { ; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: str r0, [r4] ; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: lsrl r0, r9, #28 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: orr.w r1, r9, r6, lsl #4 -; CHECK-NEXT: str.w r1, [r4, #45] -; CHECK-NEXT: and r1, r8, #15 +; CHECK-NEXT: lsrl r0, r11, #28 +; CHECK-NEXT: and r1, r9, #15 ; CHECK-NEXT: str.w r0, [r4, #41] -; CHECK-NEXT: and r0, r5, #15 -; CHECK-NEXT: lsrl r6, r1, #28 -; CHECK-NEXT: strb.w r6, [r4, #49] +; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: lsrl r0, r5, #28 +; 
CHECK-NEXT: str r0, [r4, #16] +; CHECK-NEXT: orr.w r0, r11, r8, lsl #4 +; CHECK-NEXT: lsrl r8, r1, #28 +; CHECK-NEXT: str.w r0, [r4, #45] +; CHECK-NEXT: strb.w r8, [r4, #49] +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: and r0, r0, #15 ; CHECK-NEXT: orr.w r0, r0, r7, lsl #4 ; CHECK-NEXT: str.w r0, [r4, #37] -; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload -; CHECK-NEXT: mov r0, r2 -; CHECK-NEXT: lsrl r0, r11, #28 -; CHECK-NEXT: orr.w r1, r11, r10, lsl #4 -; CHECK-NEXT: strd r0, r1, [r4, #16] -; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: orr.w r0, r5, r6, lsl #4 +; CHECK-NEXT: str r0, [r4, #20] +; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload ; CHECK-NEXT: and r1, r0, #15 -; CHECK-NEXT: lsrl r10, r1, #28 -; CHECK-NEXT: strb.w r10, [r4, #24] +; CHECK-NEXT: lsrl r6, r1, #28 +; CHECK-NEXT: strb r6, [r4, #24] ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 ; CHECK-NEXT: and r0, r3, #15 -; CHECK-NEXT: orr.w r0, r0, r2, lsl #4 +; CHECK-NEXT: orr.w r0, r0, r10, lsl #4 ; CHECK-NEXT: str r0, [r4, #12] ; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop {d8, d9, d10, d11} @@ -2200,61 +2164,58 @@ define arm_aapcs_vfpcc <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) { ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: vmov r0, s19 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: vmov r5, s18 ; CHECK-NEXT: vldr s22, .LCPI31_0 -; CHECK-NEXT: vmov r7, s16 ; CHECK-NEXT: vldr s20, .LCPI31_1 -; CHECK-NEXT: vmov r6, s17 +; CHECK-NEXT: add.w r12, r4, #48 ; CHECK-NEXT: vcmp.f32 s19, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt.w r3, #-2147483648 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: vcmp.f32 s19, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s19, s19 ; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: movgt.w r1, 
#-1 ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: mvngt r3, #-2147483648 +; CHECK-NEXT: vcmp.f32 s19, s19 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: ittt vs ; CHECK-NEXT: movvs r2, #0 ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: movvs r5, #0 -; CHECK-NEXT: strd r5, r1, [r4, #48] -; CHECK-NEXT: strd r2, r3, [r4, #56] +; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} +; CHECK-NEXT: vmov r7, s16 +; CHECK-NEXT: vmov r6, s17 +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcmp.f32 s18, s22 ; CHECK-NEXT: add.w r12, r4, #32 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt.w r3, #-2147483648 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: itttt gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: mvngt r3, #-2147483648 +; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt vs +; CHECK-NEXT: itttt vs ; CHECK-NEXT: movvs r3, #0 ; CHECK-NEXT: movvs r2, #0 ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} ; CHECK-NEXT: mov r0, r6 @@ -2262,52 +2223,48 @@ define arm_aapcs_vfpcc <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) { ; CHECK-NEXT: vcmp.f32 s17, s22 ; CHECK-NEXT: add.w r12, r4, #16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt.w r3, #-2147483648 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: vcmp.f32 s17, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s17 ; CHECK-NEXT: itttt gt ; CHECK-NEXT: movgt.w r0, #-1 ; 
CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: mvngt r3, #-2147483648 +; CHECK-NEXT: vcmp.f32 s17, s17 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt vs +; CHECK-NEXT: itttt vs ; CHECK-NEXT: movvs r3, #0 ; CHECK-NEXT: movvs r2, #0 ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} ; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcmp.f32 s16, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt.w r3, #-2147483648 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s16 ; CHECK-NEXT: itttt gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: mvngt r3, #-2147483648 +; CHECK-NEXT: vcmp.f32 s16, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt vs +; CHECK-NEXT: itttt vs ; CHECK-NEXT: movvs r3, #0 ; CHECK-NEXT: movvs r2, #0 ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: stm r4!, {r0, r1, r2, r3} ; CHECK-NEXT: vpop {d8, d9, d10, d11} @@ -2346,70 +2303,72 @@ define arm_aapcs_vfpcc <2 x i1> @test_signed_v2f64_v2i1(<2 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #24 -; CHECK-NEXT: sub sp, #24 +; CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI32_0 ; CHECK-NEXT: vmov r8, r7, d8 -; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: strd r2, r3, [sp, #12] @ 8-byte Folded Spill -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: vldr d0, .LCPI32_1 -; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: 
str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: vmov r11, r3, d0 +; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill +; CHECK-NEXT: mov r2, r11 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r10, r0 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: bl __aeabi_d2iz -; CHECK-NEXT: mov r11, r0 +; CHECK-NEXT: vldr d0, .LCPI32_1 +; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: vmov r2, r3, d0 ; CHECK-NEXT: cmp.w r10, #0 +; CHECK-NEXT: vmov r6, r5, d9 +; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: it eq -; CHECK-NEXT: moveq.w r11, #-1 +; CHECK-NEXT: moveq.w r9, #-1 +; CHECK-NEXT: mov r10, r3 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: mov r2, r8 ; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: vmov r6, r5, d9 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r11, #0 +; CHECK-NEXT: movne.w r9, #0 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r11, #0 -; CHECK-NEXT: and r0, r11, #1 -; CHECK-NEXT: ldrd r2, r3, [sp, #12] @ 8-byte Folded Reload +; CHECK-NEXT: movne.w r9, #0 +; CHECK-NEXT: and r0, r9, #1 +; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: bfi r4, r0, #0, #1 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r11 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_d2iz ; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: cmp.w r9, #0 +; CHECK-NEXT: cmp.w r8, #0 ; 
CHECK-NEXT: it eq ; CHECK-NEXT: moveq.w r7, #-1 +; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: cmp.w r8, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne r7, #0 ; CHECK-NEXT: bl __aeabi_dcmpun @@ -2419,20 +2378,20 @@ define arm_aapcs_vfpcc <2 x i1> @test_signed_v2f64_v2i1(<2 x double> %f) { ; CHECK-NEXT: and r0, r7, #1 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: bfi r4, r0, #1, #1 -; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: strb r4, [r0] -; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 3 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI32_0: -; CHECK-NEXT: .long 0 @ double 0 -; CHECK-NEXT: .long 0 -; CHECK-NEXT: .LCPI32_1: ; CHECK-NEXT: .long 0 @ double -1 ; CHECK-NEXT: .long 3220176896 +; CHECK-NEXT: .LCPI32_1: +; CHECK-NEXT: .long 0 @ double 0 +; CHECK-NEXT: .long 0 %x = call <2 x i1> @llvm.fptosi.sat.v2f64.v2i1(<2 x double> %f) ret <2 x i1> %x } @@ -2446,115 +2405,82 @@ define arm_aapcs_vfpcc <2 x i8> @test_signed_v2f64_v2i8(<2 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #24 -; CHECK-NEXT: sub sp, #24 +; CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI33_0 -; CHECK-NEXT: vmov r8, r7, d9 -; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: strd r2, r3, [sp, #8] @ 8-byte Folded Spill +; CHECK-NEXT: vmov r9, r8, d9 +; CHECK-NEXT: vmov r7, r3, d0 +; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r8 +; 
CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: vldr d0, .LCPI33_1 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: lsrs r4, r0, #5 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: vmov r6, r5, d0 -; CHECK-NEXT: str r4, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r11, #127 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #1 -; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r11, #127 -; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: vmov r10, r7, d8 -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r8, #1 -; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r11, #0 -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r6, #1 -; CHECK-NEXT: ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: lsr.w r9, r0, #5 -; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: bl __aeabi_d2lz +; CHECK-NEXT: vldr d0, .LCPI33_1 ; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r5, #127 -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r7 
+; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: vmov r11, r10, d8 ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne +; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill +; CHECK-NEXT: itt eq +; CHECK-NEXT: moveq.w r4, #-1 +; CHECK-NEXT: mvneq r5, #127 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: itt ne ; CHECK-NEXT: movne r5, #127 +; CHECK-NEXT: movne r4, #0 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne r4, #0 ; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: vmov q0[2], q0[0], r5, r11 -; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r2, #-1 -; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r2, #0 +; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: bl __aeabi_d2lz +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r2, #0 -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 +; CHECK-NEXT: itt eq +; CHECK-NEXT: mvneq r7, #127 +; CHECK-NEXT: moveq.w r6, #-1 +; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: vmov 
q0[3], q0[1], r4, r2 -; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne r6, #0 +; CHECK-NEXT: movne r7, #127 +; CHECK-NEXT: bl __aeabi_dcmpun +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne r7, #0 +; CHECK-NEXT: movne r6, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r7, r5 +; CHECK-NEXT: vmov q0[3], q0[1], r6, r4 +; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -2579,118 +2505,84 @@ define arm_aapcs_vfpcc <2 x i13> @test_signed_v2f64_v2i13(<2 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #24 -; CHECK-NEXT: sub sp, #24 +; CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI34_0 -; CHECK-NEXT: vmov r8, r7, d9 -; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: strd r2, r3, [sp, #8] @ 8-byte Folded Spill +; CHECK-NEXT: vmov r9, r8, d9 +; CHECK-NEXT: vmov r7, r3, d0 +; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: vldr d0, .LCPI34_1 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: lsrs r4, r0, #5 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: vmov r6, r5, d0 -; CHECK-NEXT: str r4, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r1, #-1 -; CHECK-NEXT: mov r11, r0 +; CHECK-NEXT: vldr d0, .LCPI34_1 +; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: bl __aeabi_dcmpgt -; 
CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #1 -; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: vmov r11, r10, d8 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill +; CHECK-NEXT: ittt eq +; CHECK-NEXT: movweq r5, #61440 +; CHECK-NEXT: movteq r5, #65535 +; CHECK-NEXT: moveq.w r4, #-1 +; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: it ne +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movwne r5, #4095 ; CHECK-NEXT: movne r4, #0 ; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: vmov r10, r7, d8 -; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r8, #1 -; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: it ne +; CHECK-NEXT: itt ne ; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: str r4, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r6, #1 -; CHECK-NEXT: ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: movne r5, #0 +; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: lsr.w r9, r0, #5 -; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: 
movne.w r4, #-1 -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #1 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: cmp.w r8, #0 +; CHECK-NEXT: ittt eq +; CHECK-NEXT: movweq r7, #61440 +; CHECK-NEXT: movteq r7, #65535 +; CHECK-NEXT: moveq.w r6, #-1 +; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: itt ne -; CHECK-NEXT: movwne r5, #61440 -; CHECK-NEXT: movtne r5, #65535 -; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r10 ; CHECK-NEXT: itt ne -; CHECK-NEXT: movwne r11, #61440 -; CHECK-NEXT: movtne r11, #65535 -; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movwne r11, #4095 -; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r11, #0 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movwne r5, #4095 +; CHECK-NEXT: movne r6, #0 +; CHECK-NEXT: movwne r7, #4095 +; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: vmov q0[2], q0[0], r5, r11 -; CHECK-NEXT: vmov q0[3], q0[1], r4, r0 -; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne r6, #0 +; CHECK-NEXT: movne r7, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r7, r5 +; CHECK-NEXT: vmov q0[3], q0[1], r6, r4 +; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, 
r7, r8, r9, r10, r11, pc} @@ -2715,118 +2607,84 @@ define arm_aapcs_vfpcc <2 x i16> @test_signed_v2f64_v2i16(<2 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #24 -; CHECK-NEXT: sub sp, #24 +; CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI35_0 -; CHECK-NEXT: vmov r8, r7, d9 -; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: strd r2, r3, [sp, #8] @ 8-byte Folded Spill +; CHECK-NEXT: vmov r9, r8, d9 +; CHECK-NEXT: vmov r7, r3, d0 +; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: vldr d0, .LCPI35_1 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: lsrs r4, r0, #5 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: vmov r6, r5, d0 -; CHECK-NEXT: str r4, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r1, #-1 -; CHECK-NEXT: mov r11, r0 +; CHECK-NEXT: vldr d0, .LCPI35_1 +; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: vmov r11, r10, d8 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill +; CHECK-NEXT: ittt eq +; CHECK-NEXT: movweq r5, #32768 +; CHECK-NEXT: movteq r5, #65535 +; CHECK-NEXT: moveq.w r4, #-1 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #1 -; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r7 -; 
CHECK-NEXT: it ne +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movwne r5, #32767 ; CHECK-NEXT: movne r4, #0 ; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: vmov r10, r7, d8 -; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r8, #1 -; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: it ne +; CHECK-NEXT: itt ne ; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: str r4, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r6, #1 -; CHECK-NEXT: ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: movne r5, #0 +; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: lsr.w r9, r0, #5 -; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #1 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: cmp.w r8, #0 +; CHECK-NEXT: ittt eq +; CHECK-NEXT: movweq r7, #32768 +; CHECK-NEXT: movteq r7, #65535 +; CHECK-NEXT: moveq.w r6, #-1 +; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: 
mov r1, r10 +; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: itt ne -; CHECK-NEXT: movwne r5, #32768 -; CHECK-NEXT: movtne r5, #65535 -; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r10 ; CHECK-NEXT: itt ne -; CHECK-NEXT: movwne r11, #32768 -; CHECK-NEXT: movtne r11, #65535 -; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movwne r11, #32767 -; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r11, #0 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movwne r5, #32767 +; CHECK-NEXT: movne r6, #0 +; CHECK-NEXT: movwne r7, #32767 +; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: vmov q0[2], q0[0], r5, r11 -; CHECK-NEXT: vmov q0[3], q0[1], r4, r0 -; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne r6, #0 +; CHECK-NEXT: movne r7, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r7, r5 +; CHECK-NEXT: vmov q0[3], q0[1], r6, r4 +; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -2851,118 +2709,94 @@ define arm_aapcs_vfpcc <2 x i19> @test_signed_v2f64_v2i19(<2 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: .pad #24 +; CHECK-NEXT: sub sp, #24 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI36_0 -; CHECK-NEXT: vmov r7, r6, d9 -; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: strd r2, r3, [sp, #8] @ 8-byte Folded Spill +; CHECK-NEXT: vmov r5, r4, d9 +; CHECK-NEXT: vmov r7, 
r6, d0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: strd r5, r4, [sp, #12] @ 8-byte Folded Spill ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: vldr d0, .LCPI36_1 -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: vmov r8, r0, d8 -; CHECK-NEXT: vmov r11, r10, d0 -; CHECK-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-NEXT: clz r0, r4 -; CHECK-NEXT: lsrs r0, r0, #5 -; CHECK-NEXT: ittt ne -; CHECK-NEXT: movwne r9, #0 -; CHECK-NEXT: movtne r9, #65532 -; CHECK-NEXT: movne.w r1, #-1 -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: mov r3, r10 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: vmov r11, r5, d8 +; CHECK-NEXT: mov r9, r1 +; CHECK-NEXT: vmov r10, r0, d0 ; CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r6, #1 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: ittt eq +; CHECK-NEXT: movweq r8, #0 +; CHECK-NEXT: movteq r8, #65532 +; CHECK-NEXT: moveq.w r9, #-1 +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: str r5, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: bl __aeabi_d2lz +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: itt ne -; 
CHECK-NEXT: movwne r9, #65535 -; CHECK-NEXT: movtne r9, #3 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: ldr r4, [sp] @ 4-byte Reload -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: mov r3, r10 -; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: ittt eq +; CHECK-NEXT: moveq r7, #0 +; CHECK-NEXT: movteq r7, #65532 +; CHECK-NEXT: moveq.w r6, #-1 +; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r10 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r5, #1 -; CHECK-NEXT: ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload -; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: ittt ne +; CHECK-NEXT: movne r6, #0 +; CHECK-NEXT: movwne r7, #65535 +; CHECK-NEXT: movtne r7, #3 +; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: ldr r4, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: clz r0, r0 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r11, r4 -; CHECK-NEXT: lsr.w r10, r0, #5 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: cmp.w r10, #0 -; CHECK-NEXT: mov r7, r1 -; CHECK-NEXT: itt ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: movtne r4, #65532 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: itt ne -; CHECK-NEXT: movwne r4, #65535 -; CHECK-NEXT: movtne r4, #3 -; CHECK-NEXT: cmp.w r10, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r7, #-1 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #0 +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov 
r3, r4 +; CHECK-NEXT: ittt ne +; CHECK-NEXT: movne.w r9, #0 +; CHECK-NEXT: movwne r8, #65535 +; CHECK-NEXT: movtne r8, #3 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #0 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne +; CHECK-NEXT: itt ne ; CHECK-NEXT: movne.w r9, #0 +; CHECK-NEXT: movne.w r8, #0 +; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r1 +; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: vmov q0[2], q0[0], r4, r9 -; CHECK-NEXT: vmov q0[3], q0[1], r7, r0 -; CHECK-NEXT: add sp, #16 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne r6, #0 +; CHECK-NEXT: movne r7, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r7, r8 +; CHECK-NEXT: vmov q0[3], q0[1], r6, r9 +; CHECK-NEXT: add sp, #24 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -2987,115 +2821,82 @@ define arm_aapcs_vfpcc <2 x i32> @test_signed_v2f64_v2i32_duplicate(<2 x double> ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #24 -; CHECK-NEXT: sub sp, #24 +; CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI37_0 -; CHECK-NEXT: vmov r8, r7, d9 -; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: strd r2, r3, [sp, #8] @ 8-byte Folded Spill +; CHECK-NEXT: vmov r9, r8, d9 +; CHECK-NEXT: vmov r7, r3, d0 +; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: vldr d0, .LCPI37_1 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: lsrs r4, r0, #5 -; CHECK-NEXT: 
mov r0, r8 -; CHECK-NEXT: vmov r6, r5, d0 -; CHECK-NEXT: str r4, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r11, #-2147483648 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #1 -; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r11, #-2147483648 -; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: vmov r10, r7, d8 -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r8, #1 -; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r11, #0 -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r6, #1 -; CHECK-NEXT: ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: lsr.w r9, r0, #5 -; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: bl __aeabi_d2lz +; CHECK-NEXT: vldr d0, .LCPI37_1 ; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r5, #-2147483648 -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r5, #-2147483648 -; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it 
ne -; CHECK-NEXT: movne r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: vmov q0[2], q0[0], r5, r11 -; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r2, #-1 -; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r2, #0 -; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r2, #0 -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: vmov r11, r10, d8 ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne +; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill +; CHECK-NEXT: itt eq +; CHECK-NEXT: moveq.w r4, #-1 +; CHECK-NEXT: moveq.w r5, #-2147483648 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: itt ne +; CHECK-NEXT: mvnne r5, #-2147483648 ; CHECK-NEXT: movne r4, #0 +; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne +; CHECK-NEXT: itt ne ; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r4, r2 -; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: movne r5, #0 +; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: bl __aeabi_d2lz +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: cmp.w r8, #0 +; CHECK-NEXT: itt eq +; CHECK-NEXT: moveq.w r7, #-2147483648 +; CHECK-NEXT: moveq.w r6, #-1 +; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: 
cmp r0, #0 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne r6, #0 +; CHECK-NEXT: mvnne r7, #-2147483648 +; CHECK-NEXT: bl __aeabi_dcmpun +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne r7, #0 +; CHECK-NEXT: movne r6, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r7, r5 +; CHECK-NEXT: vmov q0[3], q0[1], r6, r4 +; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -3124,114 +2925,84 @@ define arm_aapcs_vfpcc <2 x i50> @test_signed_v2f64_v2i50(<2 x double> %f) { ; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI38_0 -; CHECK-NEXT: vmov r7, r6, d9 -; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: strd r2, r3, [sp, #8] @ 8-byte Folded Spill +; CHECK-NEXT: vmov r5, r4, d9 +; CHECK-NEXT: vmov r6, r7, d0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: strd r4, r5, [sp, #8] @ 8-byte Folded Spill ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r11, r0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: bl __aeabi_d2lz +; CHECK-NEXT: vmov r5, r10, d8 ; CHECK-NEXT: vldr d0, .LCPI38_1 -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: vmov r8, r0, d8 -; CHECK-NEXT: vmov r11, r10, d0 -; CHECK-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-NEXT: clz r0, r4 -; CHECK-NEXT: lsrs r0, r0, #5 -; CHECK-NEXT: itt ne -; CHECK-NEXT: movne r1, #0 -; CHECK-NEXT: movtne r1, #65534 -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: mov r3, r10 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r9, #0 +; CHECK-NEXT: cmp.w r11, #0 +; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: vmov r9, r8, d0 +; CHECK-NEXT: csel 
r11, r0, r11, ne +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: str.w r9, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: itt eq +; CHECK-NEXT: moveq r4, #0 +; CHECK-NEXT: movteq r4, #65534 +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: bl __aeabi_d2lz +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: csel r7, r0, r7, ne +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: itt eq +; CHECK-NEXT: moveq r6, #0 +; CHECK-NEXT: movteq r6, #65534 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r9, #-1 +; CHECK-NEXT: ittt ne +; CHECK-NEXT: movne.w r7, #-1 +; CHECK-NEXT: movwne r6, #65535 +; CHECK-NEXT: movtne r6, #1 +; CHECK-NEXT: ldrd r9, r0, [sp, #8] @ 8-byte Folded Reload +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mov r1, r9 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: ittt ne +; CHECK-NEXT: movne.w r11, #-1 +; CHECK-NEXT: movwne r4, #65535 +; CHECK-NEXT: movtne r4, #1 +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: mov r1, r9 +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: mov r2, r0 ; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r6, #1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: itt ne -; CHECK-NEXT: movwne r5, #65535 -; CHECK-NEXT: movtne r5, #1 -; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r9, #0 -; CHECK-NEXT: ldr r4, [sp] @ 4-byte Reload -; CHECK-NEXT: mov r0, 
r8 -; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r2, r5 ; CHECK-NEXT: mov r3, r10 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r5, #1 -; CHECK-NEXT: ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r11, r4 -; CHECK-NEXT: lsr.w r10, r0, #5 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: mov r7, r1 -; CHECK-NEXT: cmp.w r10, #0 -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: itt ne -; CHECK-NEXT: movne r7, #0 -; CHECK-NEXT: movtne r7, #65534 -; CHECK-NEXT: cmp r5, #0 ; CHECK-NEXT: itt ne -; CHECK-NEXT: movwne r7, #65535 -; CHECK-NEXT: movtne r7, #1 -; CHECK-NEXT: cmp.w r10, #0 -; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r11, #0 ; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: vmov q0[2], q0[0], r4, r9 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r1, #0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne ; CHECK-NEXT: movne r7, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r7, r1 +; CHECK-NEXT: vmov q0[2], q0[0], r7, r11 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r6, #0 +; CHECK-NEXT: vmov q0[3], q0[1], r6, r4 ; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 @@ -3257,115 +3028,80 @@ define arm_aapcs_vfpcc <2 x i64> @test_signed_v2f64_v2i64(<2 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, 
d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #24 -; CHECK-NEXT: sub sp, #24 +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI39_0 ; CHECK-NEXT: vmov r8, r7, d9 -; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: vmov r11, r5, d0 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: strd r2, r3, [sp, #8] @ 8-byte Folded Spill +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: vldr d0, .LCPI39_1 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: lsrs r4, r0, #5 +; CHECK-NEXT: mov r9, r0 ; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: vmov r6, r5, d0 -; CHECK-NEXT: str r4, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: bl __aeabi_d2lz -; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: vldr d0, .LCPI39_1 +; CHECK-NEXT: cmp.w r9, #0 +; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: csel r9, r0, r9, ne +; CHECK-NEXT: vmov r2, r3, d0 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r11, #0 +; CHECK-NEXT: vmov r6, r10, d8 +; CHECK-NEXT: strd r2, r3, [sp] @ 8-byte Folded Spill +; CHECK-NEXT: it eq +; CHECK-NEXT: moveq.w r4, #-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #1 -; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: mov r2, r8 ; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r11, #-1 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne.w r9, #-1 +; CHECK-NEXT: mvnne r4, #-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpun -; CHECK-NEXT: vmov r10, r7, d8 -; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r2, r11 ; CHECK-NEXT: 
mov r3, r5 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r8, #1 -; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r11, #0 -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r6, #1 -; CHECK-NEXT: ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne r4, #0 +; CHECK-NEXT: movne.w r9, #0 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: lsr.w r9, r0, #5 -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: bl __aeabi_d2lz +; CHECK-NEXT: mov r7, r1 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: it eq +; CHECK-NEXT: moveq.w r7, #-2147483648 +; CHECK-NEXT: ldrd r2, r3, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: csel r5, r0, r5, ne +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: itt ne +; CHECK-NEXT: mvnne r7, #-2147483648 ; CHECK-NEXT: movne.w r5, #-1 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne +; CHECK-NEXT: itt ne ; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: vmov q0[2], q0[0], r5, r11 -; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r2, 
#-2147483648 -; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r2, #-2147483648 -; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r2, #0 -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-2147483648 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r4, #-2147483648 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r4, r2 -; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: movne r7, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r5, r9 +; CHECK-NEXT: vmov q0[3], q0[1], r7, r4 +; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -3394,258 +3130,252 @@ define arm_aapcs_vfpcc <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) { ; CHECK-NEXT: sub sp, #48 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI40_0 -; CHECK-NEXT: vmov r10, r9, d8 -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: vmov r7, r3, d0 -; CHECK-NEXT: str r3, [sp, #40] @ 4-byte Spill -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r9 -; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: str r7, [sp, #44] @ 4-byte Spill -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: vldr d0, .LCPI40_1 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r9 -; CHECK-NEXT: vmov r8, r3, d0 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r11, r3 +; CHECK-NEXT: vmov r7, r6, d8 +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: vmov r10, r9, d0 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r3, r9 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r9 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: bl __fixdfti +; CHECK-NEXT: vldr d0, .LCPI40_1 ; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill +; CHECK-NEXT: strd r1, 
r0, [sp, #16] @ 8-byte Folded Spill ; CHECK-NEXT: csel r4, r2, r4, ne -; CHECK-NEXT: str r3, [sp, #28] @ 4-byte Spill -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r9 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: vmov r5, r11, d0 +; CHECK-NEXT: str r3, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r3, r6 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne.w r4, #-1 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r3, r9 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: str r4, [r6, #8] -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: ldr r5, [sp, #40] @ 4-byte Reload -; CHECK-NEXT: mov r1, r9 -; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r9 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: str.w r8, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: str.w r8, [sp, #44] @ 4-byte Spill +; CHECK-NEXT: str.w r4, [r8, #8] +; CHECK-NEXT: str.w r9, [sp, #36] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: csel r7, r1, r0, ne -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r9 -; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: csel r4, r1, r0, ne +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: str.w r11, [sp, #40] @ 4-byte Spill +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r7 +; 
CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r3, r6 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r7, #-1 +; CHECK-NEXT: movne.w r4, #-1 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r3, r9 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #0 -; CHECK-NEXT: str r7, [r6, #4] -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: ldr r2, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: mov r1, r9 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: str r6, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r9 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: str.w r11, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: movne r4, #0 +; CHECK-NEXT: str.w r4, [r8, #4] ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: csel r7, r1, r0, ne -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r9 -; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: csel r4, r1, r0, ne +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r3, r6 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r7, #-1 -; CHECK-NEXT: str.w r10, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: str.w r9, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: str r7, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: str r6, [sp, #28] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: vmov r9, r8, d9 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #0 -; CHECK-NEXT: str r7, [r6] -; CHECK-NEXT: ldr r6, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: mov r3, r5 -; 
CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldr.w r10, [sp, #32] @ 4-byte Reload -; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: movne r4, #0 +; CHECK-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: str r4, [r0] +; CHECK-NEXT: ldr.w r11, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: mov r3, r11 ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: mov r2, r10 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r11, r0 +; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: cmp.w r11, #0 -; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill -; CHECK-NEXT: csel r11, r1, r11, ne -; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: strd r2, r3, [sp, #12] @ 8-byte Folded Spill +; CHECK-NEXT: csel r7, r1, r4, ne +; CHECK-NEXT: mov r4, r5 +; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: ldr r5, [sp, #40] @ 4-byte Reload +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: mov r2, r9 ; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r11, #-1 +; CHECK-NEXT: movne.w r7, #-1 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r11, #0 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldr r5, [sp, #36] @ 4-byte Reload -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r6, r10 -; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r7, #0 ; CHECK-NEXT: bl 
__aeabi_dcmpge ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: csel r10, r4, r0, ne +; CHECK-NEXT: csel r6, r6, r0, ne +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r2, r4 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: mov r2, r9 ; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r10, #-1 +; CHECK-NEXT: movne.w r6, #-1 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r10, #0 -; CHECK-NEXT: ldr r7, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: ldr r3, [sp, #40] @ 4-byte Reload -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: str.w r10, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: lsrl r10, r11, #28 -; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-NEXT: movne r0, #0 +; CHECK-NEXT: ldr r1, [sp, #44] @ 4-byte Reload +; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: lsrl r0, r7, #28 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: str r0, [r1, #16] ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: str.w r10, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: mov r6, r11 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: ldr r5, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: ldr.w r11, [sp, #40] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: mov r2, r5 ; CHECK-NEXT: csel r4, r1, r0, ne -; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload +; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r9 
+; CHECK-NEXT: mov r3, r8 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne.w r4, #-1 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: orr.w r0, r11, r4, lsl #4 +; CHECK-NEXT: orr.w r0, r7, r4, lsl #4 +; CHECK-NEXT: ldr r7, [sp, #44] @ 4-byte Reload ; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: strd r10, r0, [r6, #16] -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: ldr.w r11, [sp, #40] @ 4-byte Reload -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldr r7, [sp, #32] @ 4-byte Reload -; CHECK-NEXT: mov r10, r0 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: str r0, [r7, #20] ; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r11 ; CHECK-NEXT: it eq -; CHECK-NEXT: mvneq r0, #7 -; CHECK-NEXT: cmp.w r10, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #7 -; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mvneq r6, #7 +; CHECK-NEXT: mov r10, r5 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: mov r2, r9 ; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r6, #7 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: and r1, r5, #15 -; CHECK-NEXT: mov r8, r6 +; CHECK-NEXT: movne r0, #0 +; CHECK-NEXT: and r1, r0, #15 ; CHECK-NEXT: lsrl r4, r1, #28 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: strb r4, [r6, #24] -; CHECK-NEXT: ldr r6, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: strb r4, [r7, #24] ; CHECK-NEXT: ldr r5, [sp, #24] @ 4-byte Reload -; 
CHECK-NEXT: ldr r2, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: ldr r6, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: ldr r3, [sp, #36] @ 4-byte Reload -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: ldr r4, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r3, r11 ; CHECK-NEXT: it eq -; CHECK-NEXT: mvneq r0, #7 -; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: mvneq r4, #7 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r6 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #7 -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: movne r4, #7 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: and r0, r4, #15 +; CHECK-NEXT: movne r0, #0 +; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: and r0, r0, #15 ; CHECK-NEXT: orr.w r0, r0, r1, lsl #4 -; CHECK-NEXT: str.w r0, [r8, #12] +; CHECK-NEXT: str r0, [r7, #12] ; CHECK-NEXT: add sp, #48 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 @@ -3653,11 +3383,11 @@ define arm_aapcs_vfpcc <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) { ; CHECK-NEXT: .p2align 3 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI40_0: -; CHECK-NEXT: .long 4294967295 @ double 6.3382530011411463E+29 -; CHECK-NEXT: .long 1176502271 -; CHECK-NEXT: .LCPI40_1: ; CHECK-NEXT: .long 0 @ double 
-6.338253001141147E+29 ; CHECK-NEXT: .long 3323985920 +; CHECK-NEXT: .LCPI40_1: +; CHECK-NEXT: .long 4294967295 @ double 6.3382530011411463E+29 +; CHECK-NEXT: .long 1176502271 %x = call <2 x i100> @llvm.fptosi.sat.v2f64.v2i100(<2 x double> %f) ret <2 x i100> %x } @@ -3676,247 +3406,237 @@ define arm_aapcs_vfpcc <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) { ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI41_0 ; CHECK-NEXT: vmov r8, r7, d9 -; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r9, r0 ; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r11, r3 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: vldr d0, .LCPI41_1 -; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: str r3, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: vmov r4, r3, d0 -; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill -; CHECK-NEXT: mov r2, r4 +; CHECK-NEXT: mov r4, r2 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: mov r10, r3 -; CHECK-NEXT: strd r2, r1, [sp] @ 8-byte Folded Spill -; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: cmp.w r9, #0 +; CHECK-NEXT: vldr d0, .LCPI41_1 +; CHECK-NEXT: mov r5, r3 +; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: vmov r10, r11, d0 +; CHECK-NEXT: str r2, [sp] @ 4-byte Spill +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: cmp r6, #0 ; CHECK-NEXT: it eq -; CHECK-NEXT: moveq.w r10, #-2147483648 +; CHECK-NEXT: moveq.w r5, #-2147483648 +; CHECK-NEXT: str.w r11, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: mov r2, r8 ; CHECK-NEXT: mov r3, r7 -; 
CHECK-NEXT: cmp r5, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r10, #-2147483648 +; CHECK-NEXT: mvnne r5, #-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r10, #0 -; CHECK-NEXT: str.w r10, [r6, #28] -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: ldr.w r9, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: mov r5, r11 -; CHECK-NEXT: str.w r11, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldr.w r10, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: movne r5, #0 +; CHECK-NEXT: str.w r5, [r9, #28] ; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: mov r2, r4 -; CHECK-NEXT: mov r11, r4 -; CHECK-NEXT: mov r3, r10 -; CHECK-NEXT: str r4, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: str r4, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: mov r3, r6 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: csel r4, r1, r0, ne +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: csel r5, r1, r0, ne ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: str.w r10, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: mov r3, r7 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: movne.w r5, #-1 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r2, r4 +; CHECK-NEXT: mov r3, r6 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: movne r5, #0 +; CHECK-NEXT: str.w r5, [r9, #24] +; CHECK-NEXT: mov r11, r6 +; CHECK-NEXT: bl 
__aeabi_dcmpge +; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: csel r4, r1, r0, ne ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: str r4, [r6, #24] ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: mov r3, r10 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r2, r8 ; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: csel r4, r1, r0, ne -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: cmp r5, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne.w r4, #-1 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: str r4, [r6, #20] -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: ldr.w r10, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r11, r6 -; CHECK-NEXT: mov r3, r10 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldrd r2, r3, [sp, #20] @ 8-byte Folded Reload -; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: str.w r4, [r9, #20] ; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: ldr r2, [sp, #24] @ 4-byte Reload ; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: vmov r6, r5, d8 +; CHECK-NEXT: mov r10, r9 +; CHECK-NEXT: str.w r9, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: mov r9, r11 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldr.w r11, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload ; CHECK-NEXT: csel r4, r1, r0, ne ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: cmp.w r9, #0 
-; CHECK-NEXT: vmov r6, r5, d8 +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: mov r3, r7 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne.w r4, #-1 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: str.w r4, [r11, #16] +; CHECK-NEXT: str.w r4, [r10, #16] ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: ldr r7, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r3, r10 -; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldr.w r9, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: mov r10, r0 ; CHECK-NEXT: ldr.w r8, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: mov r2, r8 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r11, r0 +; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: mov r4, r3 -; CHECK-NEXT: strd r2, r1, [sp] @ 8-byte Folded Spill -; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: cmp.w r11, #0 +; CHECK-NEXT: mov r9, r3 +; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill +; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: it eq -; CHECK-NEXT: moveq.w r4, #-2147483648 +; CHECK-NEXT: moveq.w r9, #-2147483648 +; CHECK-NEXT: ldr.w r10, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: mov r4, r2 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: cmp.w r10, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: mvnne r4, #-2147483648 +; CHECK-NEXT: mvnne r9, #-2147483648 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; 
CHECK-NEXT: movne r4, #0 -; CHECK-NEXT: ldr.w r10, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: movne.w r9, #0 +; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: str.w r4, [r10, #12] -; CHECK-NEXT: ldr.w r11, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: str.w r9, [r7, #12] +; CHECK-NEXT: ldr.w r9, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: csel r4, r4, r0, ne +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r2, r10 ; CHECK-NEXT: mov r3, r11 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload -; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: csel r7, r1, r0, ne -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r7, #-1 +; CHECK-NEXT: movne.w r4, #-1 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #0 -; CHECK-NEXT: str.w r7, [r10, #8] ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload ; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r4, #0 +; CHECK-NEXT: str r4, [r7, #8] +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r2, r10 ; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: csel r4, r1, r0, ne ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload 
+; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: csel r7, r1, r0, ne ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r7, #-1 +; CHECK-NEXT: movne.w r4, #-1 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #0 -; CHECK-NEXT: str.w r7, [r10, #4] ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload ; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r4, #0 +; CHECK-NEXT: str r4, [r7, #4] +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r2, r10 ; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: csel r4, r1, r0, ne ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: csel r7, r1, r0, ne ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r7, #-1 +; CHECK-NEXT: movne.w r4, #-1 ; CHECK-NEXT: bl __aeabi_dcmpun ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #0 -; CHECK-NEXT: str.w r7, [r10] +; CHECK-NEXT: movne r4, #0 +; CHECK-NEXT: str r4, [r7] ; CHECK-NEXT: add sp, #32 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 @@ -3924,11 +3644,11 @@ define arm_aapcs_vfpcc <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) { ; CHECK-NEXT: .p2align 3 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI41_0: -; CHECK-NEXT: .long 4294967295 
@ double 1.7014118346046921E+38 -; CHECK-NEXT: .long 1205862399 -; CHECK-NEXT: .LCPI41_1: ; CHECK-NEXT: .long 0 @ double -1.7014118346046923E+38 ; CHECK-NEXT: .long 3353346048 +; CHECK-NEXT: .LCPI41_1: +; CHECK-NEXT: .long 4294967295 @ double 1.7014118346046921E+38 +; CHECK-NEXT: .long 1205862399 %x = call <2 x i128> @llvm.fptosi.sat.v2f64.v2i128(<2 x double> %f) ret <2 x i128> %x } @@ -4559,101 +4279,103 @@ define arm_aapcs_vfpcc <8 x i50> @test_signed_v8f16_v8i50(<8 x half> %f) { ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: .pad #24 ; CHECK-NEXT: sub sp, #24 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: vcvtt.f32.f16 s24, s17 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: vcvtt.f32.f16 s24, s16 ; CHECK-NEXT: vmov r0, s24 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcvtb.f32.f16 s28, s17 -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: vmov r0, s28 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcvtt.f32.f16 s30, s16 -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov r0, s30 -; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: vcvtb.f32.f16 s26, s17 +; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: vmov r0, s26 +; CHECK-NEXT: vldr s22, .LCPI48_0 +; CHECK-NEXT: vldr s20, .LCPI48_1 +; CHECK-NEXT: vcmp.f32 s24, s22 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movtlt r1, #65534 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: vcmp.f32 s24, s20 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: vcmp.f32 s24, s24 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movwgt r1, #65535 +; CHECK-NEXT: movtgt r1, #1 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: 
it vs +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcvtb.f32.f16 s26, s18 +; CHECK-NEXT: vcvtt.f32.f16 s24, s17 ; CHECK-NEXT: mov r2, r0 -; CHECK-NEXT: vmov r0, s26 -; CHECK-NEXT: vldr s20, .LCPI48_0 -; CHECK-NEXT: vldr s22, .LCPI48_1 -; CHECK-NEXT: vcmp.f32 s30, s20 +; CHECK-NEXT: vmov r0, s24 +; CHECK-NEXT: vcmp.f32 s26, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movtlt r1, #65534 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s22 +; CHECK-NEXT: vcmp.f32 s26, s20 ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s30 -; CHECK-NEXT: ittt gt +; CHECK-NEXT: itt gt ; CHECK-NEXT: movwgt r1, #65535 ; CHECK-NEXT: movtgt r1, #1 +; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: vcmp.f32 s26, s26 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s20 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcvtb.f32.f16 s26, s18 +; CHECK-NEXT: mov r2, r0 +; CHECK-NEXT: vmov r0, s26 +; CHECK-NEXT: vcmp.f32 s24, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s22 -; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: ittt lt -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: movtlt r5, #65534 -; CHECK-NEXT: movlt r4, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s28 -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movwgt r5, #65535 -; CHECK-NEXT: movtgt r5, #1 -; CHECK-NEXT: movgt.w r4, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: itt lt +; CHECK-NEXT: movlt r1, #0 +; 
CHECK-NEXT: movtlt r1, #65534 ; CHECK-NEXT: vcmp.f32 s24, s20 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r4, #0 -; CHECK-NEXT: str r4, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r5, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r5, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: ittt lt -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: movtlt r6, #65534 -; CHECK-NEXT: movlt.w r8, #0 -; CHECK-NEXT: vcmp.f32 s24, s22 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movwgt r6, #65535 -; CHECK-NEXT: movtgt r6, #1 -; CHECK-NEXT: movgt.w r8, #-1 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movwgt r1, #65535 +; CHECK-NEXT: movtgt r1, #1 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: vcmp.f32 s24, s24 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r8, #0 -; CHECK-NEXT: str.w r8, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r6, #0 -; CHECK-NEXT: str r6, [sp] @ 4-byte Spill +; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: str r1, [sp] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s26, s20 +; CHECK-NEXT: vcmp.f32 s26, s22 ; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: ittt lt +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r6, #0 ; CHECK-NEXT: movtlt r6, #65534 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s26, s22 +; CHECK-NEXT: vcmp.f32 s26, s20 ; CHECK-NEXT: vcvtt.f32.f16 s18, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: ittt gt @@ -4664,144 +4386,144 @@ define arm_aapcs_vfpcc <8 x i50> @test_signed_v8f16_v8i50(<8 x half> %f) { ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str.w r0, [r7, #25] +; CHECK-NEXT: str.w r0, [r4, #25] ; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r6, #0 ; CHECK-NEXT: bl __aeabi_f2lz -; 
CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: vcmp.f32 s18, s22 ; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: mov r9, r1 -; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: ittt lt -; CHECK-NEXT: movlt.w r8, #0 ; CHECK-NEXT: movwlt r9, #0 ; CHECK-NEXT: movtlt r9, #65534 +; CHECK-NEXT: movlt.w r8, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: vcvtb.f32.f16 s18, s19 ; CHECK-NEXT: ittt gt +; CHECK-NEXT: movgt.w r8, #-1 ; CHECK-NEXT: movwgt r9, #65535 ; CHECK-NEXT: movtgt r9, #1 -; CHECK-NEXT: movgt.w r8, #-1 ; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt vs ; CHECK-NEXT: movvs.w r8, #0 ; CHECK-NEXT: movvs.w r9, #0 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r11, r1 ; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: mov r11, r0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: ittt lt -; CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: movwlt r11, #0 -; CHECK-NEXT: movtlt r11, #65534 +; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movtlt r5, #65534 +; CHECK-NEXT: movlt.w r11, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: vcvtt.f32.f16 s18, s19 ; CHECK-NEXT: ittt gt -; CHECK-NEXT: movwgt r11, #65535 -; CHECK-NEXT: movtgt r11, #1 -; CHECK-NEXT: movgt.w r10, #-1 +; CHECK-NEXT: movgt.w r11, #-1 +; CHECK-NEXT: movwgt r5, #65535 +; CHECK-NEXT: movtgt r5, #1 ; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs.w r10, #0 ; CHECK-NEXT: movvs.w r11, #0 +; CHECK-NEXT: movvs r5, #0 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcvtb.f32.f16 s16, s16 -; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r10, r0 ; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: mov r7, r1 +; 
CHECK-NEXT: vcmp.f32 s18, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: ittt lt -; CHECK-NEXT: movlt r4, #0 -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: movtlt r5, #65534 -; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movtlt r7, #65534 +; CHECK-NEXT: movlt.w r10, #0 +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: ittt gt -; CHECK-NEXT: movwgt r5, #65535 -; CHECK-NEXT: movtgt r5, #1 -; CHECK-NEXT: movgt.w r4, #-1 +; CHECK-NEXT: movgt.w r10, #-1 +; CHECK-NEXT: movwgt r7, #65535 +; CHECK-NEXT: movtgt r7, #1 ; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs r4, #0 -; CHECK-NEXT: movvs r5, #0 +; CHECK-NEXT: movvs.w r10, #0 +; CHECK-NEXT: movvs r7, #0 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: bfc r11, #18, #14 +; CHECK-NEXT: vcmp.f32 s16, s22 +; CHECK-NEXT: bfc r5, #18, #14 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: ittt lt +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movtlt r1, #65534 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s16, s22 -; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: vcmp.f32 s16, s20 +; CHECK-NEXT: mov r2, r11 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: ittt gt ; CHECK-NEXT: movwgt r1, #65535 ; CHECK-NEXT: movtgt r1, #1 ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: vcmp.f32 s16, s16 -; CHECK-NEXT: lsrl r2, r11, #28 +; CHECK-NEXT: lsrl r2, r5, #28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str r0, [r7] -; CHECK-NEXT: lsrs r0, r5, #10 -; CHECK-NEXT: bfc r5, #18, #14 +; CHECK-NEXT: str r0, [r4] +; CHECK-NEXT: lsr.w r0, r7, #10 +; CHECK-NEXT: bfc r7, #18, #14 ; CHECK-NEXT: bfc r9, #18, #14 -; CHECK-NEXT: lsll r4, r5, #22 +; CHECK-NEXT: lsll r10, r7, #22 ; CHECK-NEXT: bfc r6, #18, #14 -; CHECK-NEXT: orr.w r3, r11, r5 -; CHECK-NEXT: str.w r3, [r7, #45] -; CHECK-NEXT: orrs r2, r4 -; CHECK-NEXT: str.w r2, 
[r7, #41] -; CHECK-NEXT: strb.w r0, [r7, #49] +; CHECK-NEXT: orr.w r3, r5, r7 +; CHECK-NEXT: str.w r3, [r4, #45] +; CHECK-NEXT: orr.w r2, r2, r10 +; CHECK-NEXT: str.w r2, [r4, #41] +; CHECK-NEXT: strb.w r0, [r4, #49] ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: lsrl r0, r9, #14 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: orr.w r2, r9, r10, lsl #4 -; CHECK-NEXT: str.w r2, [r7, #37] -; CHECK-NEXT: str.w r0, [r7, #33] +; CHECK-NEXT: orr.w r2, r9, r11, lsl #4 +; CHECK-NEXT: str.w r2, [r4, #37] +; CHECK-NEXT: str.w r0, [r4, #33] ; CHECK-NEXT: orr.w r0, r6, r8, lsl #18 -; CHECK-NEXT: str.w r0, [r7, #29] -; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: str.w r0, [r4, #29] ; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload -; CHECK-NEXT: ldr r4, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: bfc r5, #18, #14 -; CHECK-NEXT: lsr.w r0, r3, #10 +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: lsr.w r5, r3, #10 ; CHECK-NEXT: bfc r3, #18, #14 -; CHECK-NEXT: mov r2, r4 -; CHECK-NEXT: lsll r6, r3, #22 -; CHECK-NEXT: lsrl r2, r5, #28 -; CHECK-NEXT: orr.w r3, r3, r5 -; CHECK-NEXT: str r3, [r7, #20] -; CHECK-NEXT: orr.w r2, r2, r6 -; CHECK-NEXT: str r2, [r7, #16] -; CHECK-NEXT: strb r0, [r7, #24] +; CHECK-NEXT: lsll r0, r3, #22 +; CHECK-NEXT: mov r7, r3 +; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: bfc r3, #18, #14 +; CHECK-NEXT: lsrl r2, r3, #28 +; CHECK-NEXT: orr.w r3, r3, r7 +; CHECK-NEXT: str r3, [r4, #20] +; CHECK-NEXT: orr.w r2, r2, r0 +; CHECK-NEXT: str r2, [r4, #16] +; CHECK-NEXT: strb r5, [r4, #24] ; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: ldr r6, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: ldr r7, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: bfc r3, #18, #14 -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: lsrl r0, r3, #14 -; CHECK-NEXT: orr.w r2, r3, r4, lsl #4 -; CHECK-NEXT: strd r0, r2, 
[r7, #8] +; CHECK-NEXT: orr.w r2, r3, r6, lsl #4 +; CHECK-NEXT: strd r0, r2, [r4, #8] ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: bfc r1, #18, #14 -; CHECK-NEXT: orr.w r0, r1, r6, lsl #18 -; CHECK-NEXT: str r0, [r7, #4] +; CHECK-NEXT: orr.w r0, r1, r7, lsl #18 +; CHECK-NEXT: str r0, [r4, #4] ; CHECK-NEXT: add sp, #24 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 2 @@ -4830,37 +4552,37 @@ define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) { ; CHECK-NEXT: vcvtb.f32.f16 s26, s19 ; CHECK-NEXT: mov r9, r0 ; CHECK-NEXT: vmov r0, s26 -; CHECK-NEXT: vldr s30, .LCPI49_0 -; CHECK-NEXT: vldr s28, .LCPI49_1 +; CHECK-NEXT: vldr s28, .LCPI49_0 +; CHECK-NEXT: vldr s30, .LCPI49_1 ; CHECK-NEXT: mov r8, r1 -; CHECK-NEXT: vcmp.f32 s24, s30 -; CHECK-NEXT: vcvtt.f32.f16 s22, s18 +; CHECK-NEXT: vcmp.f32 s24, s28 +; CHECK-NEXT: vcvtt.f32.f16 s20, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt.w r8, #-2147483648 ; CHECK-NEXT: movlt.w r9, #0 -; CHECK-NEXT: vcmp.f32 s24, s28 -; CHECK-NEXT: vcvtt.f32.f16 s20, s16 +; CHECK-NEXT: vcmp.f32 s24, s30 +; CHECK-NEXT: vcvtt.f32.f16 s22, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt gt ; CHECK-NEXT: movgt.w r9, #-1 ; CHECK-NEXT: mvngt r8, #-2147483648 ; CHECK-NEXT: vcmp.f32 s24, s24 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vmov r4, s22 ; CHECK-NEXT: vmov r6, s20 +; CHECK-NEXT: vmov r4, s22 ; CHECK-NEXT: itt vs ; CHECK-NEXT: movvs.w r8, #0 ; CHECK-NEXT: movvs.w r9, #0 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: vcmp.f32 s26, s30 +; CHECK-NEXT: vcmp.f32 s26, s28 ; CHECK-NEXT: mov r11, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt.w r10, #0 ; CHECK-NEXT: movlt.w r11, #-2147483648 -; CHECK-NEXT: vcmp.f32 s26, s28 +; CHECK-NEXT: 
vcmp.f32 s26, s30 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: itt gt @@ -4873,13 +4595,13 @@ define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) { ; CHECK-NEXT: movvs.w r11, #0 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: vcmp.f32 s22, s30 +; CHECK-NEXT: vcmp.f32 s22, s28 ; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt.w r4, #-2147483648 ; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: vcmp.f32 s22, s28 +; CHECK-NEXT: vcmp.f32 s22, s30 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: itt gt @@ -4895,12 +4617,12 @@ define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) { ; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: vcmp.f32 s20, s30 +; CHECK-NEXT: vcmp.f32 s20, s28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt.w r6, #-2147483648 ; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: vcmp.f32 s20, s28 +; CHECK-NEXT: vcmp.f32 s20, s30 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt gt ; CHECK-NEXT: movgt.w r7, #-1 @@ -4911,12 +4633,12 @@ define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) { ; CHECK-NEXT: movvs r6, #0 ; CHECK-NEXT: movvs r7, #0 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s16, s30 +; CHECK-NEXT: vcmp.f32 s16, s28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt.w r1, #-2147483648 -; CHECK-NEXT: vcmp.f32 s16, s28 +; CHECK-NEXT: vcmp.f32 s16, s30 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s16, s16 ; CHECK-NEXT: itt gt @@ -4932,11 +4654,11 @@ define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) { ; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: vmov q5[3], q5[1], r1, r6 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s16, s30 +; CHECK-NEXT: vcmp.f32 s16, s28 ; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: vmrs 
APSR_nzcv, fpscr ; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: vcmp.f32 s16, s28 +; CHECK-NEXT: vcmp.f32 s16, s30 ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt.w r6, #-2147483648 ; CHECK-NEXT: movlt r7, #0 @@ -4952,12 +4674,12 @@ define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) { ; CHECK-NEXT: movvs r6, #0 ; CHECK-NEXT: movvs r7, #0 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s16, s30 +; CHECK-NEXT: vcmp.f32 s16, s28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt.w r1, #-2147483648 -; CHECK-NEXT: vcmp.f32 s16, s28 +; CHECK-NEXT: vcmp.f32 s16, s30 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s16, s16 ; CHECK-NEXT: itt gt @@ -4973,13 +4695,13 @@ define arm_aapcs_vfpcc <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) { ; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: vmov q6[3], q6[1], r1, r6 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s16, s30 +; CHECK-NEXT: vcmp.f32 s16, s28 ; CHECK-NEXT: vmov q3[2], q3[0], r10, r9 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt.w r1, #-2147483648 -; CHECK-NEXT: vcmp.f32 s16, s28 +; CHECK-NEXT: vcmp.f32 s16, s30 ; CHECK-NEXT: vmov q3[3], q3[1], r11, r8 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt gt @@ -5016,77 +4738,109 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: .pad #56 -; CHECK-NEXT: sub sp, #56 +; CHECK-NEXT: .pad #48 +; CHECK-NEXT: sub sp, #48 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: vcvtt.f32.f16 s24, s16 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: vcvtb.f32.f16 s24, s17 ; CHECK-NEXT: vmov r0, s24 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcvtt.f32.f16 s26, s17 -; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: vcvtb.f32.f16 s26, s18 +; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: vmov r0, 
s26 ; CHECK-NEXT: vldr s22, .LCPI50_0 ; CHECK-NEXT: vldr s20, .LCPI50_1 +; CHECK-NEXT: mov r9, r1 ; CHECK-NEXT: vcmp.f32 s24, s22 +; CHECK-NEXT: mov r10, r2 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt lt ; CHECK-NEXT: mvnlt r3, #7 -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt.w r8, #0 +; CHECK-NEXT: movlt.w r9, #0 +; CHECK-NEXT: movlt.w r10, #0 ; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: movgt.w r10, #-1 +; CHECK-NEXT: movgt.w r9, #-1 +; CHECK-NEXT: movgt.w r8, #-1 ; CHECK-NEXT: movgt r3, #7 ; CHECK-NEXT: vcmp.f32 s24, s24 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: str r3, [sp, #52] @ 4-byte Spill -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: str r2, [sp, #48] @ 4-byte Spill -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r7, #0 -; CHECK-NEXT: str r7, [sp, #40] @ 4-byte Spill -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: str r1, [sp, #44] @ 4-byte Spill +; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: ittt vs +; CHECK-NEXT: movvs.w r8, #0 +; CHECK-NEXT: movvs.w r9, #0 +; CHECK-NEXT: movvs.w r10, #0 ; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: vcvtb.f32.f16 s24, s19 +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: vmov r0, s24 +; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: vcmp.f32 s26, s22 -; CHECK-NEXT: vcvtt.f32.f16 s24, s18 +; CHECK-NEXT: mov r7, r2 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt lt ; CHECK-NEXT: mvnlt r3, #7 +; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: vcmp.f32 s26, s20 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: movgt.w r6, #-1 +; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: movgt r3, #7 +; CHECK-NEXT: vcmp.f32 s26, s26 
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: str r3, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: ittt vs +; CHECK-NEXT: movvs r5, #0 +; CHECK-NEXT: movvs r6, #0 +; CHECK-NEXT: movvs r7, #0 +; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: vcmp.f32 s24, s22 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s26, s20 +; CHECK-NEXT: mvnlt r3, #7 +; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s24, s24 ; CHECK-NEXT: itttt gt +; CHECK-NEXT: movgt r3, #7 ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: movgt r3, #7 -; CHECK-NEXT: vcmp.f32 s26, s26 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill -; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: str r2, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: str.w r2, [r4, #83] ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: str r1, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: str.w r1, [r4, #79] ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: vcvtt.f32.f16 s24, s16 +; CHECK-NEXT: str.w r0, [r4, #75] ; CHECK-NEXT: vmov r0, s24 +; CHECK-NEXT: str.w r7, [r4, #58] +; CHECK-NEXT: str.w r6, [r4, #54] +; CHECK-NEXT: str.w r5, [r4, #50] +; CHECK-NEXT: str.w r10, [r4, #33] +; CHECK-NEXT: str.w r9, [r4, #29] +; CHECK-NEXT: str.w r8, [r4, #25] +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: str r3, [sp, #28] @ 4-byte Spill ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcmp.f32 s24, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr @@ -5106,155 +4860,119 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 -; 
CHECK-NEXT: str r3, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: vcvtb.f32.f16 s24, s17 -; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: vcvtt.f32.f16 s24, s17 +; CHECK-NEXT: str r0, [sp, #40] @ 4-byte Spill ; CHECK-NEXT: vmov r0, s24 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: str r1, [sp, #44] @ 4-byte Spill ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcvtb.f32.f16 s18, s18 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: mov r8, r1 ; CHECK-NEXT: vcmp.f32 s24, s22 -; CHECK-NEXT: mov r6, r2 +; CHECK-NEXT: vcvtt.f32.f16 s18, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt lt ; CHECK-NEXT: mvnlt r3, #7 -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: movlt.w r8, #0 -; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: movgt.w r8, #-1 -; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: movgt r3, #7 ; CHECK-NEXT: vcmp.f32 s24, s24 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: ittt vs -; CHECK-NEXT: movvs r5, #0 -; CHECK-NEXT: movvs.w r8, #0 -; CHECK-NEXT: movvs r6, #0 +; CHECK-NEXT: str r3, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: str r2, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r0, #0 +; 
CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r11, r1 -; CHECK-NEXT: mov r4, r2 +; CHECK-NEXT: mov r7, r1 +; CHECK-NEXT: mov r8, r2 ; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: mvnlt r3, #7 +; CHECK-NEXT: movlt.w r8, #0 +; CHECK-NEXT: movlt r5, #0 ; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: movlt.w r11, #0 -; CHECK-NEXT: movlt r4, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: vcvtb.f32.f16 s18, s19 +; CHECK-NEXT: vcvtt.f32.f16 s18, s19 ; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r4, #-1 -; CHECK-NEXT: movgt.w r11, #-1 ; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: movgt.w r8, #-1 ; CHECK-NEXT: movgt r3, #7 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 ; CHECK-NEXT: str r3, [sp] @ 4-byte Spill ; CHECK-NEXT: ittt vs +; CHECK-NEXT: movvs.w r8, #0 +; CHECK-NEXT: movvs r5, #0 ; CHECK-NEXT: movvs r7, #0 -; CHECK-NEXT: movvs.w r11, #0 -; CHECK-NEXT: movvs r4, #0 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: mov r9, r3 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r2, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: mvnlt r9, #7 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r9, #7 -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: str.w r2, [r10, #83] -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: str.w r1, [r10, #79] -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; 
CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: vcvtt.f32.f16 s18, s19 -; CHECK-NEXT: str.w r0, [r10, #75] -; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: str.w r4, [r10, #58] -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r11, [r10, #54] -; CHECK-NEXT: str.w r7, [r10, #50] -; CHECK-NEXT: str.w r6, [r10, #33] -; CHECK-NEXT: str.w r8, [r10, #29] -; CHECK-NEXT: str.w r5, [r10, #25] -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r9, #0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcvtb.f32.f16 s16, s16 -; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: mov r7, r1 +; CHECK-NEXT: mov r9, r1 ; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: mov r6, r2 -; CHECK-NEXT: mov r4, r3 +; CHECK-NEXT: mov r10, r2 +; CHECK-NEXT: mov r11, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt lt -; CHECK-NEXT: mvnlt r4, #7 +; CHECK-NEXT: mvnlt r11, #7 +; CHECK-NEXT: movlt.w r10, #0 +; CHECK-NEXT: movlt.w r9, #0 ; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: movlt r5, #0 ; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r5, #-1 -; CHECK-NEXT: movgt.w r7, #-1 ; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: movgt r4, #7 +; CHECK-NEXT: movgt.w r9, #-1 +; CHECK-NEXT: movgt.w r10, #-1 +; CHECK-NEXT: movgt.w r11, #7 ; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt vs -; CHECK-NEXT: movvs r4, #0 +; CHECK-NEXT: movvs.w r11, #0 +; CHECK-NEXT: movvs.w r10, #0 +; CHECK-NEXT: movvs.w r9, #0 ; CHECK-NEXT: movvs r6, #0 -; CHECK-NEXT: movvs r7, #0 -; CHECK-NEXT: movvs r5, #0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcmp.f32 s16, s22 +; CHECK-NEXT: mov r12, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: mvnlt r3, #7 +; CHECK-NEXT: mvnlt r12, #7 ; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt gt 
-; CHECK-NEXT: movgt r3, #7 +; CHECK-NEXT: movgt.w r12, #7 ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: movgt.w r2, #-1 @@ -5262,74 +4980,73 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: str.w r2, [r10, #8] +; CHECK-NEXT: str r2, [r4, #8] ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: str.w r1, [r10, #4] +; CHECK-NEXT: str r1, [r4, #4] ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str.w r0, [r10] +; CHECK-NEXT: str r0, [r4] +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: lsrl r0, r9, #28 +; CHECK-NEXT: str.w r0, [r4, #91] ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: lsrl r0, r7, #28 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: orr.w r1, r7, r6, lsl #4 -; CHECK-NEXT: str.w r1, [r10, #95] -; CHECK-NEXT: and r1, r4, #15 -; CHECK-NEXT: str.w r0, [r10, #91] -; CHECK-NEXT: and r0, r9, #15 -; CHECK-NEXT: lsrl r6, r1, #28 -; CHECK-NEXT: strb.w r6, [r10, #99] -; CHECK-NEXT: orr.w r0, r0, r5, lsl #4 -; CHECK-NEXT: str.w r0, [r10, #87] -; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: str.w r0, [r4, #66] +; CHECK-NEXT: ldr.w lr, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: mov r0, lr +; CHECK-NEXT: lsrl r0, r3, #28 +; CHECK-NEXT: str.w r0, [r4, #41] +; CHECK-NEXT: ldrd r0, r1, [sp, #40] @ 8-byte Folded Reload ; CHECK-NEXT: lsrl r0, r1, #28 -; CHECK-NEXT: orr.w r1, r1, r2, lsl #4 -; CHECK-NEXT: str.w r1, [r10, #70] -; CHECK-NEXT: str.w r0, [r10, #66] -; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: and r1, r0, #15 -; CHECK-NEXT: lsrl r2, r1, #28 -; CHECK-NEXT: strb.w r2, [r10, #74] +; CHECK-NEXT: str r1, [sp, #44] @ 4-byte Spill +; CHECK-NEXT: and r1, r11, #15 +; CHECK-NEXT: str r0, [r4, #16] +; 
CHECK-NEXT: orr.w r0, r9, r10, lsl #4 +; CHECK-NEXT: lsrl r10, r1, #28 +; CHECK-NEXT: str.w r0, [r4, #95] +; CHECK-NEXT: strb.w r10, [r4, #99] +; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: and r0, r0, #15 +; CHECK-NEXT: orr.w r0, r0, r6, lsl #4 +; CHECK-NEXT: str.w r0, [r4, #87] +; CHECK-NEXT: orr.w r0, r7, r8, lsl #4 +; CHECK-NEXT: str.w r0, [r4, #70] ; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload +; CHECK-NEXT: and r1, r0, #15 +; CHECK-NEXT: lsrl r8, r1, #28 +; CHECK-NEXT: strb.w r8, [r4, #74] +; CHECK-NEXT: ldr r0, [sp, #32] @ 4-byte Reload ; CHECK-NEXT: and r0, r0, #15 -; CHECK-NEXT: orr.w r0, r0, r7, lsl #4 -; CHECK-NEXT: str.w r0, [r10, #62] -; CHECK-NEXT: ldr r7, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: ldr r2, [sp, #32] @ 4-byte Reload -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: lsrl r0, r1, #28 -; CHECK-NEXT: orr.w r1, r1, r2, lsl #4 -; CHECK-NEXT: str.w r1, [r10, #45] -; CHECK-NEXT: str.w r0, [r10, #41] -; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: orr.w r0, r0, r5, lsl #4 +; CHECK-NEXT: str.w r0, [r4, #62] +; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: orr.w r0, r3, r2, lsl #4 +; CHECK-NEXT: str.w r0, [r4, #45] +; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: and r1, r0, #15 ; CHECK-NEXT: lsrl r2, r1, #28 -; CHECK-NEXT: strb.w r2, [r10, #49] -; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: strb.w r2, [r4, #49] +; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload ; CHECK-NEXT: and r0, r0, #15 -; CHECK-NEXT: orr.w r0, r0, r7, lsl #4 -; CHECK-NEXT: str.w r0, [r10, #37] -; CHECK-NEXT: ldr r7, [sp, #40] @ 4-byte Reload -; CHECK-NEXT: ldr r1, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: ldr r2, [sp, #48] @ 4-byte Reload -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: lsrl r0, r1, #28 -; CHECK-NEXT: orr.w r1, r1, r2, lsl #4 -; CHECK-NEXT: strd r0, r1, [r10, #16] -; CHECK-NEXT: ldr r0, [sp, #52] @ 4-byte Reload +; CHECK-NEXT: orr.w r0, r0, lr, lsl #4 +; 
CHECK-NEXT: str.w r0, [r4, #37] +; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; CHECK-NEXT: orr.w r0, r0, r2, lsl #4 +; CHECK-NEXT: str r0, [r4, #20] +; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload ; CHECK-NEXT: and r1, r0, #15 ; CHECK-NEXT: lsrl r2, r1, #28 -; CHECK-NEXT: strb.w r2, [r10, #24] +; CHECK-NEXT: strb r2, [r4, #24] ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: and r0, r3, #15 -; CHECK-NEXT: orr.w r0, r0, r7, lsl #4 -; CHECK-NEXT: str.w r0, [r10, #12] -; CHECK-NEXT: add sp, #56 +; CHECK-NEXT: movvs.w r12, #0 +; CHECK-NEXT: ldr r1, [sp, #40] @ 4-byte Reload +; CHECK-NEXT: and r0, r12, #15 +; CHECK-NEXT: orr.w r0, r0, r1, lsl #4 +; CHECK-NEXT: str r0, [r4, #12] +; CHECK-NEXT: add sp, #48 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -5346,62 +5063,63 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) { define arm_aapcs_vfpcc <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) { ; CHECK-LABEL: test_signed_v8f16_v8i128: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vcvtt.f32.f16 s26, s19 -; CHECK-NEXT: vcvtb.f32.f16 s28, s19 -; CHECK-NEXT: vmov r0, s26 -; CHECK-NEXT: vcvtb.f32.f16 s24, s17 -; CHECK-NEXT: vldr s20, .LCPI51_0 -; CHECK-NEXT: vmov r5, s28 -; CHECK-NEXT: vmov r8, s24 -; CHECK-NEXT: vcvtt.f32.f16 s30, s18 +; CHECK-NEXT: vcvtt.f32.f16 s30, s19 +; CHECK-NEXT: vcvtb.f32.f16 s20, s16 +; CHECK-NEXT: vmov r0, s30 +; CHECK-NEXT: vcvtb.f32.f16 s26, s19 +; CHECK-NEXT: vldr s22, .LCPI51_0 +; 
CHECK-NEXT: vmov r5, s20 +; CHECK-NEXT: vmov r7, s26 +; CHECK-NEXT: vcvtt.f32.f16 s28, s18 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vldr s22, .LCPI51_1 +; CHECK-NEXT: vldr s24, .LCPI51_1 ; CHECK-NEXT: add.w r12, r4, #112 -; CHECK-NEXT: vmov r6, s30 -; CHECK-NEXT: vcmp.f32 s26, s22 +; CHECK-NEXT: vmov r6, s28 +; CHECK-NEXT: vcvtb.f32.f16 s18, s18 +; CHECK-NEXT: vcmp.f32 s30, s24 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt.w r3, #-2147483648 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: vcmp.f32 s30, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s26 ; CHECK-NEXT: itttt gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: mvngt r3, #-2147483648 +; CHECK-NEXT: vcmp.f32 s30, s30 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: ittt vs ; CHECK-NEXT: movvs r2, #0 ; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: vcvtb.f32.f16 s26, s18 +; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s28, s22 +; CHECK-NEXT: vcmp.f32 s26, s24 ; CHECK-NEXT: add.w r12, r4, #96 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt.w r3, #-2147483648 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: vcmp.f32 s26, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s28 +; CHECK-NEXT: vcmp.f32 s26, s26 ; CHECK-NEXT: itttt gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: movgt.w r1, #-1 @@ -5410,27 +5128,26 @@ define arm_aapcs_vfpcc <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) { ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: vmrs 
APSR_nzcv, fpscr ; CHECK-NEXT: ittt vs ; CHECK-NEXT: movvs r2, #0 ; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: vmov r7, s26 -; CHECK-NEXT: vcvtt.f32.f16 s28, s17 +; CHECK-NEXT: vmov r7, s18 +; CHECK-NEXT: vcvtt.f32.f16 s26, s17 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s30, s22 +; CHECK-NEXT: vcmp.f32 s28, s24 ; CHECK-NEXT: add.w r12, r4, #80 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt.w r3, #-2147483648 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: vcmp.f32 s28, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s30 +; CHECK-NEXT: vcmp.f32 s28, s28 ; CHECK-NEXT: itttt gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: movgt.w r1, #-1 @@ -5439,155 +5156,145 @@ define arm_aapcs_vfpcc <8 x i128> @test_signed_v8f16_v8i128(<8 x half> %f) { ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: ittt vs ; CHECK-NEXT: movvs r2, #0 ; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} ; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: vmov r5, s28 -; CHECK-NEXT: vcvtt.f32.f16 s18, s16 +; CHECK-NEXT: vmov r6, s26 +; CHECK-NEXT: vcvtb.f32.f16 s28, s17 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s26, s22 +; CHECK-NEXT: vcmp.f32 s18, s24 ; CHECK-NEXT: add.w r12, r4, #64 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt.w r3, #-2147483648 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: vcvtt.f32.f16 s16, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s26 ; CHECK-NEXT: itttt gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: movgt.w r2, #-1 ; 
CHECK-NEXT: mvngt r3, #-2147483648 +; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: it vs +; CHECK-NEXT: ittt vs ; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt vs ; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: vcvtb.f32.f16 s16, s16 -; CHECK-NEXT: vmov r6, s18 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: vmov r7, s28 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s28, s22 +; CHECK-NEXT: vcmp.f32 s26, s24 ; CHECK-NEXT: add.w r12, r4, #48 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt.w r3, #-2147483648 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: vcmp.f32 s26, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s28 ; CHECK-NEXT: itttt gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: mvngt r3, #-2147483648 +; CHECK-NEXT: vcmp.f32 s26, s26 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: it vs +; CHECK-NEXT: ittt vs ; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt vs ; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: vmov r7, s16 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: vmov r6, s16 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s24, s22 +; CHECK-NEXT: vcmp.f32 s28, s24 ; CHECK-NEXT: add.w r12, r4, #32 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt.w r3, #-2147483648 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: vcmp.f32 s28, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, 
s24 ; CHECK-NEXT: itttt gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: mvngt r3, #-2147483648 +; CHECK-NEXT: vcmp.f32 s28, s28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt vs +; CHECK-NEXT: itttt vs ; CHECK-NEXT: movvs r3, #0 ; CHECK-NEXT: movvs r2, #0 ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: vcmp.f32 s16, s24 ; CHECK-NEXT: add.w r12, r4, #16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt.w r3, #-2147483648 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: vcmp.f32 s16, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: itttt gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: mvngt r3, #-2147483648 +; CHECK-NEXT: vcmp.f32 s16, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt vs +; CHECK-NEXT: itttt vs ; CHECK-NEXT: movvs r3, #0 ; CHECK-NEXT: movvs r2, #0 ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: vcmp.f32 s16, s22 +; CHECK-NEXT: vcmp.f32 s20, s24 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt.w r3, #-2147483648 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: vcmp.f32 s20, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s16 ; CHECK-NEXT: itttt gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: movgt.w r2, #-1 ; 
CHECK-NEXT: mvngt r3, #-2147483648 +; CHECK-NEXT: vcmp.f32 s20, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt vs +; CHECK-NEXT: itttt vs ; CHECK-NEXT: movvs r3, #0 ; CHECK-NEXT: movvs r2, #0 ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: stm r4!, {r0, r1, r2, r3} ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI51_0: diff --git a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll index 13609bd1903f2..ee040feca4240 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll @@ -268,41 +268,38 @@ declare <6 x i32> @llvm.fptoui.sat.v6f64.v6i32 (<6 x double>) define arm_aapcs_vfpcc <1 x i32> @test_unsigned_v1f64_v1i32(<1 x double> %f) { ; CHECK-LABEL: test_unsigned_v1f64_v1i32: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: vldr d1, .LCPI8_0 ; CHECK-NEXT: vmov r4, r5, d0 ; CHECK-NEXT: vmov r2, r3, d1 ; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: vldr d0, .LCPI8_1 -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: vmov r2, r3, d0 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_d2uiz -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r0, r0, r7, ne +; CHECK-NEXT: vldr d0, .LCPI8_1 ; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: csel r6, r0, r6, ne +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: vmov r2, r3, d0 +; 
CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: movne.w r6, #-1 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: pop {r4, r5, r6, pc} ; CHECK-NEXT: .p2align 3 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI8_0: -; CHECK-NEXT: .long 4292870144 @ double 4294967295 -; CHECK-NEXT: .long 1106247679 -; CHECK-NEXT: .LCPI8_1: ; CHECK-NEXT: .long 0 @ double 0 ; CHECK-NEXT: .long 0 +; CHECK-NEXT: .LCPI8_1: +; CHECK-NEXT: .long 4292870144 @ double 4294967295 +; CHECK-NEXT: .long 1106247679 %x = call <1 x i32> @llvm.fptoui.sat.v1f64.v1i32(<1 x double> %f) ret <1 x i32> %x } @@ -316,84 +313,60 @@ define arm_aapcs_vfpcc <2 x i32> @test_unsigned_v2f64_v2i32(<2 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI9_0 ; CHECK-NEXT: vmov r6, r7, d9 -; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: vmov r11, r3, d0 +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: strd r3, r2, [sp, #4] @ 8-byte Folded Spill +; CHECK-NEXT: mov r2, r11 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: vldr d0, .LCPI9_1 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: mov r10, r1 -; CHECK-NEXT: vmov r9, r8, d0 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: vmov r11, r4, d8 -; CHECK-NEXT: lsrs r0, r0, #5 -; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: cmp.w r8, #0 +; CHECK-NEXT: csel r9, r0, r8, ne +; CHECK-NEXT: csel r8, r1, r8, ne +; CHECK-NEXT: vmov r10, r3, d0 ; CHECK-NEXT: 
mov r0, r6 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r5, #-1 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: vmov r5, r4, d8 +; CHECK-NEXT: str r3, [sp] @ 4-byte Spill +; CHECK-NEXT: mov r2, r10 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #1 -; CHECK-NEXT: ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne.w r8, #0 +; CHECK-NEXT: movne.w r9, #-1 +; CHECK-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r8, r4 +; CHECK-NEXT: mov r2, r11 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: lsrs r4, r0, #5 -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #0 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r10, #0 +; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r10, #0 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r1, #0 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r1, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r10 -; CHECK-NEXT: add sp, #16 +; CHECK-NEXT: csel r7, r1, r6, ne +; CHECK-NEXT: csel r6, r0, r6, ne +; 
CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne.w r6, #-1 +; CHECK-NEXT: movne r7, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r6, r9 +; CHECK-NEXT: vmov q0[3], q0[1], r7, r8 +; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -418,99 +391,93 @@ define arm_aapcs_vfpcc <3 x i32> @test_unsigned_v3f64_v3i32(<3 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #24 -; CHECK-NEXT: sub sp, #24 +; CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: vmov.f32 s18, s0 ; CHECK-NEXT: vmov.f32 s19, s1 ; CHECK-NEXT: vldr d0, .LCPI10_0 -; CHECK-NEXT: vmov r4, r5, d1 -; CHECK-NEXT: vmov r9, r7, d0 +; CHECK-NEXT: vmov r8, r9, d1 +; CHECK-NEXT: vmov r5, r4, d0 ; CHECK-NEXT: vmov.f32 s16, s4 ; CHECK-NEXT: vmov.f32 s17, s5 -; CHECK-NEXT: str.w r9, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: str r7, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: vldr d0, .LCPI10_1 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: vmov r11, r3, d0 -; CHECK-NEXT: str r3, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r9 +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r4 +; CHECK-NEXT: str r4, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r10, r0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r9 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: vmov r10, r8, d8 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte 
Reload -; CHECK-NEXT: csel r0, r0, r6, ne -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: vmov r5, r4, d9 -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: vldr d0, .LCPI10_1 +; CHECK-NEXT: vmov r11, r1, d9 +; CHECK-NEXT: cmp.w r10, #0 +; CHECK-NEXT: vmov r7, r6, d8 +; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: csel r10, r0, r10, ne +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: mov r1, r9 +; CHECK-NEXT: mov r9, r2 +; CHECK-NEXT: mov r8, r3 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldr r7, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r4 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r10, #-1 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: csel r6, r0, r9, ne -; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: csel r5, r0, r4, ne +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r6, #-1 -; CHECK-NEXT: ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: movne.w r5, #-1 +; CHECK-NEXT: ldr 
r6, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: ldrd r2, r3, [sp, #4] @ 8-byte Folded Reload +; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r0, r0, r7, ne -; CHECK-NEXT: cmp.w r8, #0 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: csel r4, r0, r4, ne +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: vmov.32 q0[1], r10 +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: vmov.32 q0[1], r1 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r6 -; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: vmov q0[2], q0[0], r4, r5 +; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 3 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI10_0: -; CHECK-NEXT: .long 4292870144 @ double 4294967295 -; CHECK-NEXT: .long 1106247679 -; CHECK-NEXT: .LCPI10_1: ; CHECK-NEXT: .long 0 @ double 0 ; CHECK-NEXT: .long 0 +; CHECK-NEXT: .LCPI10_1: +; CHECK-NEXT: .long 4292870144 @ double 4294967295 +; CHECK-NEXT: .long 1106247679 %x = call <3 x i32> @llvm.fptoui.sat.v3f64.v3i32(<3 x double> %f) ret <3 x i32> %x } @@ -529,103 +496,103 @@ define arm_aapcs_vfpcc <4 x i32> @test_unsigned_v4f64_v4i32(<4 x double> %f) { ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI11_0 ; CHECK-NEXT: vmov q5, q1 -; CHECK-NEXT: vmov r7, r9, d0 -; CHECK-NEXT: vmov r4, r5, d10 -; CHECK-NEXT: str.w r9, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl __aeabi_dcmpgt +; 
CHECK-NEXT: vmov r8, r9, d10 +; CHECK-NEXT: vmov r2, r11, d0 +; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r9 +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: mov r10, r0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r9 +; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: vldr d0, .LCPI11_1 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: vmov r5, r1, d11 +; CHECK-NEXT: cmp.w r10, #0 +; CHECK-NEXT: vmov r6, r7, d8 ; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: strd r2, r3, [sp, #16] @ 8-byte Folded Spill +; CHECK-NEXT: csel r4, r0, r10, ne +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: strd r5, r1, [sp, #4] @ 8-byte Folded Spill +; CHECK-NEXT: mov r1, r9 +; CHECK-NEXT: str r2, [sp] @ 4-byte Spill +; CHECK-NEXT: mov r8, r2 +; CHECK-NEXT: mov r9, r3 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: mov r5, r7 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: ldr r7, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: str r4, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: mov r10, r11 +; CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: vmov r10, r8, d8 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: csel r0, r0, r6, ne -; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: csel r4, r0, r4, ne +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: vmov r11, r5, d11 -; CHECK-NEXT: mov r4, r7 -; CHECK-NEXT: str r7, [sp, #8] @ 
4-byte Spill -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r8, r9 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: ldr r7, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: csel r8, r0, r9, ne -; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-NEXT: mov r2, r4 ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r8, #-1 -; CHECK-NEXT: ldr.w r10, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: ldr.w r11, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: ldr.w r9, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: mov r3, r10 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: str r4, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: mov r1, r9 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r1, r9 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: vmov r4, r5, d9 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r6, r0, r7, ne -; CHECK-NEXT: cmp.w r9, #0 +; CHECK-NEXT: mov r1, r9 +; CHECK-NEXT: ldr.w r9, [sp] @ 4-byte Reload +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: csel r5, r0, r4, ne +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: vmov r7, r6, d9 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r6, #-1 -; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: movne.w r5, #-1 +; 
CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: mov r3, r10 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldrd r2, r3, [sp, #16] @ 8-byte Folded Reload -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r0, r0, r7, ne -; CHECK-NEXT: cmp.w r9, #0 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: csel r4, r0, r4, ne +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: vmov q0[2], q0[0], r8, r1 -; CHECK-NEXT: vmov q0[3], q0[1], r0, r6 +; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: ldrd r1, r0, [sp, #12] @ 8-byte Folded Reload +; CHECK-NEXT: vmov q0[2], q0[0], r1, r0 +; CHECK-NEXT: vmov q0[3], q0[1], r4, r5 ; CHECK-NEXT: add sp, #24 ; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: add sp, #4 @@ -633,11 +600,11 @@ define arm_aapcs_vfpcc <4 x i32> @test_unsigned_v4f64_v4i32(<4 x double> %f) { ; CHECK-NEXT: .p2align 3 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI11_0: -; CHECK-NEXT: .long 4292870144 @ double 4294967295 -; CHECK-NEXT: .long 1106247679 -; CHECK-NEXT: .LCPI11_1: ; CHECK-NEXT: .long 0 @ double 0 ; CHECK-NEXT: .long 0 +; CHECK-NEXT: .LCPI11_1: +; CHECK-NEXT: .long 4292870144 @ double 4294967295 +; CHECK-NEXT: .long 1106247679 %x = call <4 x i32> @llvm.fptoui.sat.v4f64.v4i32(<4 x double> %f) ret <4 x i32> %x } @@ -651,162 +618,151 @@ define arm_aapcs_vfpcc <5 x i32> @test_unsigned_v5f64_v5i32(<5 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; 
CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} -; CHECK-NEXT: .pad #40 -; CHECK-NEXT: sub sp, #40 +; CHECK-NEXT: .pad #32 +; CHECK-NEXT: sub sp, #32 ; CHECK-NEXT: vmov.f32 s16, s0 -; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: vmov.f32 s17, s1 ; CHECK-NEXT: vldr d0, .LCPI12_0 -; CHECK-NEXT: vmov r5, r6, d4 -; CHECK-NEXT: str r0, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: vmov r6, r11, d4 +; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill ; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: vmov.f32 s20, s6 -; CHECK-NEXT: vmov.f32 s18, s4 +; CHECK-NEXT: vmov.f32 s18, s6 +; CHECK-NEXT: vmov.f32 s20, s4 ; CHECK-NEXT: vmov.f32 s22, s2 -; CHECK-NEXT: vmov.f32 s21, s7 -; CHECK-NEXT: vmov.f32 s19, s5 +; CHECK-NEXT: vmov.f32 s19, s7 +; CHECK-NEXT: vmov.f32 s21, s5 ; CHECK-NEXT: vmov.f32 s23, s3 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: strd r2, r3, [sp, #32] @ 8-byte Folded Spill -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: vldr d0, .LCPI12_1 -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: vmov r7, r3, d0 -; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: str r7, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r11 +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r11 ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: vmov r8, r1, d11 -; CHECK-NEXT: cmp.w r11, #0 -; CHECK-NEXT: vmov r6, r9, d10 -; CHECK-NEXT: csel r0, r0, r11, ne -; CHECK-NEXT: cmp.w r10, #0 -; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: vmov r2, r1, d9 -; CHECK-NEXT: strd r2, r1, [sp, #16] @ 8-byte Folded Spill -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: str r0, [r4, #16] +; 
CHECK-NEXT: vldr d0, .LCPI12_1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: vmov r10, r9, d9 +; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: csel r4, r0, r4, ne ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: ldr r5, [sp, #32] @ 4-byte Reload -; CHECK-NEXT: mov r1, r9 -; CHECK-NEXT: ldr.w r10, [sp, #36] @ 4-byte Reload -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: vmov r5, r1, d10 +; CHECK-NEXT: strd r5, r1, [sp, #12] @ 8-byte Folded Spill +; CHECK-NEXT: mov r1, r11 +; CHECK-NEXT: mov r11, r2 +; CHECK-NEXT: mov r5, r3 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: str r4, [r7, #16] +; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: ldr r7, [sp, #28] @ 4-byte Reload ; CHECK-NEXT: mov r1, r9 -; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r3, r6 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r0, r10 ; CHECK-NEXT: mov r1, r9 ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: csel r0, r0, r4, ne -; CHECK-NEXT: cmp.w r11, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: ldr r6, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r3, r10 -; CHECK-NEXT: mov r11, r10 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r1, r9 +; CHECK-NEXT: csel r4, r0, r4, ne +; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: mov r10, r11 +; CHECK-NEXT: mov r11, r5 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldr.w r10, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r4, #-1 +; 
CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: mov r5, r6 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r9, r7 +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: str r4, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: csel r0, r0, r6, ne ; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: ldr r4, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: ldr.w r8, [sp, #32] @ 4-byte Reload -; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: csel r9, r0, r4, ne +; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: mov r11, r10 +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: mov r8, r11 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r9, #-1 +; CHECK-NEXT: ldr.w r11, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: ldr r5, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: csel r4, r0, r5, ne -; CHECK-NEXT: vmov r5, r6, d8 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; 
CHECK-NEXT: ldr r3, [sp, #36] @ 4-byte Reload -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: csel r7, r0, r4, ne +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r3, r8 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: vmov r4, r5, d8 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r7, #-1 +; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r0, r0, r7, ne -; CHECK-NEXT: cmp.w r10, #0 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: csel r6, r0, r6, ne +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r4 +; CHECK-NEXT: movne.w r6, #-1 +; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: vmov q0[2], q0[0], r6, r7 +; CHECK-NEXT: vmov q0[3], q0[1], r9, r0 ; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: vmov q0[3], q0[1], r1, r0 -; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload ; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: add sp, #40 +; CHECK-NEXT: add sp, #32 ; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 3 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI12_0: -; CHECK-NEXT: .long 4292870144 @ double 4294967295 -; CHECK-NEXT: .long 
1106247679 -; CHECK-NEXT: .LCPI12_1: ; CHECK-NEXT: .long 0 @ double 0 ; CHECK-NEXT: .long 0 +; CHECK-NEXT: .LCPI12_1: +; CHECK-NEXT: .long 4292870144 @ double 4294967295 +; CHECK-NEXT: .long 1106247679 %x = call <5 x i32> @llvm.fptoui.sat.v5f64.v5i32(<5 x double> %f) ret <5 x i32> %x } @@ -823,172 +779,161 @@ define arm_aapcs_vfpcc <6 x i32> @test_unsigned_v6f64_v6i32(<6 x double> %f) { ; CHECK-NEXT: .pad #40 ; CHECK-NEXT: sub sp, #40 ; CHECK-NEXT: vmov.f32 s16, s0 -; CHECK-NEXT: str r0, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill ; CHECK-NEXT: vmov.f32 s17, s1 ; CHECK-NEXT: vldr d0, .LCPI13_0 ; CHECK-NEXT: vmov r5, r6, d5 -; CHECK-NEXT: vmov r11, r3, d0 -; CHECK-NEXT: vmov.f32 s22, s8 -; CHECK-NEXT: vmov.f32 s20, s6 +; CHECK-NEXT: vmov r10, r3, d0 +; CHECK-NEXT: vmov.f32 s20, s8 +; CHECK-NEXT: vmov.f32 s22, s6 ; CHECK-NEXT: vmov.f32 s18, s4 ; CHECK-NEXT: vmov.f32 s24, s2 -; CHECK-NEXT: vmov.f32 s23, s9 -; CHECK-NEXT: vmov.f32 s21, s7 +; CHECK-NEXT: vmov.f32 s21, s9 +; CHECK-NEXT: vmov.f32 s23, s7 ; CHECK-NEXT: vmov.f32 s19, s5 ; CHECK-NEXT: vmov.f32 s25, s3 -; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: str.w r11, [sp, #28] @ 4-byte Spill -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: vldr d0, .LCPI13_1 -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: vmov r4, r9, d0 -; CHECK-NEXT: str r4, [sp, #24] @ 4-byte Spill -; CHECK-NEXT: mov r2, r4 -; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: mov r2, r10 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: vmov r10, r1, d10 -; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: vmov r5, r6, d11 -; CHECK-NEXT: csel r0, r0, r8, ne -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: str r1, [sp, #20] @ 4-byte Spill 
-; CHECK-NEXT: vmov r2, r1, d12 -; CHECK-NEXT: strd r2, r1, [sp, #12] @ 8-byte Folded Spill -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: ldr r7, [sp, #32] @ 4-byte Reload -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: str r0, [r7, #20] -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: ldr.w r8, [sp, #36] @ 4-byte Reload -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r11, r0 +; CHECK-NEXT: vmov r9, r1, d11 +; CHECK-NEXT: vldr d0, .LCPI13_1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: vmov r8, r11, d10 +; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: csel r4, r0, r4, ne ; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: str r1, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: vmov r7, r1, d12 +; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: mov r5, r2 +; CHECK-NEXT: strd r7, r1, [sp, #24] @ 8-byte Folded Spill ; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r4 -; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: ldr r7, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r11 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: str r4, [r7, #20] +; CHECK-NEXT: ldr r6, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: mov r3, r6 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r11 ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: vmov r2, r1, d9 ; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r0, r0, r4, ne -; CHECK-NEXT: cmp.w r11, #0 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: strd r2, r1, [sp, #4] @ 8-byte Folded Spill -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: str r0, [r7, #16] -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: ldr r6, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: ldr.w r11, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: bl __aeabi_dcmpgt -; 
CHECK-NEXT: ldr r5, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: mov r8, r9 +; CHECK-NEXT: csel r4, r0, r4, ne +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r8, r5 +; CHECK-NEXT: strd r2, r1, [sp, #16] @ 8-byte Folded Spill +; CHECK-NEXT: mov r1, r11 +; CHECK-NEXT: ldr.w r11, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: str r4, [r7, #16] +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: ldr r5, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: csel r0, r0, r7, ne ; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: ldr r7, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: ldr r4, [sp, #36] @ 4-byte Reload -; CHECK-NEXT: ldr.w r9, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r3, r4 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: csel r4, r0, r4, ne ; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: str.w r8, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: str r0, [sp] @ 4-byte Spill -; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: ldr r5, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: ldr r7, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: str r4, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, 
r7 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: mov r6, r7 -; CHECK-NEXT: mov r10, r5 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: mov r0, r9 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: csel r9, r0, r7, ne -; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload -; CHECK-NEXT: mov r3, r4 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: csel r9, r0, r4, ne +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne ; CHECK-NEXT: movne.w r9, #-1 -; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: ldr r7, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: ldr r5, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: vmov r5, r6, d8 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r4, r0, r7, ne -; CHECK-NEXT: cmp.w r11, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 -; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: ldr r3, [sp, #36] @ 4-byte Reload -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: csel r8, r0, r4, ne +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: mov r2, r7 ; CHECK-NEXT: bl 
__aeabi_dcmpgt -; CHECK-NEXT: ldr r2, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: vmov r4, r5, d8 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r8, #-1 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csel r0, r0, r7, ne -; CHECK-NEXT: cmp.w r10, #0 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: csel r6, r0, r6, ne +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r4 -; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: vmov q0[3], q0[1], r9, r0 +; CHECK-NEXT: movne.w r6, #-1 ; CHECK-NEXT: ldr r0, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: vmov q0[2], q0[0], r6, r8 +; CHECK-NEXT: vmov q0[3], q0[1], r9, r0 +; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload ; CHECK-NEXT: vstrw.32 q0, [r0] ; CHECK-NEXT: add sp, #40 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12} @@ -997,11 +942,11 @@ define arm_aapcs_vfpcc <6 x i32> @test_unsigned_v6f64_v6i32(<6 x double> %f) { ; CHECK-NEXT: .p2align 3 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI13_0: -; CHECK-NEXT: .long 4292870144 @ double 4294967295 -; CHECK-NEXT: .long 1106247679 -; CHECK-NEXT: .LCPI13_1: ; CHECK-NEXT: .long 0 @ double 0 ; CHECK-NEXT: .long 0 +; CHECK-NEXT: .LCPI13_1: +; CHECK-NEXT: .long 4292870144 @ double 4294967295 +; CHECK-NEXT: .long 1106247679 %x = call <6 x i32> @llvm.fptoui.sat.v6f64.v6i32(<6 x double> %f) ret <6 x i32> %x } @@ -1480,65 +1425,66 @@ define arm_aapcs_vfpcc <4 x 
i50> @test_unsigned_v4f32_v4i50(<4 x float> %f) { ; CHECK-NEXT: .vsave {d8, d9, d10} ; CHECK-NEXT: vpush {d8, d9, d10} ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: bl __aeabi_f2ulz +; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: vmov r0, s19 -; CHECK-NEXT: vldr s20, .LCPI28_0 -; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: mov r7, r1 +; CHECK-NEXT: vcmp.f32 s16, #0 +; CHECK-NEXT: mov r9, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vmov r7, s18 +; CHECK-NEXT: vldr s20, .LCPI28_0 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt.w r8, #0 -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vmov r5, s16 -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movwgt r7, #65535 -; CHECK-NEXT: movtgt r7, #3 -; CHECK-NEXT: movgt.w r8, #-1 +; CHECK-NEXT: movlt.w r9, #0 +; CHECK-NEXT: movlt r5, #0 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: vcmp.f32 s19, #0 +; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: mov r10, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt.w r10, #0 ; CHECK-NEXT: movlt r4, #0 -; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movwgt r10, #65535 -; CHECK-NEXT: movtgt r10, #3 -; CHECK-NEXT: movgt.w r4, #-1 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: vcmp.f32 s16, #0 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: vmov r0, s17 +; CHECK-NEXT: vcmp.f32 s18, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: vcmp.f32 s16, s20 -; CHECK-NEXT: mov r1, r10 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s19, s20 ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; 
CHECK-NEXT: str.w r0, [r9] -; CHECK-NEXT: vmov r0, s17 +; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movwgt r1, #65535 +; CHECK-NEXT: movtgt r1, #3 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt.w r4, #-1 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movwgt r10, #65535 +; CHECK-NEXT: movtgt r10, #3 +; CHECK-NEXT: mov r3, r10 ; CHECK-NEXT: bfc r1, #18, #14 -; CHECK-NEXT: bfc r7, #18, #14 -; CHECK-NEXT: mov r6, r8 -; CHECK-NEXT: lsll r4, r1, #22 -; CHECK-NEXT: lsrl r6, r7, #28 +; CHECK-NEXT: bfc r3, #18, #14 +; CHECK-NEXT: mov r6, r7 +; CHECK-NEXT: vcmp.f32 s16, s20 +; CHECK-NEXT: lsll r4, r3, #22 +; CHECK-NEXT: lsrl r6, r1, #28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: str.w r5, [r8] ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r5, #65535 -; CHECK-NEXT: movtgt r5, #3 -; CHECK-NEXT: orrs r1, r7 -; CHECK-NEXT: str.w r1, [r9, #20] +; CHECK-NEXT: movwgt r9, #65535 +; CHECK-NEXT: movtgt r9, #3 +; CHECK-NEXT: orrs r1, r3 +; CHECK-NEXT: str.w r1, [r8, #20] ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s17, #0 ; CHECK-NEXT: orr.w r2, r6, r4 @@ -1547,24 +1493,23 @@ define arm_aapcs_vfpcc <4 x i50> @test_unsigned_v4f32_v4i50(<4 x float> %f) { ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: bfc r5, #18, #14 +; CHECK-NEXT: bfc r9, #18, #14 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt gt ; CHECK-NEXT: movwgt r1, #65535 ; CHECK-NEXT: movtgt r1, #3 -; CHECK-NEXT: str.w r2, [r9, #16] +; CHECK-NEXT: str.w r2, [r8, #16] ; CHECK-NEXT: lsr.w r2, r10, #10 -; CHECK-NEXT: strb.w r2, [r9, #24] -; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: strb.w r2, [r8, #24] ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: mov r2, r0 ; CHECK-NEXT: bfc r1, #18, #14 -; CHECK-NEXT: orr.w r0, r5, r0, lsl #18 ; CHECK-NEXT: lsrl r2, r1, #14 -; CHECK-NEXT: orr.w r1, r1, r8, lsl #4 -; CHECK-NEXT: strd r2, r1, [r9, #8] -; 
CHECK-NEXT: str.w r0, [r9, #4] +; CHECK-NEXT: orr.w r0, r9, r0, lsl #18 +; CHECK-NEXT: orr.w r1, r1, r7, lsl #4 +; CHECK-NEXT: strd r2, r1, [r8, #8] +; CHECK-NEXT: str.w r0, [r8, #4] ; CHECK-NEXT: vpop {d8, d9, d10} ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} ; CHECK-NEXT: .p2align 2 @@ -1670,36 +1615,14 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) { ; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov r0, s17 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: vldr s20, .LCPI30_0 -; CHECK-NEXT: vcmp.f32 s17, #0 -; CHECK-NEXT: mov r7, r1 -; CHECK-NEXT: mov r6, r2 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: movgt.w r5, #-1 -; CHECK-NEXT: str r5, [sp] @ 4-byte Spill ; CHECK-NEXT: bl __fixunssfti +; CHECK-NEXT: vmov r6, s17 +; CHECK-NEXT: vldr s20, .LCPI30_0 ; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: mov r10, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r10, #0 +; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 @@ -1711,16 +1634,37 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) { ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: str.w r1, [r4, #29] -; CHECK-NEXT: vmov r1, s19 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: str.w r0, [r4, #25] ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r10, #15 -; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: movgt r3, #15 
+; CHECK-NEXT: vmov r7, s19 +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: bl __fixunssfti +; CHECK-NEXT: mov r10, r0 +; CHECK-NEXT: vcmp.f32 s17, #0 +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: mov r6, r2 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt.w r10, #0 +; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: movlt r3, #0 +; CHECK-NEXT: vcmp.f32 s17, s20 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r3, #15 +; CHECK-NEXT: str r3, [sp] @ 4-byte Spill +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movgt.w r6, #-1 +; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: movgt.w r10, #-1 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: vcmp.f32 s19, #0 ; CHECK-NEXT: mov r9, r1 @@ -1729,7 +1673,7 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) { ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt.w r9, #0 -; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: movlt.w r8, #0 ; CHECK-NEXT: movlt.w r11, #0 ; CHECK-NEXT: vcmp.f32 s19, s20 @@ -1737,7 +1681,7 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) { ; CHECK-NEXT: itttt gt ; CHECK-NEXT: movgt.w r11, #15 ; CHECK-NEXT: movgt.w r8, #-1 -; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: movgt.w r7, #-1 ; CHECK-NEXT: movgt.w r9, #-1 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s16, #0 @@ -1758,31 +1702,31 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) { ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: str r0, [r4] -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: lsrl r0, r9, #28 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: orr.w r1, r9, r8, lsl #4 -; CHECK-NEXT: str.w r1, [r4, #45] ; CHECK-NEXT: and r1, r11, #15 ; CHECK-NEXT: str.w r0, 
[r4, #41] -; CHECK-NEXT: and r0, r10, #15 +; CHECK-NEXT: mov r0, r10 +; CHECK-NEXT: lsrl r0, r5, #28 +; CHECK-NEXT: str r0, [r4, #16] +; CHECK-NEXT: orr.w r0, r9, r8, lsl #4 ; CHECK-NEXT: lsrl r8, r1, #28 +; CHECK-NEXT: str.w r0, [r4, #45] ; CHECK-NEXT: strb.w r8, [r4, #49] -; CHECK-NEXT: orr.w r0, r0, r5, lsl #4 -; CHECK-NEXT: str.w r0, [r4, #37] -; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload -; CHECK-NEXT: mov r0, r2 -; CHECK-NEXT: lsrl r0, r7, #28 -; CHECK-NEXT: orr.w r1, r7, r6, lsl #4 -; CHECK-NEXT: strd r0, r1, [r4, #16] ; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: and r0, r0, #15 +; CHECK-NEXT: orr.w r0, r0, r7, lsl #4 +; CHECK-NEXT: str.w r0, [r4, #37] +; CHECK-NEXT: orr.w r0, r5, r6, lsl #4 +; CHECK-NEXT: str r0, [r4, #20] +; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload ; CHECK-NEXT: and r1, r0, #15 ; CHECK-NEXT: lsrl r6, r1, #28 ; CHECK-NEXT: strb r6, [r4, #24] ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt r3, #15 ; CHECK-NEXT: and r0, r3, #15 -; CHECK-NEXT: orr.w r0, r0, r2, lsl #4 +; CHECK-NEXT: orr.w r0, r0, r10, lsl #4 ; CHECK-NEXT: str r0, [r4, #12] ; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop {d8, d9, d10} @@ -1809,13 +1753,13 @@ define arm_aapcs_vfpcc <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) { ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: vmov r0, s19 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: vmov r5, s18 ; CHECK-NEXT: vldr s20, .LCPI31_0 ; CHECK-NEXT: vcmp.f32 s19, #0 +; CHECK-NEXT: add.w r12, r4, #48 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 @@ -1823,32 +1767,29 @@ define arm_aapcs_vfpcc <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) { ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: ittt gt ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: 
movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r5, #-1 -; CHECK-NEXT: strd r5, r1, [r4, #48] -; CHECK-NEXT: vmov r6, s17 +; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} ; CHECK-NEXT: vmov r7, s16 -; CHECK-NEXT: strd r2, r3, [r4, #56] +; CHECK-NEXT: vmov r6, s17 +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s18, #0 ; CHECK-NEXT: add.w r12, r4, #32 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt gt +; CHECK-NEXT: itttt gt ; CHECK-NEXT: movgt.w r3, #-1 ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} ; CHECK-NEXT: mov r0, r6 @@ -1856,38 +1797,34 @@ define arm_aapcs_vfpcc <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) { ; CHECK-NEXT: vcmp.f32 s17, #0 ; CHECK-NEXT: add.w r12, r4, #16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 +; CHECK-NEXT: vcmp.f32 s17, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt gt +; CHECK-NEXT: itttt gt ; CHECK-NEXT: movgt.w r3, #-1 ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} ; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 +; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: 
vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt gt +; CHECK-NEXT: itttt gt ; CHECK-NEXT: movgt.w r3, #-1 ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: stm r4!, {r0, r1, r2, r3} ; CHECK-NEXT: vpop {d8, d9, d10} @@ -1928,57 +1865,55 @@ define arm_aapcs_vfpcc <2 x i1> @test_unsigned_v2f64_v2i1(<2 x double> %f) { ; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI32_0 -; CHECK-NEXT: vmov r5, r6, d8 +; CHECK-NEXT: vmov r4, r8, d8 ; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: vmov r10, r9, d0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: vmov r10, r3, d0 +; CHECK-NEXT: str r3, [sp] @ 4-byte Spill +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: vldr d0, .LCPI32_1 -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: vmov r4, r11, d0 -; CHECK-NEXT: mov r2, r4 -; CHECK-NEXT: mov r3, r11 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: bl __aeabi_d2uiz +; CHECK-NEXT: vldr d0, .LCPI32_1 +; CHECK-NEXT: cmp.w r9, #0 +; CHECK-NEXT: csel r7, r0, r9, ne +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: vmov r11, r3, d0 +; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: vmov r6, r5, d9 -; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: csel r0, r0, r8, ne -; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r8, r3 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #1 -; CHECK-NEXT: movs r7, #0 -; CHECK-NEXT: and r0, r0, #1 -; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: movne r7, #1 +; CHECK-NEXT: and r0, r7, #1 +; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload ; CHECK-NEXT: rsbs r0, r0, 
#0 -; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: bfi r7, r0, #0, #1 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r4 -; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: mov r2, r10 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: bl __aeabi_d2uiz ; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r0, r0, r4, ne -; CHECK-NEXT: cmp.w r8, #0 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: csel r4, r0, r4, ne +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #1 -; CHECK-NEXT: and r0, r0, #1 +; CHECK-NEXT: movne r4, #1 +; CHECK-NEXT: and r0, r4, #1 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: bfi r7, r0, #1, #1 ; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload @@ -1990,11 +1925,11 @@ define arm_aapcs_vfpcc <2 x i1> @test_unsigned_v2f64_v2i1(<2 x double> %f) { ; CHECK-NEXT: .p2align 3 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI32_0: -; CHECK-NEXT: .long 0 @ double 1 -; CHECK-NEXT: .long 1072693248 -; CHECK-NEXT: .LCPI32_1: ; CHECK-NEXT: .long 0 @ double 0 ; CHECK-NEXT: .long 0 +; CHECK-NEXT: .LCPI32_1: +; CHECK-NEXT: .long 0 @ double 1 +; CHECK-NEXT: .long 1072693248 %x = call <2 x i1> @llvm.fptoui.sat.v2f64.v2i1(<2 x double> %f) ret <2 x i1> %x } @@ -2008,84 +1943,60 @@ define arm_aapcs_vfpcc <2 x i8> @test_unsigned_v2f64_v2i8(<2 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI33_0 ; CHECK-NEXT: vmov r6, r7, d9 -; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: vmov r11, r3, d0 +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: mov 
r0, r6 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: strd r3, r2, [sp, #4] @ 8-byte Folded Spill +; CHECK-NEXT: mov r2, r11 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: vldr d0, .LCPI33_1 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: mov r10, r1 -; CHECK-NEXT: vmov r9, r8, d0 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: vmov r11, r4, d8 -; CHECK-NEXT: lsrs r0, r0, #5 -; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: cmp.w r8, #0 +; CHECK-NEXT: csel r9, r0, r8, ne +; CHECK-NEXT: csel r8, r1, r8, ne +; CHECK-NEXT: vmov r10, r3, d0 ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r5, #255 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: vmov r5, r4, d8 +; CHECK-NEXT: str r3, [sp] @ 4-byte Spill +; CHECK-NEXT: mov r2, r10 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #1 -; CHECK-NEXT: ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne.w r8, #0 +; CHECK-NEXT: movne.w r9, #255 +; CHECK-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r8, r4 +; CHECK-NEXT: mov r2, r11 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: lsrs r4, r0, #5 -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: 
mov r1, r4 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #0 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #255 -; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r10, #0 +; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r10, #0 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r1, #0 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r1, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r10 -; CHECK-NEXT: add sp, #16 +; CHECK-NEXT: csel r7, r1, r6, ne +; CHECK-NEXT: csel r6, r0, r6, ne +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne r6, #255 +; CHECK-NEXT: movne r7, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r6, r9 +; CHECK-NEXT: vmov q0[3], q0[1], r7, r8 +; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -2110,84 +2021,60 @@ define arm_aapcs_vfpcc <2 x i13> @test_unsigned_v2f64_v2i13(<2 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI34_0 ; CHECK-NEXT: vmov r6, r7, d9 -; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: vmov r11, r3, d0 +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: strd r3, r2, [sp, #4] @ 8-byte Folded Spill +; CHECK-NEXT: mov r2, r11 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r7 ; 
CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: vldr d0, .LCPI34_1 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: mov r10, r1 -; CHECK-NEXT: vmov r9, r8, d0 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: vmov r11, r4, d8 -; CHECK-NEXT: lsrs r0, r0, #5 -; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: cmp.w r8, #0 +; CHECK-NEXT: csel r9, r0, r8, ne +; CHECK-NEXT: csel r8, r1, r8, ne +; CHECK-NEXT: vmov r10, r3, d0 ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movwne r5, #8191 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: vmov r5, r4, d8 +; CHECK-NEXT: str r3, [sp] @ 4-byte Spill +; CHECK-NEXT: mov r2, r10 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #1 -; CHECK-NEXT: ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne.w r8, #0 +; CHECK-NEXT: movwne r9, #8191 +; CHECK-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r8, r4 +; CHECK-NEXT: mov r2, r11 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: lsrs r4, r0, #5 -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #0 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movwne r0, #8191 -; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: vmov q0[2], 
q0[0], r0, r5 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r10, #0 +; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r10, #0 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r1, #0 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r1, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r10 -; CHECK-NEXT: add sp, #16 +; CHECK-NEXT: csel r7, r1, r6, ne +; CHECK-NEXT: csel r6, r0, r6, ne +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movwne r6, #8191 +; CHECK-NEXT: movne r7, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r6, r9 +; CHECK-NEXT: vmov q0[3], q0[1], r7, r8 +; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -2212,84 +2099,60 @@ define arm_aapcs_vfpcc <2 x i16> @test_unsigned_v2f64_v2i16(<2 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI35_0 ; CHECK-NEXT: vmov r6, r7, d9 -; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: vmov r11, r3, d0 +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: strd r3, r2, [sp, #4] @ 8-byte Folded Spill +; CHECK-NEXT: mov r2, r11 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: vldr d0, .LCPI35_1 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: mov r10, r1 -; CHECK-NEXT: vmov r9, r8, d0 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: vmov r11, r4, 
d8 -; CHECK-NEXT: lsrs r0, r0, #5 -; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: cmp.w r8, #0 +; CHECK-NEXT: csel r9, r0, r8, ne +; CHECK-NEXT: csel r8, r1, r8, ne +; CHECK-NEXT: vmov r10, r3, d0 ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movwne r5, #65535 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: vmov r5, r4, d8 +; CHECK-NEXT: str r3, [sp] @ 4-byte Spill +; CHECK-NEXT: mov r2, r10 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #1 -; CHECK-NEXT: ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne.w r8, #0 +; CHECK-NEXT: movwne r9, #65535 +; CHECK-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r8, r4 +; CHECK-NEXT: mov r2, r11 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: lsrs r4, r0, #5 -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #0 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movwne r0, #65535 -; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r10, #0 +; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r10, #0 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: 
movne r1, #0 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r1, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r10 -; CHECK-NEXT: add sp, #16 +; CHECK-NEXT: csel r7, r1, r6, ne +; CHECK-NEXT: csel r6, r0, r6, ne +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movwne r6, #65535 +; CHECK-NEXT: movne r7, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r6, r9 +; CHECK-NEXT: vmov q0[3], q0[1], r7, r8 +; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -2318,79 +2181,61 @@ define arm_aapcs_vfpcc <2 x i19> @test_unsigned_v2f64_v2i19(<2 x double> %f) { ; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI36_0 -; CHECK-NEXT: vmov r11, r10, d8 -; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r10 -; CHECK-NEXT: strd r2, r3, [sp, #8] @ 8-byte Folded Spill -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: vldr d0, .LCPI36_1 -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: vmov r11, r5, d8 +; CHECK-NEXT: vmov r6, r7, d0 +; CHECK-NEXT: str r5, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: vmov r5, r7, d0 -; CHECK-NEXT: mov r1, r10 -; CHECK-NEXT: vmov r8, r6, d9 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #1 -; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r3, r7 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: bl __aeabi_d2ulz +; CHECK-NEXT: vldr d0, .LCPI36_1 +; CHECK-NEXT: vmov r5, r8, d9 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: vmov r3, r2, d0 +; CHECK-NEXT: csel r9, r1, r4, ne +; CHECK-NEXT: csel r10, r0, r4, ne +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r8 +; 
CHECK-NEXT: strd r3, r2, [sp, #4] @ 8-byte Folded Spill +; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: lsrs r0, r0, #5 -; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: lsr.w r9, r0, #5 -; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload -; CHECK-NEXT: mov r7, r1 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: csel r6, r0, r4, ne +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: csel r4, r1, r4, ne +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: ittt ne +; CHECK-NEXT: movne r4, #0 +; CHECK-NEXT: movwne r6, #65535 +; CHECK-NEXT: movtne r6, #7 +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r10 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: itt ne -; CHECK-NEXT: movwne r5, #65535 -; CHECK-NEXT: movtne r5, #7 -; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #0 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: itt ne -; CHECK-NEXT: movwne r0, #65535 -; CHECK-NEXT: movtne r0, #7 -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #0 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #0 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r1, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 -; 
CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r1, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r7 +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: ittt ne +; CHECK-NEXT: movwne r10, #65535 +; CHECK-NEXT: movtne r10, #7 +; CHECK-NEXT: movne.w r9, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r10, r6 +; CHECK-NEXT: vmov q0[3], q0[1], r9, r4 ; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 @@ -2398,11 +2243,11 @@ define arm_aapcs_vfpcc <2 x i19> @test_unsigned_v2f64_v2i19(<2 x double> %f) { ; CHECK-NEXT: .p2align 3 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI36_0: -; CHECK-NEXT: .long 0 @ double 524287 -; CHECK-NEXT: .long 1092616188 -; CHECK-NEXT: .LCPI36_1: ; CHECK-NEXT: .long 0 @ double 0 ; CHECK-NEXT: .long 0 +; CHECK-NEXT: .LCPI36_1: +; CHECK-NEXT: .long 0 @ double 524287 +; CHECK-NEXT: .long 1092616188 %x = call <2 x i19> @llvm.fptoui.sat.v2f64.v2i19(<2 x double> %f) ret <2 x i19> %x } @@ -2416,84 +2261,60 @@ define arm_aapcs_vfpcc <2 x i32> @test_unsigned_v2f64_v2i32_duplicate(<2 x doubl ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI37_0 ; CHECK-NEXT: vmov r6, r7, d9 -; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: vmov r11, r3, d0 +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: strd r3, r2, [sp, #4] @ 8-byte Folded Spill +; CHECK-NEXT: mov r2, r11 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: vldr d0, .LCPI37_1 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: mov r10, r1 -; CHECK-NEXT: vmov r9, r8, d0 -; 
CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: vmov r11, r4, d8 -; CHECK-NEXT: lsrs r0, r0, #5 -; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: cmp.w r8, #0 +; CHECK-NEXT: csel r9, r0, r8, ne +; CHECK-NEXT: csel r8, r1, r8, ne +; CHECK-NEXT: vmov r10, r3, d0 ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r5, #-1 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: vmov r5, r4, d8 +; CHECK-NEXT: str r3, [sp] @ 4-byte Spill +; CHECK-NEXT: mov r2, r10 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #1 -; CHECK-NEXT: ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne.w r8, #0 +; CHECK-NEXT: movne.w r9, #-1 +; CHECK-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r8, r4 +; CHECK-NEXT: mov r2, r11 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: lsrs r4, r0, #5 -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #0 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r10, #0 +; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w 
r10, #0 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r1, #0 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r1, #0 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r10 -; CHECK-NEXT: add sp, #16 +; CHECK-NEXT: csel r7, r1, r6, ne +; CHECK-NEXT: csel r6, r0, r6, ne +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne.w r6, #-1 +; CHECK-NEXT: movne r7, #0 +; CHECK-NEXT: vmov q0[2], q0[0], r6, r9 +; CHECK-NEXT: vmov q0[3], q0[1], r7, r8 +; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -2522,79 +2343,61 @@ define arm_aapcs_vfpcc <2 x i50> @test_unsigned_v2f64_v2i50(<2 x double> %f) { ; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI38_0 -; CHECK-NEXT: vmov r11, r10, d8 -; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r10 -; CHECK-NEXT: strd r2, r3, [sp, #8] @ 8-byte Folded Spill -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: vldr d0, .LCPI38_1 -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: vmov r11, r5, d8 +; CHECK-NEXT: vmov r6, r7, d0 +; CHECK-NEXT: str r5, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: vmov r5, r7, d0 -; CHECK-NEXT: mov r1, r10 -; CHECK-NEXT: vmov r8, r6, d9 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r4, #1 -; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r3, r7 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: bl __aeabi_d2ulz +; CHECK-NEXT: vldr d0, .LCPI38_1 +; CHECK-NEXT: vmov r5, r8, d9 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: vmov r3, r2, d0 +; CHECK-NEXT: csel r10, r0, r4, ne +; CHECK-NEXT: csel 
r9, r1, r4, ne +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: strd r3, r2, [sp, #4] @ 8-byte Folded Spill +; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: lsrs r0, r0, #5 -; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: mov r1, r6 -; CHECK-NEXT: lsr.w r9, r0, #5 -; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r8 ; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: mov r7, r1 -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #0 -; CHECK-NEXT: ldrd r2, r3, [sp, #8] @ 8-byte Folded Reload -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: csel r6, r1, r4, ne +; CHECK-NEXT: csel r4, r0, r4, ne +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: mov r1, r8 +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: ittt ne +; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: movwne r6, #65535 +; CHECK-NEXT: movtne r6, #3 +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r10 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: itt ne -; CHECK-NEXT: movwne r7, #65535 -; CHECK-NEXT: movtne r7, #3 -; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r1, #0 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: itt ne -; CHECK-NEXT: movwne r1, #65535 -; CHECK-NEXT: movtne r1, #3 -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r5, #-1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: it ne -; 
CHECK-NEXT: movne r0, #0 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r7 +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: ittt ne +; CHECK-NEXT: movwne r9, #65535 +; CHECK-NEXT: movtne r9, #3 +; CHECK-NEXT: movne.w r10, #-1 +; CHECK-NEXT: vmov q0[2], q0[0], r10, r4 +; CHECK-NEXT: vmov q0[3], q0[1], r9, r6 ; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 @@ -2602,11 +2405,11 @@ define arm_aapcs_vfpcc <2 x i50> @test_unsigned_v2f64_v2i50(<2 x double> %f) { ; CHECK-NEXT: .p2align 3 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI38_0: -; CHECK-NEXT: .long 4294967288 @ double 1125899906842623 -; CHECK-NEXT: .long 1125122047 -; CHECK-NEXT: .LCPI38_1: ; CHECK-NEXT: .long 0 @ double 0 ; CHECK-NEXT: .long 0 +; CHECK-NEXT: .LCPI38_1: +; CHECK-NEXT: .long 4294967288 @ double 1125899906842623 +; CHECK-NEXT: .long 1125122047 %x = call <2 x i50> @llvm.fptoui.sat.v2f64.v2i50(<2 x double> %f) ret <2 x i50> %x } @@ -2620,84 +2423,60 @@ define arm_aapcs_vfpcc <2 x i64> @test_unsigned_v2f64_v2i64(<2 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI39_0 ; CHECK-NEXT: vmov r6, r7, d9 -; CHECK-NEXT: vmov r2, r3, d0 +; CHECK-NEXT: vmov r11, r3, d0 +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: strd r3, r2, [sp, #4] @ 8-byte Folded Spill +; CHECK-NEXT: mov r2, r11 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r7 ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: vldr d0, .LCPI39_1 -; CHECK-NEXT: mov r5, r0 -; 
CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: mov r10, r1 -; CHECK-NEXT: vmov r9, r8, d0 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: vmov r11, r4, d8 -; CHECK-NEXT: lsrs r0, r0, #5 -; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: cmp.w r8, #0 +; CHECK-NEXT: csel r9, r0, r8, ne +; CHECK-NEXT: csel r8, r1, r8, ne +; CHECK-NEXT: vmov r10, r3, d0 ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r5, #0 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r6, #1 -; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r5, #-1 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: vmov r5, r4, d8 +; CHECK-NEXT: str r3, [sp] @ 4-byte Spill +; CHECK-NEXT: mov r2, r10 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r7, #1 -; CHECK-NEXT: ldrd r3, r2, [sp, #4] @ 8-byte Folded Reload -; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne.w r8, #-1 +; CHECK-NEXT: movne.w r9, #-1 +; CHECK-NEXT: ldr r3, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r8, r4 +; CHECK-NEXT: mov r2, r11 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: clz r0, r0 -; CHECK-NEXT: mov r1, r8 -; CHECK-NEXT: lsrs r4, r0, #5 -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: bl __aeabi_d2ulz -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #0 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: ldr r2, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: vmov q0[2], q0[0], r0, r5 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r10, #0 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r4 +; 
CHECK-NEXT: bl __aeabi_d2ulz +; CHECK-NEXT: ldr r3, [sp] @ 4-byte Reload ; CHECK-NEXT: cmp r6, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r10, #-1 -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne r1, #0 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r1, #-1 -; CHECK-NEXT: vmov q0[3], q0[1], r1, r10 -; CHECK-NEXT: add sp, #16 +; CHECK-NEXT: csel r7, r1, r6, ne +; CHECK-NEXT: csel r6, r0, r6, ne +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: itt ne +; CHECK-NEXT: movne.w r6, #-1 +; CHECK-NEXT: movne.w r7, #-1 +; CHECK-NEXT: vmov q0[2], q0[0], r6, r9 +; CHECK-NEXT: vmov q0[3], q0[1], r7, r8 +; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} @@ -2726,192 +2505,186 @@ define arm_aapcs_vfpcc <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) { ; CHECK-NEXT: sub sp, #48 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI40_0 -; CHECK-NEXT: vmov r6, r5, d8 -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov r2, r7, d0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: mov r9, r2 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: vldr d0, .LCPI40_1 -; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: vmov r11, r4, d8 +; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: vmov r2, r3, d0 -; CHECK-NEXT: str r2, [sp, #44] @ 4-byte Spill -; CHECK-NEXT: mov r10, r3 +; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r9, r2 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r4 +; CHECK-NEXT: mov r5, r4 ; CHECK-NEXT: bl __fixunsdfti +; CHECK-NEXT: vldr d0, .LCPI40_1 ; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: 
strd r1, r0, [sp, #20] @ 8-byte Folded Spill -; CHECK-NEXT: csel r0, r2, r8, ne -; CHECK-NEXT: str r3, [sp, #40] @ 4-byte Spill -; CHECK-NEXT: cmp.w r11, #0 +; CHECK-NEXT: str r3, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: csel r4, r2, r8, ne +; CHECK-NEXT: vmov r10, r3, d0 +; CHECK-NEXT: strd r1, r0, [sp, #16] @ 8-byte Folded Spill +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: str r5, [sp, #40] @ 4-byte Spill +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r8, r3 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: str r0, [r4, #8] -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: str r4, [r6, #8] +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: ldr r7, [sp, #36] @ 4-byte Reload ; CHECK-NEXT: mov r1, r5 ; CHECK-NEXT: mov r2, r9 ; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: str r7, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r11, r6 -; CHECK-NEXT: ldr r6, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r3, r10 -; CHECK-NEXT: mov r2, r6 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r7 -; CHECK-NEXT: csel r0, r1, r0, ne -; CHECK-NEXT: cmp.w r8, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: str r0, [r4, #4] +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: csel r4, r1, r0, ne ; CHECK-NEXT: mov r0, r11 ; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: strd r4, r11, [sp, #28] @ 8-byte Folded Spill -; CHECK-NEXT: str r5, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: str.w r10, [sp, #44] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: str r4, [r6, #4] +; CHECK-NEXT: mov r5, 
r6 +; CHECK-NEXT: str r6, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r10 +; CHECK-NEXT: ldr r6, [sp, #40] @ 4-byte Reload +; CHECK-NEXT: mov r2, r9 +; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: str.w r11, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: mov r1, r6 ; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: csel r4, r1, r0, ne +; CHECK-NEXT: mov r1, r6 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: mov r6, r8 +; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: vmov r8, r11, d9 -; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: csel r0, r1, r0, ne -; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: mov r3, r7 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: str r0, [r4] -; CHECK-NEXT: ldr r4, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: mov r5, r9 -; CHECK-NEXT: mov r3, r4 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: mov r2, r6 +; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: str r4, [r5] +; CHECK-NEXT: mov r10, r9 +; CHECK-NEXT: str.w r9, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: mov r5, r7 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r3, r10 -; CHECK-NEXT: mov r6, r10 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r10, r0 +; CHECK-NEXT: mov r9, r0 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r11 ; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: add.w r12, sp, #16 -; CHECK-NEXT: cmp.w r10, #0 -; CHECK-NEXT: stm.w r12, {r0, r2, r3} @ 12-byte Folded Spill -; CHECK-NEXT: csel r9, r1, r10, ne +; CHECK-NEXT: ldr r4, [sp, #44] @ 4-byte Reload +; CHECK-NEXT: cmp.w r9, #0 +; CHECK-NEXT: strd r3, r0, [sp, #16] @ 8-byte Folded Spill +; CHECK-NEXT: csel r7, r1, r9, ne +; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill ; 
CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r3, r4 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r9, #-1 -; CHECK-NEXT: mov r7, r5 -; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r10, r4 +; CHECK-NEXT: mov r2, r4 +; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: str r6, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldr r5, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: str r0, [sp] @ 4-byte Spill +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: str r6, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r1, r11 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r7, #-1 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: mov r3, r10 -; CHECK-NEXT: csel r4, r1, r0, ne -; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload +; CHECK-NEXT: mov r2, r4 +; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: csel r9, r1, r0, ne +; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r11 +; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r0, r9 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: movne.w r0, #-1 +; CHECK-NEXT: ldr.w r9, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: lsrl r0, r7, #28 +; CHECK-NEXT: mov r1, r11 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: str.w r0, [r9, #16] ; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: str r4, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: lsrl r4, r9, #28 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: ldr r4, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: mov r2, r4 +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: ldr r1, [sp, #8] 
@ 4-byte Reload +; CHECK-NEXT: ldr r2, [sp, #44] @ 4-byte Reload +; CHECK-NEXT: csel r10, r1, r0, ne ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: mov r2, r5 ; CHECK-NEXT: mov r3, r6 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r3, r10 -; CHECK-NEXT: csel r6, r1, r0, ne -; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r6, #-1 -; CHECK-NEXT: ldr r5, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: orr.w r0, r9, r6, lsl #4 -; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: strd r4, r0, [r5, #16] -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: ldr.w r9, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: movne.w r10, #-1 +; CHECK-NEXT: orr.w r0, r7, r10, lsl #4 +; CHECK-NEXT: str.w r0, [r9, #20] ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r11 -; CHECK-NEXT: ldr.w r11, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: ldr.w r8, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: mov r2, r4 +; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: ldr r5, [sp, #44] @ 4-byte Reload +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: csel r7, r1, r0, ne +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r11 +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r8, r6 +; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: mov r3, r10 -; CHECK-NEXT: csel r0, r1, r0, ne -; CHECK-NEXT: cmp r4, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #15 -; CHECK-NEXT: and r1, r0, #15 -; CHECK-NEXT: lsrl r6, r1, #28 -; CHECK-NEXT: strb r6, [r5, #24] -; CHECK-NEXT: ldr r6, [sp, #32] @ 4-byte Reload -; CHECK-NEXT: ldr r4, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: movne r7, #15 +; CHECK-NEXT: and 
r1, r7, #15 +; CHECK-NEXT: lsrl r10, r1, #28 +; CHECK-NEXT: strb.w r10, [r9, #24] +; CHECK-NEXT: ldr r6, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: ldr r4, [sp, #40] @ 4-byte Reload +; CHECK-NEXT: ldrd r2, r3, [sp, #32] @ 8-byte Folded Reload ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: csel r7, r1, r0, ne ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: mov r1, r4 -; CHECK-NEXT: mov r2, r11 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #40] @ 4-byte Reload +; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csel r0, r1, r0, ne -; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne r0, #15 -; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: and r0, r0, #15 +; CHECK-NEXT: movne r7, #15 +; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: and r0, r7, #15 ; CHECK-NEXT: orr.w r0, r0, r1, lsl #4 -; CHECK-NEXT: str r0, [r5, #12] +; CHECK-NEXT: str.w r0, [r9, #12] ; CHECK-NEXT: add sp, #48 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 @@ -2919,11 +2692,11 @@ define arm_aapcs_vfpcc <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) { ; CHECK-NEXT: .p2align 3 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI40_0: -; CHECK-NEXT: .long 4294967295 @ double 1.2676506002282293E+30 -; CHECK-NEXT: .long 1177550847 -; CHECK-NEXT: .LCPI40_1: ; CHECK-NEXT: .long 0 @ double 0 ; CHECK-NEXT: .long 0 +; CHECK-NEXT: .LCPI40_1: +; CHECK-NEXT: .long 4294967295 @ double 1.2676506002282293E+30 +; CHECK-NEXT: .long 1177550847 %x = call <2 x i100> @llvm.fptoui.sat.v2f64.v2i100(<2 x double> %f) ret <2 x i100> %x } @@ -2937,196 +2710,185 @@ define arm_aapcs_vfpcc <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) { ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} 
; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #32 -; CHECK-NEXT: sub sp, #32 +; CHECK-NEXT: .pad #24 +; CHECK-NEXT: sub sp, #24 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI41_0 ; CHECK-NEXT: vmov r8, r7, d9 -; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill -; CHECK-NEXT: vmov r6, r4, d0 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: vmov r2, r9, d0 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r4 -; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: mov r11, r2 +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: vldr d0, .LCPI41_1 -; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: cmp r6, #0 +; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill +; CHECK-NEXT: csel r6, r3, r6, ne +; CHECK-NEXT: vmov r10, r5, d0 +; CHECK-NEXT: str r2, [sp] @ 4-byte Spill ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: vmov r10, r11, d0 +; CHECK-NEXT: str r5, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: strd r1, r0, [sp, #16] @ 8-byte Folded Spill -; CHECK-NEXT: csel r0, r3, r5, ne -; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: cmp.w r9, #0 +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r9 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: ldr r5, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r4 -; CHECK-NEXT: str r0, [r5, #28] +; CHECK-NEXT: movne.w r6, #-1 +; CHECK-NEXT: str r6, [r4, #28] +; CHECK-NEXT: str.w r11, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte 
Reload +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: csel r5, r1, r0, ne ; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: str r6, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: mov r1, r7 +; CHECK-NEXT: mov r3, r6 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r5, #-1 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: str.w r10, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: mov r2, r11 +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: str r5, [r4, #24] +; CHECK-NEXT: mov r5, r4 +; CHECK-NEXT: mov r4, r9 ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r6 -; CHECK-NEXT: mov r3, r4 -; CHECK-NEXT: csel r0, r1, r0, ne -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: str r0, [r5, #24] +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r3, r6 +; CHECK-NEXT: csel r9, r1, r0, ne ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: str r4, [sp] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r9, #-1 +; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: str.w r9, [r5, #20] ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: mov r3, r4 +; CHECK-NEXT: vmov r6, r11, d8 +; CHECK-NEXT: mov r9, r4 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r3, r4 -; CHECK-NEXT: vmov r6, r5, d8 -; CHECK-NEXT: csel r0, r1, r0, ne -; CHECK-NEXT: cmp.w r9, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: ldr.w r9, [sp, #24] @ 
4-byte Reload +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: csel r4, r1, r0, ne ; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: str.w r0, [r9, #20] +; CHECK-NEXT: ldr r7, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: mov r3, r7 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: ldr r4, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: mov r3, r11 -; CHECK-NEXT: mov r7, r11 -; CHECK-NEXT: mov r2, r4 -; CHECK-NEXT: str.w r11, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r11, r9 -; CHECK-NEXT: csel r0, r1, r0, ne -; CHECK-NEXT: cmp.w r10, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: str.w r0, [r9, #16] -; CHECK-NEXT: ldr.w r8, [sp, #28] @ 4-byte Reload +; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: str r4, [r5, #16] ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: ldr.w r9, [sp] @ 4-byte Reload -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: ldr r5, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: mov r1, r11 ; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r4 -; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: mov r2, r5 ; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r1, r11 ; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: strd r1, r0, [sp, #16] @ 8-byte Folded Spill -; CHECK-NEXT: csel r0, r3, r7, ne -; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: cmp.w r10, #0 +; CHECK-NEXT: cmp.w r8, #0 +; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill +; CHECK-NEXT: csel r4, r3, r8, ne +; CHECK-NEXT: str r2, [sp] @ 4-byte Spill +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r11 +; 
CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r3, r7 +; CHECK-NEXT: mov r8, r7 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: str.w r0, [r11, #12] +; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: mov r1, r11 +; CHECK-NEXT: mov r2, r5 ; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: mov r7, r11 +; CHECK-NEXT: str r4, [r7, #12] +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: csel r4, r1, r0, ne +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r1, r11 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r2, r4 -; CHECK-NEXT: mov r10, r4 -; CHECK-NEXT: ldr r4, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: mov r11, r0 +; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r3, r4 +; CHECK-NEXT: mov r1, r11 +; CHECK-NEXT: mov r2, r5 +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: str r4, [r7, #8] ; CHECK-NEXT: bl __aeabi_dcmpge ; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r8 -; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: csel r0, r1, r0, ne -; CHECK-NEXT: cmp.w r11, #0 -; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: str r0, [r7, #8] +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: csel r4, r1, r0, ne ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r1, r11 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r4 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r2, r8 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov 
r1, r11 +; CHECK-NEXT: mov r2, r5 ; CHECK-NEXT: mov r3, r9 -; CHECK-NEXT: csel r0, r1, r0, ne -; CHECK-NEXT: cmp.w r11, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: str r0, [r7, #4] +; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: str r4, [r7, #4] +; CHECK-NEXT: bl __aeabi_dcmpge +; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: csel r4, r1, r0, ne ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r1, r11 ; CHECK-NEXT: bl __aeabi_dcmpgt -; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: mov r2, r10 -; CHECK-NEXT: mov r3, r4 -; CHECK-NEXT: bl __aeabi_dcmpge -; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csel r0, r1, r0, ne -; CHECK-NEXT: cmp.w r8, #0 ; CHECK-NEXT: it ne -; CHECK-NEXT: movne.w r0, #-1 -; CHECK-NEXT: str r0, [r7] -; CHECK-NEXT: add sp, #32 +; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: str r4, [r7] +; CHECK-NEXT: add sp, #24 ; CHECK-NEXT: vpop {d8, d9} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 3 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI41_0: -; CHECK-NEXT: .long 4294967295 @ double 3.4028236692093843E+38 -; CHECK-NEXT: .long 1206910975 -; CHECK-NEXT: .LCPI41_1: ; CHECK-NEXT: .long 0 @ double 0 ; CHECK-NEXT: .long 0 +; CHECK-NEXT: .LCPI41_1: +; CHECK-NEXT: .long 4294967295 @ double 3.4028236692093843E+38 +; CHECK-NEXT: .long 1206910975 %x = call <2 x i128> @llvm.fptoui.sat.v2f64.v2i128(<2 x double> %f) ret <2 x i128> %x } @@ -3571,81 +3333,86 @@ define arm_aapcs_vfpcc <8 x i50> @test_unsigned_v8f16_v8i50(<8 x half> %f) { ; CHECK-NEXT: .pad #24 ; CHECK-NEXT: sub sp, #24 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vcvtb.f32.f16 s22, s17 +; CHECK-NEXT: mov r10, r0 +; CHECK-NEXT: vcvtb.f32.f16 s22, s18 +; CHECK-NEXT: str r0, [sp] @ 4-byte Spill ; 
CHECK-NEXT: vmov r0, s22 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcvtb.f32.f16 s24, s18 -; CHECK-NEXT: mov r2, r0 -; CHECK-NEXT: vmov r0, s24 +; CHECK-NEXT: vcvtt.f32.f16 s26, s17 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: vmov r0, s26 ; CHECK-NEXT: vcvtt.f32.f16 s20, s18 -; CHECK-NEXT: vldr s18, .LCPI48_0 +; CHECK-NEXT: vcvtb.f32.f16 s24, s17 ; CHECK-NEXT: vcmp.f32 s22, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcvtt.f32.f16 s26, s17 -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vcmp.f32 s22, s18 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r1, #65535 -; CHECK-NEXT: movtgt r1, #3 -; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmov r5, s26 -; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: vmov r6, s20 +; CHECK-NEXT: vmov r8, s20 +; CHECK-NEXT: vldr s18, .LCPI48_0 +; CHECK-NEXT: vmov r9, s24 +; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r6, #0 ; CHECK-NEXT: bl __aeabi_f2ulz +; CHECK-NEXT: vcmp.f32 s26, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r0, r9 +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s24, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vcmp.f32 s24, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: vcmp.f32 s26, s18 +; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill ; CHECK-NEXT: itt gt ; CHECK-NEXT: movwgt r1, #65535 ; CHECK-NEXT: movtgt r1, #3 -; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: vcmp.f32 
s22, s18 +; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcmp.f32 s26, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s26, s18 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: str r7, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r1, #65535 -; CHECK-NEXT: movtgt r1, #3 -; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: str.w r7, [r4, #25] +; CHECK-NEXT: movwgt r5, #65535 +; CHECK-NEXT: movtgt r5, #3 +; CHECK-NEXT: str r5, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: movgt.w r6, #-1 +; CHECK-NEXT: str.w r6, [r10, #25] +; CHECK-NEXT: itt gt +; CHECK-NEXT: movwgt r4, #65535 +; CHECK-NEXT: movtgt r4, #3 +; CHECK-NEXT: str r4, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s20, #0 ; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s20, s18 ; CHECK-NEXT: vcvtb.f32.f16 s20, s19 -; CHECK-NEXT: mov r7, r1 +; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: vmov r0, s20 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movlt r5, #0 ; CHECK-NEXT: movlt r6, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: ittt gt ; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: movwgt r7, #65535 -; CHECK-NEXT: movtgt r7, #3 +; CHECK-NEXT: movwgt r5, #65535 +; CHECK-NEXT: movtgt r5, #3 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s20, #0 ; CHECK-NEXT: mov r9, r0 @@ -3668,16 +3435,16 @@ define arm_aapcs_vfpcc <8 x i50> @test_unsigned_v8f16_v8i50(<8 x half> %f) { ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s20, s18 ; CHECK-NEXT: vcvtb.f32.f16 s20, s16 -; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: mov r7, r1 ; CHECK-NEXT: vmov r0, s20 ; 
CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: movlt.w r10, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: ittt gt ; CHECK-NEXT: movgt.w r10, #-1 -; CHECK-NEXT: movwgt r5, #65535 -; CHECK-NEXT: movtgt r5, #3 +; CHECK-NEXT: movwgt r7, #65535 +; CHECK-NEXT: movtgt r7, #3 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s20, #0 ; CHECK-NEXT: mov r8, r1 @@ -3690,36 +3457,36 @@ define arm_aapcs_vfpcc <8 x i50> @test_unsigned_v8f16_v8i50(<8 x half> %f) { ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: ldr r4, [sp] @ 4-byte Reload ; CHECK-NEXT: mov r2, r9 -; CHECK-NEXT: str r0, [r4] -; CHECK-NEXT: lsrs r0, r5, #10 -; CHECK-NEXT: bfc r5, #18, #14 -; CHECK-NEXT: lsll r10, r5, #22 ; CHECK-NEXT: lsrl r2, r11, #28 -; CHECK-NEXT: orr.w r1, r11, r5 +; CHECK-NEXT: bfc r5, #18, #14 +; CHECK-NEXT: vcvtt.f32.f16 s16, s16 +; CHECK-NEXT: str r0, [r4] +; CHECK-NEXT: lsr.w r0, r7, #10 +; CHECK-NEXT: bfc r7, #18, #14 +; CHECK-NEXT: lsll r10, r7, #22 +; CHECK-NEXT: orr.w r1, r11, r7 ; CHECK-NEXT: str.w r1, [r4, #45] ; CHECK-NEXT: orr.w r1, r2, r10 ; CHECK-NEXT: str.w r1, [r4, #41] ; CHECK-NEXT: strb.w r0, [r4, #49] -; CHECK-NEXT: bfc r7, #18, #14 ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: vcvtt.f32.f16 s16, s16 -; CHECK-NEXT: lsrl r0, r7, #14 -; CHECK-NEXT: mov r5, r4 -; CHECK-NEXT: orr.w r1, r7, r9, lsl #4 +; CHECK-NEXT: lsrl r0, r5, #14 +; CHECK-NEXT: mov r7, r4 +; CHECK-NEXT: orr.w r1, r5, r9, lsl #4 ; CHECK-NEXT: str.w r1, [r4, #37] ; CHECK-NEXT: str.w r0, [r4, #33] -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: bfc r0, #18, #14 ; CHECK-NEXT: orr.w r0, r0, r6, lsl #18 ; CHECK-NEXT: str.w r0, [r4, #29] ; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldr r5, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: ldr r3, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: 
ldr.w r9, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: mov r1, r7 -; CHECK-NEXT: ldr r4, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: ldr r4, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: bfc r1, #18, #14 ; CHECK-NEXT: bfc r3, #18, #14 ; CHECK-NEXT: mov r6, r9 @@ -3729,7 +3496,7 @@ define arm_aapcs_vfpcc <8 x i50> @test_unsigned_v8f16_v8i50(<8 x half> %f) { ; CHECK-NEXT: movwgt r8, #65535 ; CHECK-NEXT: movtgt r8, #3 ; CHECK-NEXT: orrs r1, r3 -; CHECK-NEXT: str r1, [r5, #20] +; CHECK-NEXT: str r1, [r7, #20] ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: orr.w r2, r6, r4 @@ -3743,19 +3510,18 @@ define arm_aapcs_vfpcc <8 x i50> @test_unsigned_v8f16_v8i50(<8 x half> %f) { ; CHECK-NEXT: itt gt ; CHECK-NEXT: movwgt r1, #65535 ; CHECK-NEXT: movtgt r1, #3 -; CHECK-NEXT: str r2, [r5, #16] -; CHECK-NEXT: lsrs r2, r7, #10 -; CHECK-NEXT: strb r2, [r5, #24] -; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: str r2, [r7, #16] +; CHECK-NEXT: lsr.w r2, r5, #10 +; CHECK-NEXT: strb r2, [r7, #24] ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: mov r2, r0 ; CHECK-NEXT: bfc r1, #18, #14 -; CHECK-NEXT: orr.w r0, r8, r0, lsl #18 ; CHECK-NEXT: lsrl r2, r1, #14 +; CHECK-NEXT: orr.w r0, r8, r0, lsl #18 ; CHECK-NEXT: orr.w r1, r1, r9, lsl #4 -; CHECK-NEXT: strd r2, r1, [r5, #8] -; CHECK-NEXT: str r0, [r5, #4] +; CHECK-NEXT: strd r2, r1, [r7, #8] +; CHECK-NEXT: str r0, [r7, #4] ; CHECK-NEXT: add sp, #24 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: add sp, #4 @@ -3778,38 +3544,38 @@ define arm_aapcs_vfpcc <8 x i64> @test_unsigned_v8f16_v8i64(<8 x half> %f) { ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vcvtt.f32.f16 s20, s19 -; CHECK-NEXT: vmov r0, s20 +; CHECK-NEXT: vcvtt.f32.f16 s22, s19 +; CHECK-NEXT: vmov r0, s22 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcvtb.f32.f16 s22, s19 +; CHECK-NEXT: vcvtb.f32.f16 s26, 
s19 ; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: vmov r0, s22 +; CHECK-NEXT: vmov r0, s26 ; CHECK-NEXT: vldr s28, .LCPI49_0 -; CHECK-NEXT: vcmp.f32 s20, #0 +; CHECK-NEXT: vcmp.f32 s22, #0 ; CHECK-NEXT: mov r8, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcvtt.f32.f16 s20, s16 ; CHECK-NEXT: vcvtt.f32.f16 s24, s18 -; CHECK-NEXT: vcvtt.f32.f16 s26, s16 ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt.w r9, #0 ; CHECK-NEXT: movlt.w r8, #0 -; CHECK-NEXT: vcmp.f32 s20, s28 +; CHECK-NEXT: vcmp.f32 s22, s28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vmov r6, s20 ; CHECK-NEXT: vmov r4, s24 -; CHECK-NEXT: vmov r6, s26 ; CHECK-NEXT: itt gt ; CHECK-NEXT: movgt.w r8, #-1 ; CHECK-NEXT: movgt.w r9, #-1 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: vcmp.f32 s22, #0 +; CHECK-NEXT: vcmp.f32 s26, #0 ; CHECK-NEXT: mov r11, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt.w r11, #0 ; CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: vcmp.f32 s22, s28 +; CHECK-NEXT: vcmp.f32 s26, s28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt gt ; CHECK-NEXT: movgt.w r10, #-1 @@ -3833,12 +3599,12 @@ define arm_aapcs_vfpcc <8 x i64> @test_unsigned_v8f16_v8i64(<8 x half> %f) { ; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: vcmp.f32 s26, #0 +; CHECK-NEXT: vcmp.f32 s20, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: vcmp.f32 s26, s28 +; CHECK-NEXT: vcmp.f32 s20, s28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt gt ; CHECK-NEXT: movgt.w r6, #-1 @@ -3931,58 +3697,83 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: sub sp, #56 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vcvtt.f32.f16 s22, s16 +; CHECK-NEXT: vcvtb.f32.f16 s22, s17 ; CHECK-NEXT: vmov r0, s22 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcvtt.f32.f16 s24, 
s17 -; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: vcvtb.f32.f16 s24, s18 +; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: vmov r0, s24 ; CHECK-NEXT: vldr s20, .LCPI50_0 ; CHECK-NEXT: vcmp.f32 s22, #0 +; CHECK-NEXT: mov r9, r1 +; CHECK-NEXT: mov r10, r2 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt.w r10, #0 +; CHECK-NEXT: movlt.w r9, #0 +; CHECK-NEXT: movlt.w r8, #0 ; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: vcmp.f32 s22, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt r3, #15 ; CHECK-NEXT: str r3, [sp, #52] @ 4-byte Spill -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: str r2, [sp, #48] @ 4-byte Spill -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: str r1, [sp, #44] @ 4-byte Spill -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r7, #-1 -; CHECK-NEXT: str r7, [sp, #40] @ 4-byte Spill +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movgt.w r8, #-1 +; CHECK-NEXT: movgt.w r9, #-1 +; CHECK-NEXT: movgt.w r10, #-1 ; CHECK-NEXT: bl __fixunssfti +; CHECK-NEXT: vcvtb.f32.f16 s22, s19 +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: vmov r0, s22 +; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: vcmp.f32 s24, #0 -; CHECK-NEXT: vcvtt.f32.f16 s22, s18 +; CHECK-NEXT: mov r7, r2 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: movlt r5, #0 ; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: str r3, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: str r3, [sp, #48] @ 4-byte Spill +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: movgt.w r6, #-1 +; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: bl __fixunssfti +; CHECK-NEXT: vcmp.f32 s22, #0 +; CHECK-NEXT: vmrs APSR_nzcv, 
fpscr +; CHECK-NEXT: vcmp.f32 s22, s20 +; CHECK-NEXT: itttt lt +; CHECK-NEXT: movlt r3, #0 +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r1, #0 +; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: str r2, [sp, #32] @ 4-byte Spill +; CHECK-NEXT: str.w r2, [r4, #83] +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt.w r1, #-1 +; CHECK-NEXT: str.w r1, [r4, #79] ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill +; CHECK-NEXT: vcvtt.f32.f16 s22, s16 +; CHECK-NEXT: str.w r0, [r4, #75] ; CHECK-NEXT: vmov r0, s22 +; CHECK-NEXT: str.w r7, [r4, #58] +; CHECK-NEXT: str.w r6, [r4, #54] +; CHECK-NEXT: str.w r5, [r4, #50] +; CHECK-NEXT: str.w r10, [r4, #33] +; CHECK-NEXT: str.w r9, [r4, #29] +; CHECK-NEXT: str.w r8, [r4, #25] ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: str r1, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: movgt r3, #15 +; CHECK-NEXT: str r3, [sp, #40] @ 4-byte Spill ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s22, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr @@ -3995,115 +3786,85 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: str r3, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: str r3, [sp, #44] @ 4-byte Spill ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: str r2, [sp, #36] @ 4-byte Spill ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: str r1, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: str r1, [sp, #32] @ 4-byte Spill ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcvtb.f32.f16 s22, s17 -; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: vcvtt.f32.f16 s22, s17 +; CHECK-NEXT: str r0, [sp, #24] @ 4-byte Spill ; CHECK-NEXT: vmov r0, s22 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcvtb.f32.f16 
s18, s18 -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: mov r8, r1 ; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: mov r6, r2 +; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: movlt.w r8, #0 -; CHECK-NEXT: movlt.w r9, #0 +; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: vcmp.f32 s22, s20 +; CHECK-NEXT: vcvtt.f32.f16 s18, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: ittt gt -; CHECK-NEXT: movgt.w r9, #-1 -; CHECK-NEXT: movgt.w r8, #-1 -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: vcvtb.f32.f16 s18, s19 -; CHECK-NEXT: mov r11, r1 +; CHECK-NEXT: str r3, [sp, #28] @ 4-byte Spill +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt.w r2, #-1 +; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: mov r7, r2 -; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: movlt.w r11, #0 -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: movlt r3, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt r3, #15 -; CHECK-NEXT: str r3, [sp] @ 4-byte Spill -; CHECK-NEXT: ittt gt ; CHECK-NEXT: movgt.w r5, #-1 -; CHECK-NEXT: movgt.w r11, #-1 -; CHECK-NEXT: movgt.w r7, #-1 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: mov r10, r3 +; CHECK-NEXT: mov r9, r1 +; CHECK-NEXT: mov r8, r2 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt.w r10, #0 ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: movlt r2, 
#0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: str.w r2, [r4, #83] -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: str.w r1, [r4, #79] +; CHECK-NEXT: movlt.w r9, #0 +; CHECK-NEXT: movlt.w r8, #0 +; CHECK-NEXT: movlt r3, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt +; CHECK-NEXT: movgt r3, #15 +; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: ittt gt +; CHECK-NEXT: movgt.w r8, #-1 +; CHECK-NEXT: movgt.w r9, #-1 ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: vcvtt.f32.f16 s18, s19 -; CHECK-NEXT: str.w r0, [r4, #75] +; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: str.w r7, [r4, #58] -; CHECK-NEXT: str.w r11, [r4, #54] -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r5, [r4, #50] -; CHECK-NEXT: str.w r6, [r4, #33] -; CHECK-NEXT: str.w r8, [r4, #29] -; CHECK-NEXT: str.w r9, [r4, #25] -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r10, #15 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcvtb.f32.f16 s16, s16 -; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: mov r7, r1 +; CHECK-NEXT: mov r11, r1 ; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: mov r6, r2 -; CHECK-NEXT: mov r8, r3 +; CHECK-NEXT: mov r10, r2 +; CHECK-NEXT: mov r7, r3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt.w r11, #0 ; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: movlt.w r8, #0 +; CHECK-NEXT: movlt.w r10, #0 +; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itttt gt -; CHECK-NEXT: movgt.w r8, #15 +; CHECK-NEXT: movgt r7, #15 +; CHECK-NEXT: movgt.w r10, #-1 ; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: movgt.w r5, #-1 -; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: movgt.w r11, #-1 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr 
@@ -4123,65 +3884,65 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) { ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: str r0, [r4] -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: lsrl r0, r7, #28 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: orr.w r1, r7, r6, lsl #4 -; CHECK-NEXT: str.w r1, [r4, #95] -; CHECK-NEXT: and r1, r8, #15 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: lsrl r0, r11, #28 +; CHECK-NEXT: and r1, r7, #15 ; CHECK-NEXT: str.w r0, [r4, #91] -; CHECK-NEXT: and r0, r10, #15 -; CHECK-NEXT: lsrl r6, r1, #28 -; CHECK-NEXT: strb.w r6, [r4, #99] -; CHECK-NEXT: orr.w r0, r0, r5, lsl #4 -; CHECK-NEXT: str.w r0, [r4, #87] -; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: ldr r2, [sp, #16] @ 4-byte Reload -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: lsrl r0, r1, #28 -; CHECK-NEXT: orr.w r1, r1, r2, lsl #4 -; CHECK-NEXT: str.w r1, [r4, #70] +; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: lsrl r0, r9, #28 ; CHECK-NEXT: str.w r0, [r4, #66] -; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: ldr.w lr, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: mov r0, lr +; CHECK-NEXT: lsrl r0, r5, #28 +; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: str.w r0, [r4, #41] +; CHECK-NEXT: ldr.w r12, [sp, #24] @ 4-byte Reload +; CHECK-NEXT: ldr r5, [sp, #32] @ 4-byte Reload +; CHECK-NEXT: mov r0, r12 +; CHECK-NEXT: lsrl r0, r5, #28 +; CHECK-NEXT: str r0, [r4, #16] +; CHECK-NEXT: orr.w r0, r11, r10, lsl #4 +; CHECK-NEXT: lsrl r10, r1, #28 +; CHECK-NEXT: str.w r0, [r4, #95] +; CHECK-NEXT: strb.w r10, [r4, #99] +; CHECK-NEXT: ldr r0, [sp, #40] @ 4-byte Reload +; CHECK-NEXT: and r0, r0, #15 +; CHECK-NEXT: orr.w r0, r0, r6, lsl #4 +; CHECK-NEXT: str.w r0, [r4, #87] +; CHECK-NEXT: orr.w r0, r9, r8, lsl #4 +; CHECK-NEXT: str.w r0, [r4, #70] +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: and r1, r0, #15 -; 
CHECK-NEXT: lsrl r2, r1, #28 -; CHECK-NEXT: strb.w r2, [r4, #74] -; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload +; CHECK-NEXT: lsrl r8, r1, #28 +; CHECK-NEXT: strb.w r8, [r4, #74] +; CHECK-NEXT: ldr r0, [sp, #48] @ 4-byte Reload ; CHECK-NEXT: and r0, r0, #15 -; CHECK-NEXT: orr.w r0, r0, r7, lsl #4 +; CHECK-NEXT: orr.w r0, r0, r2, lsl #4 ; CHECK-NEXT: str.w r0, [r4, #62] -; CHECK-NEXT: ldr r7, [sp, #24] @ 4-byte Reload -; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: ldr r2, [sp, #32] @ 4-byte Reload -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: lsrl r0, r1, #28 -; CHECK-NEXT: orr.w r1, r1, r2, lsl #4 -; CHECK-NEXT: str.w r1, [r4, #45] -; CHECK-NEXT: str.w r0, [r4, #41] -; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: orr.w r0, r0, r2, lsl #4 +; CHECK-NEXT: str.w r0, [r4, #45] +; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload ; CHECK-NEXT: and r1, r0, #15 ; CHECK-NEXT: lsrl r2, r1, #28 ; CHECK-NEXT: strb.w r2, [r4, #49] -; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: ldr r0, [sp, #52] @ 4-byte Reload ; CHECK-NEXT: and r0, r0, #15 -; CHECK-NEXT: orr.w r0, r0, r7, lsl #4 +; CHECK-NEXT: orr.w r0, r0, lr, lsl #4 ; CHECK-NEXT: str.w r0, [r4, #37] -; CHECK-NEXT: ldr r7, [sp, #40] @ 4-byte Reload -; CHECK-NEXT: ldr r1, [sp, #44] @ 4-byte Reload -; CHECK-NEXT: ldr r2, [sp, #48] @ 4-byte Reload -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: lsrl r0, r1, #28 -; CHECK-NEXT: orr.w r1, r1, r2, lsl #4 -; CHECK-NEXT: strd r0, r1, [r4, #16] -; CHECK-NEXT: ldr r0, [sp, #52] @ 4-byte Reload +; CHECK-NEXT: ldr r2, [sp, #36] @ 4-byte Reload +; CHECK-NEXT: orr.w r0, r5, r2, lsl #4 +; CHECK-NEXT: str r0, [r4, #20] +; CHECK-NEXT: ldr r0, [sp, #44] @ 4-byte Reload ; CHECK-NEXT: and r1, r0, #15 ; CHECK-NEXT: lsrl r2, r1, #28 ; CHECK-NEXT: strb r2, [r4, #24] ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt r3, #15 ; CHECK-NEXT: and r0, r3, #15 -; CHECK-NEXT: orr.w r0, r0, r7, 
lsl #4 +; CHECK-NEXT: orr.w r0, r0, r12, lsl #4 ; CHECK-NEXT: str r0, [r4, #12] ; CHECK-NEXT: add sp, #56 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12} @@ -4206,64 +3967,61 @@ define arm_aapcs_vfpcc <8 x i128> @test_unsigned_v8f16_v8i128(<8 x half> %f) { ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vcvtt.f32.f16 s22, s19 -; CHECK-NEXT: vmov r0, s22 -; CHECK-NEXT: bl __fixunssfti +; CHECK-NEXT: vcvtt.f32.f16 s24, s19 +; CHECK-NEXT: vcvtb.f32.f16 s22, s16 +; CHECK-NEXT: vmov r0, s24 ; CHECK-NEXT: vcvtb.f32.f16 s28, s19 -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: vmov r0, s28 ; CHECK-NEXT: vldr s20, .LCPI51_0 -; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: vcvtt.f32.f16 s24, s18 +; CHECK-NEXT: vmov r5, s22 +; CHECK-NEXT: vmov r7, s28 +; CHECK-NEXT: vcvtt.f32.f16 s26, s18 +; CHECK-NEXT: bl __fixunssfti +; CHECK-NEXT: vcmp.f32 s24, #0 +; CHECK-NEXT: add.w r12, r4, #112 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s22, s20 -; CHECK-NEXT: vcvtb.f32.f16 s26, s18 ; CHECK-NEXT: itttt lt -; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 +; CHECK-NEXT: vcmp.f32 s24, s20 +; CHECK-NEXT: vcvtb.f32.f16 s18, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: ittt gt ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: strd r6, r1, [r4, #112] -; CHECK-NEXT: vmov r7, s24 -; CHECK-NEXT: vmov r5, s26 -; CHECK-NEXT: vcvtt.f32.f16 s18, s17 -; CHECK-NEXT: strd r2, r3, [r4, #120] +; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: vmov r6, s26 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s28, #0 ; CHECK-NEXT: add.w r12, r4, #96 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s20 ; CHECK-NEXT: 
itttt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 +; CHECK-NEXT: vcmp.f32 s28, s20 +; CHECK-NEXT: vcvtt.f32.f16 s24, s17 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: it gt +; CHECK-NEXT: ittt gt ; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r7 -; CHECK-NEXT: vmov r6, s18 -; CHECK-NEXT: vcvtb.f32.f16 s22, s17 +; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: vmov r7, s18 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s24, #0 +; CHECK-NEXT: vcmp.f32 s26, #0 ; CHECK-NEXT: add.w r12, r4, #80 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, s20 +; CHECK-NEXT: vcmp.f32 s26, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 @@ -4272,116 +4030,105 @@ define arm_aapcs_vfpcc <8 x i128> @test_unsigned_v8f16_v8i128(<8 x half> %f) { ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: ittt gt ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r5 -; CHECK-NEXT: vcvtt.f32.f16 s24, s16 -; CHECK-NEXT: vmov r7, s22 +; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: vmov r6, s24 +; CHECK-NEXT: vcvtb.f32.f16 s26, s17 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s26, #0 +; CHECK-NEXT: vcmp.f32 s18, #0 ; CHECK-NEXT: add.w r12, r4, #64 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 +; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: vcvtt.f32.f16 s16, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: 
movgt.w r3, #-1 -; CHECK-NEXT: it gt +; CHECK-NEXT: ittt gt ; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} ; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: vmov r5, s24 -; CHECK-NEXT: vcvtb.f32.f16 s16, s16 +; CHECK-NEXT: vmov r7, s26 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s18, #0 +; CHECK-NEXT: vcmp.f32 s24, #0 ; CHECK-NEXT: add.w r12, r4, #48 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 +; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r3, #-1 -; CHECK-NEXT: it gt +; CHECK-NEXT: ittt gt ; CHECK-NEXT: movgt.w r2, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt ; CHECK-NEXT: movgt.w r1, #-1 ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} ; CHECK-NEXT: mov r0, r7 ; CHECK-NEXT: vmov r6, s16 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s22, #0 +; CHECK-NEXT: vcmp.f32 s26, #0 ; CHECK-NEXT: add.w r12, r4, #32 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s22, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 +; CHECK-NEXT: vcmp.f32 s26, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt gt +; CHECK-NEXT: itttt gt ; CHECK-NEXT: movgt.w r3, #-1 ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s24, #0 +; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: add.w r12, r4, #16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: 
vcmp.f32 s24, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 +; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt gt +; CHECK-NEXT: itttt gt ; CHECK-NEXT: movgt.w r3, #-1 ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: stm.w r12, {r0, r1, r2, r3} -; CHECK-NEXT: mov r0, r6 +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: vcmp.f32 s16, #0 +; CHECK-NEXT: vcmp.f32 s22, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: itttt lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: movlt r3, #0 +; CHECK-NEXT: vcmp.f32 s22, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: ittt gt +; CHECK-NEXT: itttt gt ; CHECK-NEXT: movgt.w r3, #-1 ; CHECK-NEXT: movgt.w r2, #-1 ; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: stm r4!, {r0, r1, r2, r3} ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll index 9738f7ade6fe9..96aff0233e4d9 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll @@ -623,9 +623,9 @@ define arm_aapcs_vfpcc void @three_pointer_iv_v4i32(ptr nocapture readonly %x, p ; CHECK-NEXT: vldrw.u32 q3, [r0, q0] ; CHECK-NEXT: vldrw.u32 q4, [r0, q1, uxtw #2] ; CHECK-NEXT: vldrw.u32 q5, [r0, q2] -; CHECK-NEXT: subs r2, #4 +; CHECK-NEXT: adds r0, #48 ; CHECK-NEXT: vmul.i32 q3, q4, q3 -; CHECK-NEXT: add.w r0, r0, #48 +; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vmul.i32 q5, q4, q5 ; CHECK-NEXT: vmul.i32 q4, q4, r3 ; CHECK-NEXT: vstrw.32 q4, [r1, q1, 
uxtw #2] @@ -705,9 +705,9 @@ define arm_aapcs_vfpcc void @three_pointer_iv_v4i8(ptr nocapture readonly %x, pt ; CHECK-NEXT: vldrb.u32 q3, [r0, q0] ; CHECK-NEXT: vldrb.u32 q4, [r0, q1] ; CHECK-NEXT: vldrb.u32 q5, [r0, q2] -; CHECK-NEXT: subs r2, #4 +; CHECK-NEXT: adds r0, #12 ; CHECK-NEXT: vmul.i32 q3, q4, q3 -; CHECK-NEXT: add.w r0, r0, #12 +; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vmul.i32 q5, q4, q5 ; CHECK-NEXT: vmul.i32 q4, q4, r3 ; CHECK-NEXT: vstrb.32 q4, [r1, q1] @@ -793,9 +793,9 @@ define arm_aapcs_vfpcc void @three_pointer_iv_v8i16(ptr nocapture readonly %x, p ; CHECK-NEXT: vldrh.u16 q3, [r0, q0] ; CHECK-NEXT: vldrh.u16 q4, [r0, q1, uxtw #1] ; CHECK-NEXT: vldrh.u16 q5, [r0, q2] -; CHECK-NEXT: subs r2, #4 +; CHECK-NEXT: adds r0, #48 ; CHECK-NEXT: vmul.i16 q3, q4, q3 -; CHECK-NEXT: add.w r0, r0, #48 +; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vmul.i16 q5, q4, q5 ; CHECK-NEXT: vmul.i16 q4, q4, r3 ; CHECK-NEXT: vstrh.16 q4, [r1, q1, uxtw #1] @@ -887,9 +887,9 @@ define arm_aapcs_vfpcc void @three_pointer_iv_v16i8(ptr nocapture readonly %x, p ; CHECK-NEXT: vldrb.u8 q3, [r0, q0] ; CHECK-NEXT: vldrb.u8 q4, [r0, q1] ; CHECK-NEXT: vldrb.u8 q5, [r0, q2] -; CHECK-NEXT: subs r2, #4 +; CHECK-NEXT: adds r0, #48 ; CHECK-NEXT: vmul.i8 q3, q4, q3 -; CHECK-NEXT: add.w r0, r0, #48 +; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vmul.i8 q5, q4, q5 ; CHECK-NEXT: vmul.i8 q4, q4, r3 ; CHECK-NEXT: vstrb.8 q4, [r1, q1] diff --git a/llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll b/llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll index fe28f785623ed..acbe48f9e5927 100644 --- a/llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll +++ b/llvm/test/CodeGen/Thumb2/mve-laneinterleaving.ll @@ -307,82 +307,83 @@ define arm_aapcs_vfpcc <4 x i32> @ext_ops_trunc_i32(<4 x i32> %a, <4 x i32> %b) ; CHECK-NEXT: vmov.f32 s10, s7 ; CHECK-NEXT: vmov r10, s8 ; CHECK-NEXT: vmov.f32 s8, s6 -; CHECK-NEXT: vmov r7, s2 -; CHECK-NEXT: vmov.f32 s2, s1 ; CHECK-NEXT: vmov.f32 s6, s5 ; CHECK-NEXT: vmov r2, s8 ; CHECK-NEXT: 
asr.w r0, r10, #31 -; CHECK-NEXT: asrs r5, r7, #31 -; CHECK-NEXT: adds.w r4, r10, r2 -; CHECK-NEXT: eor.w r6, r10, r2 +; CHECK-NEXT: adds.w r6, r10, r2 +; CHECK-NEXT: eor.w r7, r10, r2 ; CHECK-NEXT: adc r3, r0, #0 -; CHECK-NEXT: asrl r4, r3, r2 -; CHECK-NEXT: subs r0, r4, r2 +; CHECK-NEXT: asrl r6, r3, r2 +; CHECK-NEXT: subs r0, r6, r2 +; CHECK-NEXT: vmov r6, s2 ; CHECK-NEXT: sbc lr, r3, #0 ; CHECK-NEXT: vmov r3, s10 +; CHECK-NEXT: vmov.f32 s2, s1 ; CHECK-NEXT: umull r0, r8, r0, r2 -; CHECK-NEXT: adds r4, r7, r3 -; CHECK-NEXT: eor.w r1, r7, r3 +; CHECK-NEXT: asrs r5, r6, #31 +; CHECK-NEXT: adds r4, r6, r3 ; CHECK-NEXT: adc r5, r5, #0 +; CHECK-NEXT: eor.w r1, r6, r3 ; CHECK-NEXT: asrl r4, r5, r3 ; CHECK-NEXT: subs r4, r4, r3 ; CHECK-NEXT: sbc r5, r5, #0 -; CHECK-NEXT: orrs.w r6, r6, r10, asr #31 +; CHECK-NEXT: orrs.w r7, r7, r10, asr #31 ; CHECK-NEXT: umull r4, r12, r4, r3 ; CHECK-NEXT: csetm r9, eq -; CHECK-NEXT: orrs.w r1, r1, r7, asr #31 -; CHECK-NEXT: mov.w r6, #0 +; CHECK-NEXT: orrs.w r1, r1, r6, asr #31 +; CHECK-NEXT: mov.w r7, #0 ; CHECK-NEXT: csetm r1, eq -; CHECK-NEXT: bfi r6, r9, #0, #8 +; CHECK-NEXT: bfi r7, r9, #0, #8 ; CHECK-NEXT: mla r5, r5, r3, r12 -; CHECK-NEXT: bfi r6, r1, #8, #8 -; CHECK-NEXT: rsbs r1, r7, #0 +; CHECK-NEXT: bfi r7, r1, #8, #8 +; CHECK-NEXT: rsbs r1, r6, #0 +; CHECK-NEXT: vmsr p0, r7 ; CHECK-NEXT: mla r7, lr, r2, r8 ; CHECK-NEXT: lsll r4, r5, r1 ; CHECK-NEXT: rsb.w r1, r10, #0 +; CHECK-NEXT: lsll r4, r5, r3 ; CHECK-NEXT: lsll r0, r7, r1 -; CHECK-NEXT: vmov lr, s2 +; CHECK-NEXT: vmov r3, s2 ; CHECK-NEXT: vmov r1, s6 ; CHECK-NEXT: lsll r0, r7, r2 -; CHECK-NEXT: lsll r4, r5, r3 -; CHECK-NEXT: vmsr p0, r6 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r4 ; CHECK-NEXT: mov.w r12, #0 ; CHECK-NEXT: vpsel q2, q3, q2 -; CHECK-NEXT: adds.w r2, lr, r1 -; CHECK-NEXT: asr.w r0, lr, #31 -; CHECK-NEXT: adc r3, r0, #0 -; CHECK-NEXT: asrl r2, r3, r1 +; CHECK-NEXT: adds r2, r3, r1 +; CHECK-NEXT: asr.w r0, r3, #31 +; CHECK-NEXT: adc r5, r0, #0 +; CHECK-NEXT: 
asrl r2, r5, r1 ; CHECK-NEXT: subs r0, r2, r1 ; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: sbc r7, r3, #0 -; CHECK-NEXT: vmov r3, s4 -; CHECK-NEXT: umull r0, r6, r0, r1 -; CHECK-NEXT: asrs r5, r2, #31 -; CHECK-NEXT: adds r4, r2, r3 -; CHECK-NEXT: adc r5, r5, #0 -; CHECK-NEXT: asrl r4, r5, r3 -; CHECK-NEXT: subs r4, r4, r3 ; CHECK-NEXT: sbc r8, r5, #0 -; CHECK-NEXT: mla r5, r7, r1, r6 -; CHECK-NEXT: eor.w r6, lr, r1 -; CHECK-NEXT: orrs.w r6, r6, lr, asr #31 -; CHECK-NEXT: eor.w r7, r2, r3 +; CHECK-NEXT: umull r4, lr, r0, r1 +; CHECK-NEXT: vmov r0, s4 +; CHECK-NEXT: asrs r5, r2, #31 +; CHECK-NEXT: adds r6, r2, r0 +; CHECK-NEXT: adc r7, r5, #0 +; CHECK-NEXT: mla r5, r8, r1, lr +; CHECK-NEXT: asrl r6, r7, r0 +; CHECK-NEXT: subs.w r8, r6, r0 +; CHECK-NEXT: eor.w r6, r2, r0 +; CHECK-NEXT: sbc lr, r7, #0 +; CHECK-NEXT: eor.w r7, r3, r1 +; CHECK-NEXT: orrs.w r6, r6, r2, asr #31 +; CHECK-NEXT: orr.w r7, r7, r3, asr #31 +; CHECK-NEXT: csetm r6, eq +; CHECK-NEXT: cmp r7, #0 +; CHECK-NEXT: bfi r12, r6, #0, #8 ; CHECK-NEXT: csetm r6, eq -; CHECK-NEXT: orrs.w r7, r7, r2, asr #31 -; CHECK-NEXT: csetm r7, eq -; CHECK-NEXT: rsb.w lr, lr, #0 -; CHECK-NEXT: bfi r12, r7, #0, #8 -; CHECK-NEXT: lsll r0, r5, lr ; CHECK-NEXT: bfi r12, r6, #8, #8 -; CHECK-NEXT: umull r4, r6, r4, r3 -; CHECK-NEXT: lsll r0, r5, r1 -; CHECK-NEXT: rsbs r1, r2, #0 +; CHECK-NEXT: umull r6, r7, r8, r0 +; CHECK-NEXT: rsb.w r8, r3, #0 +; CHECK-NEXT: lsll r4, r5, r8 ; CHECK-NEXT: vmsr p0, r12 -; CHECK-NEXT: mla r7, r8, r3, r6 -; CHECK-NEXT: lsll r4, r7, r1 -; CHECK-NEXT: lsll r4, r7, r3 -; CHECK-NEXT: vmov q0[2], q0[0], r4, r0 +; CHECK-NEXT: mla r3, lr, r0, r7 +; CHECK-NEXT: lsll r4, r5, r1 +; CHECK-NEXT: rsbs r1, r2, #0 +; CHECK-NEXT: lsll r6, r3, r1 +; CHECK-NEXT: lsll r6, r3, r0 +; CHECK-NEXT: vmov q0[2], q0[0], r6, r4 ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: vmov.f32 s1, s2 ; CHECK-NEXT: vmov.f32 s2, s8 diff --git a/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll b/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll index 
55a621eaf4c9c..d9ef1030ee922 100644 --- a/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll +++ b/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll @@ -6,8 +6,8 @@ declare i8 @llvm.smax.i8(i8 %a, i8 %b) readnone define arm_aapcs_vfpcc i8 @smaxi8(i8 %a, i8 %b) { ; CHECK-LABEL: smaxi8: ; CHECK: @ %bb.0: -; CHECK-NEXT: sxtb r0, r0 ; CHECK-NEXT: sxtb r1, r1 +; CHECK-NEXT: sxtb r0, r0 ; CHECK-NEXT: cmp r0, r1 ; CHECK-NEXT: csel r0, r0, r1, gt ; CHECK-NEXT: bx lr @@ -20,8 +20,8 @@ declare i16 @llvm.smax.i16(i16 %a, i16 %b) readnone define arm_aapcs_vfpcc i16 @smaxi16(i16 %a, i16 %b) { ; CHECK-LABEL: smaxi16: ; CHECK: @ %bb.0: -; CHECK-NEXT: sxth r0, r0 ; CHECK-NEXT: sxth r1, r1 +; CHECK-NEXT: sxth r0, r0 ; CHECK-NEXT: cmp r0, r1 ; CHECK-NEXT: csel r0, r0, r1, gt ; CHECK-NEXT: bx lr @@ -48,10 +48,8 @@ define arm_aapcs_vfpcc i64 @smaxi64(i64 %a, i64 %b) { ; CHECK: @ %bb.0: ; CHECK-NEXT: subs.w r12, r2, r0 ; CHECK-NEXT: sbcs.w r12, r3, r1 -; CHECK-NEXT: cset r12, lt -; CHECK-NEXT: cmp.w r12, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r3, ne +; CHECK-NEXT: csel r0, r0, r2, lt +; CHECK-NEXT: csel r1, r1, r3, lt ; CHECK-NEXT: bx lr %c = call i64 @llvm.smax.i64(i64 %a, i64 %b) ret i64 %c @@ -205,10 +203,8 @@ define arm_aapcs_vfpcc <1 x i64> @smax1i64(<1 x i64> %a, <1 x i64> %b) { ; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: subs.w r12, r2, r0 ; CHECK-NEXT: sbcs.w r12, r3, r1 -; CHECK-NEXT: cset r12, lt -; CHECK-NEXT: cmp.w r12, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r3, ne +; CHECK-NEXT: csel r0, r0, r2, lt +; CHECK-NEXT: csel r1, r1, r3, lt ; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: bx lr %c = call <1 x i64> @llvm.smax.v1i64(<1 x i64> %a, <1 x i64> %b) @@ -290,8 +286,8 @@ declare i8 @llvm.umax.i8(i8 %a, i8 %b) readnone define arm_aapcs_vfpcc i8 @umaxi8(i8 %a, i8 %b) { ; CHECK-LABEL: umaxi8: ; CHECK: @ %bb.0: -; CHECK-NEXT: uxtb r0, r0 ; CHECK-NEXT: uxtb r1, r1 +; CHECK-NEXT: uxtb r0, r0 ; CHECK-NEXT: cmp r0, r1 ; CHECK-NEXT: csel r0, r0, r1, hi ; 
CHECK-NEXT: bx lr @@ -304,8 +300,8 @@ declare i16 @llvm.umax.i16(i16 %a, i16 %b) readnone define arm_aapcs_vfpcc i16 @umaxi16(i16 %a, i16 %b) { ; CHECK-LABEL: umaxi16: ; CHECK: @ %bb.0: -; CHECK-NEXT: uxth r0, r0 ; CHECK-NEXT: uxth r1, r1 +; CHECK-NEXT: uxth r0, r0 ; CHECK-NEXT: cmp r0, r1 ; CHECK-NEXT: csel r0, r0, r1, hi ; CHECK-NEXT: bx lr @@ -332,10 +328,8 @@ define arm_aapcs_vfpcc i64 @umaxi64(i64 %a, i64 %b) { ; CHECK: @ %bb.0: ; CHECK-NEXT: subs.w r12, r2, r0 ; CHECK-NEXT: sbcs.w r12, r3, r1 -; CHECK-NEXT: cset r12, lo -; CHECK-NEXT: cmp.w r12, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r3, ne +; CHECK-NEXT: csel r0, r0, r2, lo +; CHECK-NEXT: csel r1, r1, r3, lo ; CHECK-NEXT: bx lr %c = call i64 @llvm.umax.i64(i64 %a, i64 %b) ret i64 %c @@ -482,10 +476,8 @@ define arm_aapcs_vfpcc <1 x i64> @umax1i64(<1 x i64> %a, <1 x i64> %b) { ; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: subs.w r12, r2, r0 ; CHECK-NEXT: sbcs.w r12, r3, r1 -; CHECK-NEXT: cset r12, lo -; CHECK-NEXT: cmp.w r12, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r3, ne +; CHECK-NEXT: csel r0, r0, r2, lo +; CHECK-NEXT: csel r1, r1, r3, lo ; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: bx lr %c = call <1 x i64> @llvm.umax.v1i64(<1 x i64> %a, <1 x i64> %b) @@ -567,8 +559,8 @@ declare i8 @llvm.smin.i8(i8 %a, i8 %b) readnone define arm_aapcs_vfpcc i8 @smini8(i8 %a, i8 %b) { ; CHECK-LABEL: smini8: ; CHECK: @ %bb.0: -; CHECK-NEXT: sxtb r0, r0 ; CHECK-NEXT: sxtb r1, r1 +; CHECK-NEXT: sxtb r0, r0 ; CHECK-NEXT: cmp r0, r1 ; CHECK-NEXT: csel r0, r0, r1, lt ; CHECK-NEXT: bx lr @@ -581,8 +573,8 @@ declare i16 @llvm.smin.i16(i16 %a, i16 %b) readnone define arm_aapcs_vfpcc i16 @smini16(i16 %a, i16 %b) { ; CHECK-LABEL: smini16: ; CHECK: @ %bb.0: -; CHECK-NEXT: sxth r0, r0 ; CHECK-NEXT: sxth r1, r1 +; CHECK-NEXT: sxth r0, r0 ; CHECK-NEXT: cmp r0, r1 ; CHECK-NEXT: csel r0, r0, r1, lt ; CHECK-NEXT: bx lr @@ -609,10 +601,8 @@ define arm_aapcs_vfpcc i64 @smini64(i64 %a, i64 %b) { ; CHECK: @ 
%bb.0: ; CHECK-NEXT: subs.w r12, r0, r2 ; CHECK-NEXT: sbcs.w r12, r1, r3 -; CHECK-NEXT: cset r12, lt -; CHECK-NEXT: cmp.w r12, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r3, ne +; CHECK-NEXT: csel r0, r0, r2, lt +; CHECK-NEXT: csel r1, r1, r3, lt ; CHECK-NEXT: bx lr %c = call i64 @llvm.smin.i64(i64 %a, i64 %b) ret i64 %c @@ -766,10 +756,8 @@ define arm_aapcs_vfpcc <1 x i64> @smin1i64(<1 x i64> %a, <1 x i64> %b) { ; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: subs.w r12, r0, r2 ; CHECK-NEXT: sbcs.w r12, r1, r3 -; CHECK-NEXT: cset r12, lt -; CHECK-NEXT: cmp.w r12, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r3, ne +; CHECK-NEXT: csel r0, r0, r2, lt +; CHECK-NEXT: csel r1, r1, r3, lt ; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: bx lr %c = call <1 x i64> @llvm.smin.v1i64(<1 x i64> %a, <1 x i64> %b) @@ -851,8 +839,8 @@ declare i8 @llvm.umin.i8(i8 %a, i8 %b) readnone define arm_aapcs_vfpcc i8 @umini8(i8 %a, i8 %b) { ; CHECK-LABEL: umini8: ; CHECK: @ %bb.0: -; CHECK-NEXT: uxtb r0, r0 ; CHECK-NEXT: uxtb r1, r1 +; CHECK-NEXT: uxtb r0, r0 ; CHECK-NEXT: cmp r0, r1 ; CHECK-NEXT: csel r0, r0, r1, lo ; CHECK-NEXT: bx lr @@ -865,8 +853,8 @@ declare i16 @llvm.umin.i16(i16 %a, i16 %b) readnone define arm_aapcs_vfpcc i16 @umini16(i16 %a, i16 %b) { ; CHECK-LABEL: umini16: ; CHECK: @ %bb.0: -; CHECK-NEXT: uxth r0, r0 ; CHECK-NEXT: uxth r1, r1 +; CHECK-NEXT: uxth r0, r0 ; CHECK-NEXT: cmp r0, r1 ; CHECK-NEXT: csel r0, r0, r1, lo ; CHECK-NEXT: bx lr @@ -893,10 +881,8 @@ define arm_aapcs_vfpcc i64 @umini64(i64 %a, i64 %b) { ; CHECK: @ %bb.0: ; CHECK-NEXT: subs.w r12, r0, r2 ; CHECK-NEXT: sbcs.w r12, r1, r3 -; CHECK-NEXT: cset r12, lo -; CHECK-NEXT: cmp.w r12, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r3, ne +; CHECK-NEXT: csel r0, r0, r2, lo +; CHECK-NEXT: csel r1, r1, r3, lo ; CHECK-NEXT: bx lr %c = call i64 @llvm.umin.i64(i64 %a, i64 %b) ret i64 %c @@ -1043,10 +1029,8 @@ define arm_aapcs_vfpcc <1 x i64> @umin1i64(<1 x i64> %a, <1 x 
i64> %b) { ; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: subs.w r12, r0, r2 ; CHECK-NEXT: sbcs.w r12, r1, r3 -; CHECK-NEXT: cset r12, lo -; CHECK-NEXT: cmp.w r12, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: csel r1, r1, r3, ne +; CHECK-NEXT: csel r0, r0, r2, lo +; CHECK-NEXT: csel r1, r1, r3, lo ; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: bx lr %c = call <1 x i64> @llvm.umin.v1i64(<1 x i64> %a, <1 x i64> %b) diff --git a/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll b/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll index 70957ca950d71..43ed5eefbf4c7 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pipelineloops.ll @@ -13,8 +13,8 @@ define void @arm_cmplx_dot_prod_q15(ptr noundef %pSrcA, ptr noundef %pSrcB, i32 ; CHECK-NEXT: movs r6, #2 ; CHECK-NEXT: lsrs r7, r2, #3 ; CHECK-NEXT: rsb r6, r6, r2, lsr #3 -; CHECK-NEXT: movs r5, #0 ; CHECK-NEXT: cmp r7, #2 +; CHECK-NEXT: mov.w r5, #0 ; CHECK-NEXT: csel r7, r6, r5, hs ; CHECK-NEXT: add.w lr, r7, #1 ; CHECK-NEXT: mov r4, r5 diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll b/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll index 101b49fea488a..0bec2b100911c 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll @@ -28,11 +28,12 @@ define arm_aapcs_vfpcc <4 x i32> @sext_v4i1_v4f32(<4 x float> %src1, <4 x float> ; CHECK-MVE-NEXT: csetm r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s1, s5 -; CHECK-MVE-NEXT: vmov q0[2], q0[0], r1, r0 -; CHECK-MVE-NEXT: csetm r2, ne +; CHECK-MVE-NEXT: vmov q2[2], q2[0], r1, r0 +; CHECK-MVE-NEXT: csetm r0, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: csetm r3, ne -; CHECK-MVE-NEXT: vmov q0[3], q0[1], r3, r2 +; CHECK-MVE-NEXT: csetm r1, ne +; CHECK-MVE-NEXT: vmov q2[3], q2[1], r1, r0 +; CHECK-MVE-NEXT: vmov q0, q2 ; CHECK-MVE-NEXT: bx lr ; ; CHECK-MVEFP-LABEL: sext_v4i1_v4f32: @@ -65,49 +66,49 @@ entry: define arm_aapcs_vfpcc <8 x i16> @sext_v8i1_v8f32(<8 x half> %src1, 
<8 x half> %src2) { ; CHECK-MVE-LABEL: sext_v8i1_v8f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: .save {r4, r5, r7, lr} -; CHECK-MVE-NEXT: push {r4, r5, r7, lr} +; CHECK-MVE-NEXT: .save {r4, lr} +; CHECK-MVE-NEXT: push {r4, lr} ; CHECK-MVE-NEXT: vcmp.f16 s3, s7 -; CHECK-MVE-NEXT: vmovx.f16 s8, s7 +; CHECK-MVE-NEXT: vmovx.f16 s8, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vmovx.f16 s10, s3 +; CHECK-MVE-NEXT: vmovx.f16 s10, s2 ; CHECK-MVE-NEXT: vcmp.f16 s10, s8 +; CHECK-MVE-NEXT: vmovx.f16 s8, s5 +; CHECK-MVE-NEXT: vmovx.f16 s10, s1 ; CHECK-MVE-NEXT: csetm r12, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f16 s2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s6, s6 -; CHECK-MVE-NEXT: vmovx.f16 s2, s2 +; CHECK-MVE-NEXT: vcmp.f16 s10, s8 ; CHECK-MVE-NEXT: csetm lr, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s2, s6 -; CHECK-MVE-NEXT: vmovx.f16 s2, s5 -; CHECK-MVE-NEXT: vmovx.f16 s6, s1 +; CHECK-MVE-NEXT: vmovx.f16 s2, s4 +; CHECK-MVE-NEXT: vmovx.f16 s6, s0 ; CHECK-MVE-NEXT: csetm r2, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f16 s1, s5 +; CHECK-MVE-NEXT: vcmp.f16 s6, s2 +; CHECK-MVE-NEXT: vmovx.f16 s2, s3 ; CHECK-MVE-NEXT: csetm r3, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f16 s6, s2 -; CHECK-MVE-NEXT: vmovx.f16 s2, s4 +; CHECK-MVE-NEXT: vcmp.f16 s1, s5 ; CHECK-MVE-NEXT: csetm r0, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s0, s4 -; CHECK-MVE-NEXT: vmovx.f16 s0, s0 +; CHECK-MVE-NEXT: vmovx.f16 s0, s7 ; CHECK-MVE-NEXT: csetm r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f16 s0, s2 +; CHECK-MVE-NEXT: vcmp.f16 s2, s0 ; CHECK-MVE-NEXT: csetm r4, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vmov.16 q0[0], r4 -; CHECK-MVE-NEXT: csetm r5, ne -; CHECK-MVE-NEXT: vmov.16 q0[1], r5 -; CHECK-MVE-NEXT: vmov.16 q0[2], r0 -; CHECK-MVE-NEXT: vmov.16 q0[3], r1 -; CHECK-MVE-NEXT: vmov.16 q0[4], r2 -; 
CHECK-MVE-NEXT: vmov.16 q0[5], r3 +; CHECK-MVE-NEXT: vmov.16 q0[1], r0 +; CHECK-MVE-NEXT: vmov.16 q0[2], r1 +; CHECK-MVE-NEXT: vmov.16 q0[3], r2 +; CHECK-MVE-NEXT: vmov.16 q0[4], r3 +; CHECK-MVE-NEXT: vmov.16 q0[5], lr ; CHECK-MVE-NEXT: vmov.16 q0[6], r12 -; CHECK-MVE-NEXT: vmov.16 q0[7], lr -; CHECK-MVE-NEXT: pop {r4, r5, r7, pc} +; CHECK-MVE-NEXT: csetm r0, ne +; CHECK-MVE-NEXT: vmov.16 q0[7], r0 +; CHECK-MVE-NEXT: pop {r4, pc} ; ; CHECK-MVEFP-LABEL: sext_v8i1_v8f32: ; CHECK-MVEFP: @ %bb.0: @ %entry @@ -198,22 +199,22 @@ define arm_aapcs_vfpcc <2 x i64> @sext_v2i1_v2f64(<2 x double> %src) { ; CHECK-MVEFP-NEXT: vpush {d8, d9} ; CHECK-MVEFP-NEXT: vmov q4, q0 ; CHECK-MVEFP-NEXT: vldr d0, .LCPI6_0 -; CHECK-MVEFP-NEXT: vmov r0, r1, d9 +; CHECK-MVEFP-NEXT: vmov r0, r1, d8 ; CHECK-MVEFP-NEXT: vmov r4, r5, d0 ; CHECK-MVEFP-NEXT: mov r2, r4 ; CHECK-MVEFP-NEXT: mov r3, r5 ; CHECK-MVEFP-NEXT: bl __aeabi_dcmpeq ; CHECK-MVEFP-NEXT: mov r6, r0 -; CHECK-MVEFP-NEXT: vmov r0, r1, d8 +; CHECK-MVEFP-NEXT: vmov r0, r1, d9 ; CHECK-MVEFP-NEXT: mov r2, r4 ; CHECK-MVEFP-NEXT: mov r3, r5 ; CHECK-MVEFP-NEXT: bl __aeabi_dcmpeq -; CHECK-MVEFP-NEXT: cmp r6, #0 -; CHECK-MVEFP-NEXT: csetm r1, eq ; CHECK-MVEFP-NEXT: cmp r0, #0 ; CHECK-MVEFP-NEXT: csetm r0, eq -; CHECK-MVEFP-NEXT: vmov q0[2], q0[0], r0, r1 -; CHECK-MVEFP-NEXT: vmov q0[3], q0[1], r0, r1 +; CHECK-MVEFP-NEXT: cmp r6, #0 +; CHECK-MVEFP-NEXT: csetm r1, eq +; CHECK-MVEFP-NEXT: vmov q0[2], q0[0], r1, r0 +; CHECK-MVEFP-NEXT: vmov q0[3], q0[1], r1, r0 ; CHECK-MVEFP-NEXT: vpop {d8, d9} ; CHECK-MVEFP-NEXT: pop {r4, r5, r6, pc} ; CHECK-MVEFP-NEXT: .p2align 3 @@ -245,22 +246,22 @@ entry: define arm_aapcs_vfpcc <4 x i32> @zext_v4i1_v4f32(<4 x float> %src1, <4 x float> %src2) { ; CHECK-MVE-LABEL: zext_v4i1_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s3, s7 +; CHECK-MVE-NEXT: vcmp.f32 s2, s6 ; CHECK-MVE-NEXT: vmov.i32 q2, #0x1 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s1, s5 +; CHECK-MVE-NEXT: 
vcmp.f32 s0, s4 ; CHECK-MVE-NEXT: csetm r0, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s2, s6 +; CHECK-MVE-NEXT: vcmp.f32 s3, s7 ; CHECK-MVE-NEXT: csetm r1, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: csetm r2, ne +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 +; CHECK-MVE-NEXT: vmov q3[2], q3[0], r1, r0 +; CHECK-MVE-NEXT: csetm r0, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: csetm r3, ne -; CHECK-MVE-NEXT: vmov q0[2], q0[0], r3, r2 -; CHECK-MVE-NEXT: vmov q0[3], q0[1], r1, r0 -; CHECK-MVE-NEXT: vand q0, q0, q2 +; CHECK-MVE-NEXT: csetm r1, ne +; CHECK-MVE-NEXT: vmov q3[3], q3[1], r1, r0 +; CHECK-MVE-NEXT: vand q0, q3, q2 ; CHECK-MVE-NEXT: bx lr ; ; CHECK-MVEFP-LABEL: zext_v4i1_v4f32: @@ -293,51 +294,51 @@ entry: define arm_aapcs_vfpcc <8 x i16> @zext_v8i1_v8f32(<8 x half> %src1, <8 x half> %src2) { ; CHECK-MVE-LABEL: zext_v8i1_v8f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: .save {r4, r5, r7, lr} -; CHECK-MVE-NEXT: push {r4, r5, r7, lr} -; CHECK-MVE-NEXT: vmovx.f16 s8, s7 -; CHECK-MVE-NEXT: vmovx.f16 s10, s3 -; CHECK-MVE-NEXT: vcmp.f16 s10, s8 +; CHECK-MVE-NEXT: .save {r4, lr} +; CHECK-MVE-NEXT: push {r4, lr} +; CHECK-MVE-NEXT: vcmp.f16 s3, s7 ; CHECK-MVE-NEXT: vmovx.f16 s8, s6 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f16 s3, s7 ; CHECK-MVE-NEXT: vmovx.f16 s10, s2 -; CHECK-MVE-NEXT: csetm r12, ne -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s10, s8 -; CHECK-MVE-NEXT: csetm lr, ne +; CHECK-MVE-NEXT: csetm r12, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s2, s6 ; CHECK-MVE-NEXT: vmovx.f16 s2, s5 ; CHECK-MVE-NEXT: vmovx.f16 s6, s1 -; CHECK-MVE-NEXT: csetm r2, ne +; CHECK-MVE-NEXT: csetm lr, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s6, s2 ; CHECK-MVE-NEXT: vmovx.f16 s2, s4 ; CHECK-MVE-NEXT: vmovx.f16 s6, s0 -; CHECK-MVE-NEXT: csetm r3, ne +; CHECK-MVE-NEXT: csetm r2, ne ; 
CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s1, s5 -; CHECK-MVE-NEXT: csetm r0, ne +; CHECK-MVE-NEXT: csetm r3, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s6, s2 -; CHECK-MVE-NEXT: csetm r1, ne +; CHECK-MVE-NEXT: vmovx.f16 s2, s3 +; CHECK-MVE-NEXT: csetm r0, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f16 s0, s4 +; CHECK-MVE-NEXT: vmovx.f16 s0, s7 +; CHECK-MVE-NEXT: csetm r1, ne +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f16 s2, s0 ; CHECK-MVE-NEXT: vmov.i16 q0, #0x1 ; CHECK-MVE-NEXT: csetm r4, ne ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: csetm r5, ne -; CHECK-MVE-NEXT: vmov.16 q1[0], r5 -; CHECK-MVE-NEXT: vmov.16 q1[1], r4 -; CHECK-MVE-NEXT: vmov.16 q1[2], r1 -; CHECK-MVE-NEXT: vmov.16 q1[3], r0 -; CHECK-MVE-NEXT: vmov.16 q1[4], r3 -; CHECK-MVE-NEXT: vmov.16 q1[5], r2 -; CHECK-MVE-NEXT: vmov.16 q1[6], lr -; CHECK-MVE-NEXT: vmov.16 q1[7], r12 +; CHECK-MVE-NEXT: vmov.16 q1[0], r4 +; CHECK-MVE-NEXT: vmov.16 q1[1], r1 +; CHECK-MVE-NEXT: vmov.16 q1[2], r0 +; CHECK-MVE-NEXT: vmov.16 q1[3], r3 +; CHECK-MVE-NEXT: vmov.16 q1[4], r2 +; CHECK-MVE-NEXT: vmov.16 q1[5], lr +; CHECK-MVE-NEXT: vmov.16 q1[6], r12 +; CHECK-MVE-NEXT: csetm r0, ne +; CHECK-MVE-NEXT: vmov.16 q1[7], r0 ; CHECK-MVE-NEXT: vand q0, q1, q0 -; CHECK-MVE-NEXT: pop {r4, r5, r7, pc} +; CHECK-MVE-NEXT: pop {r4, pc} ; ; CHECK-MVEFP-LABEL: zext_v8i1_v8f32: ; CHECK-MVEFP: @ %bb.0: @ %entry @@ -614,24 +615,24 @@ entry: define arm_aapcs_vfpcc <4 x float> @fptoui_v4i1_v4f32(<4 x float> %src) { ; CHECK-MVE-LABEL: fptoui_v4i1_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s3 -; CHECK-MVE-NEXT: vldr s8, .LCPI20_0 ; CHECK-MVE-NEXT: vcvt.s32.f32 s2, s2 -; CHECK-MVE-NEXT: vcvt.s32.f32 s10, s1 -; CHECK-MVE-NEXT: vmov.f32 s4, #1.000000e+00 +; CHECK-MVE-NEXT: vldr s10, .LCPI20_0 +; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s1 +; CHECK-MVE-NEXT: vcvt.s32.f32 s4, s3 ; CHECK-MVE-NEXT: vcvt.s32.f32 s0, s0 
-; CHECK-MVE-NEXT: vmov r0, s6 -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vmov r0, s2 -; CHECK-MVE-NEXT: vseleq.f32 s3, s8, s4 -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vmov r0, s10 -; CHECK-MVE-NEXT: vseleq.f32 s2, s8, s4 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vmov.f32 s8, #1.000000e+00 +; CHECK-MVE-NEXT: vmov r3, s2 +; CHECK-MVE-NEXT: vmov r2, s6 +; CHECK-MVE-NEXT: vmov r1, s4 ; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vseleq.f32 s1, s8, s4 +; CHECK-MVE-NEXT: cmp r3, #0 +; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s8 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: vseleq.f32 s1, s10, s8 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s10, s8 ; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 +; CHECK-MVE-NEXT: vseleq.f32 s0, s10, s8 ; CHECK-MVE-NEXT: bx lr ; CHECK-MVE-NEXT: .p2align 2 ; CHECK-MVE-NEXT: @ %bb.1: @@ -654,24 +655,27 @@ entry: define arm_aapcs_vfpcc <4 x float> @fptosi_v4i1_v4f32(<4 x float> %src) { ; CHECK-MVE-LABEL: fptosi_v4i1_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcvt.s32.f32 s8, s3 -; CHECK-MVE-NEXT: vldr s10, .LCPI21_0 ; CHECK-MVE-NEXT: vcvt.s32.f32 s2, s2 -; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s1 -; CHECK-MVE-NEXT: vmov.f32 s4, #1.000000e+00 +; CHECK-MVE-NEXT: vldr s8, .LCPI21_0 +; CHECK-MVE-NEXT: vcvt.s32.f32 s4, s1 +; CHECK-MVE-NEXT: vcvt.s32.f32 s10, s3 ; CHECK-MVE-NEXT: vcvt.s32.f32 s0, s0 -; CHECK-MVE-NEXT: vmov r0, s8 -; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov r0, s2 -; CHECK-MVE-NEXT: vseleq.f32 s3, s10, s4 -; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov r0, s6 -; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s4 -; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vmov.f32 s6, #1.000000e+00 +; CHECK-MVE-NEXT: vmov r3, s2 +; CHECK-MVE-NEXT: vmov r2, s4 +; CHECK-MVE-NEXT: vmov r1, s10 ; CHECK-MVE-NEXT: vmov r0, s0 -; CHECK-MVE-NEXT: vseleq.f32 s1, s10, s4 -; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vseleq.f32 s0, s10, s4 +; 
CHECK-MVE-NEXT: lsls r3, r3, #31 +; CHECK-MVE-NEXT: lsl.w r2, r2, #31 +; CHECK-MVE-NEXT: vseleq.f32 s2, s8, s6 +; CHECK-MVE-NEXT: cmp r2, #0 +; CHECK-MVE-NEXT: lsl.w r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f32 s1, s8, s6 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: lsl.w r0, r0, #31 +; CHECK-MVE-NEXT: vseleq.f32 s3, s8, s6 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s6 ; CHECK-MVE-NEXT: bx lr ; CHECK-MVE-NEXT: .p2align 2 ; CHECK-MVE-NEXT: @ %bb.1: @@ -801,45 +805,45 @@ define arm_aapcs_vfpcc <8 x half> @fptoui_v8i1_v8f16(<8 x half> %src) { ; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s0 ; CHECK-MVE-NEXT: vmovx.f16 s0, s0 ; CHECK-MVE-NEXT: vcvt.s32.f16 s0, s0 +; CHECK-MVE-NEXT: vmov r0, s4 +; CHECK-MVE-NEXT: vmov r1, s0 ; CHECK-MVE-NEXT: vldr.16 s8, .LCPI24_0 -; CHECK-MVE-NEXT: vmov r0, s0 ; CHECK-MVE-NEXT: vmov.f16 s6, #1.000000e+00 -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vmov r0, s4 -; CHECK-MVE-NEXT: vseleq.f16 s10, s8, s6 -; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s1 -; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vseleq.f16 s0, s8, s6 -; CHECK-MVE-NEXT: vins.f16 s0, s10 ; CHECK-MVE-NEXT: vmovx.f16 s10, s1 ; CHECK-MVE-NEXT: vcvt.s32.f16 s10, s10 -; CHECK-MVE-NEXT: vmov r0, s10 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmov r1, s10 +; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s6 ; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s0, s8, s6 +; CHECK-MVE-NEXT: vmovx.f16 s10, s3 +; CHECK-MVE-NEXT: vins.f16 s0, s4 +; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s1 ; CHECK-MVE-NEXT: vmov r0, s4 -; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s2 -; CHECK-MVE-NEXT: vmovx.f16 s2, s2 -; CHECK-MVE-NEXT: vcvt.s32.f16 s2, s2 -; CHECK-MVE-NEXT: vseleq.f16 s10, s8, s6 +; CHECK-MVE-NEXT: vcvt.s32.f16 s10, s10 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s6 ; CHECK-MVE-NEXT: cmp r0, #0 -; CHECK-MVE-NEXT: vmov r0, s2 ; CHECK-MVE-NEXT: vseleq.f16 s1, s8, s6 -; CHECK-MVE-NEXT: vins.f16 s1, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vins.f16 s1, 
s4 +; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s2 +; CHECK-MVE-NEXT: vmovx.f16 s2, s2 ; CHECK-MVE-NEXT: vmov r0, s4 -; CHECK-MVE-NEXT: vseleq.f16 s10, s8, s6 -; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s3 +; CHECK-MVE-NEXT: vcvt.s32.f16 s2, s2 +; CHECK-MVE-NEXT: vmov r1, s2 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vmov r1, s10 +; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s6 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s2, s8, s6 -; CHECK-MVE-NEXT: vins.f16 s2, s10 -; CHECK-MVE-NEXT: vmovx.f16 s10, s3 -; CHECK-MVE-NEXT: vcvt.s32.f16 s10, s10 -; CHECK-MVE-NEXT: vmov r0, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vins.f16 s2, s4 +; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s3 ; CHECK-MVE-NEXT: vmov r0, s4 -; CHECK-MVE-NEXT: vseleq.f16 s10, s8, s6 +; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s6 ; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s3, s8, s6 -; CHECK-MVE-NEXT: vins.f16 s3, s10 +; CHECK-MVE-NEXT: vins.f16 s3, s4 ; CHECK-MVE-NEXT: bx lr ; CHECK-MVE-NEXT: .p2align 1 ; CHECK-MVE-NEXT: @ %bb.1: @@ -865,45 +869,49 @@ define arm_aapcs_vfpcc <8 x half> @fptosi_v8i1_v8f16(<8 x half> %src) { ; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s0 ; CHECK-MVE-NEXT: vmovx.f16 s0, s0 ; CHECK-MVE-NEXT: vcvt.s32.f16 s0, s0 +; CHECK-MVE-NEXT: vmov r0, s4 +; CHECK-MVE-NEXT: vmov r1, s0 ; CHECK-MVE-NEXT: vldr.16 s8, .LCPI25_0 -; CHECK-MVE-NEXT: vmov r0, s0 ; CHECK-MVE-NEXT: vmov.f16 s6, #1.000000e+00 -; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov r0, s4 -; CHECK-MVE-NEXT: vseleq.f16 s10, s8, s6 -; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s1 -; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vseleq.f16 s0, s8, s6 -; CHECK-MVE-NEXT: vins.f16 s0, s10 ; CHECK-MVE-NEXT: vmovx.f16 s10, s1 ; CHECK-MVE-NEXT: vcvt.s32.f16 s10, s10 -; CHECK-MVE-NEXT: vmov r0, s10 ; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s6 +; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: vseleq.f16 s0, s8, s6 +; CHECK-MVE-NEXT: 
vmov r1, s10 +; CHECK-MVE-NEXT: vins.f16 s0, s4 +; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s1 ; CHECK-MVE-NEXT: vmov r0, s4 -; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s2 -; CHECK-MVE-NEXT: vmovx.f16 s2, s2 -; CHECK-MVE-NEXT: vseleq.f16 s10, s8, s6 -; CHECK-MVE-NEXT: vcvt.s32.f16 s2, s2 +; CHECK-MVE-NEXT: vmovx.f16 s10, s3 +; CHECK-MVE-NEXT: vcvt.s32.f16 s10, s10 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 -; CHECK-MVE-NEXT: vmov r0, s2 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s1, s8, s6 -; CHECK-MVE-NEXT: vins.f16 s1, s10 -; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vins.f16 s1, s4 +; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s2 +; CHECK-MVE-NEXT: vmovx.f16 s2, s2 ; CHECK-MVE-NEXT: vmov r0, s4 -; CHECK-MVE-NEXT: vseleq.f16 s10, s8, s6 -; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s3 +; CHECK-MVE-NEXT: vcvt.s32.f16 s2, s2 +; CHECK-MVE-NEXT: vmov r1, s2 ; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s6 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s2, s8, s6 -; CHECK-MVE-NEXT: vins.f16 s2, s10 -; CHECK-MVE-NEXT: vmovx.f16 s10, s3 -; CHECK-MVE-NEXT: vcvt.s32.f16 s10, s10 -; CHECK-MVE-NEXT: vmov r0, s10 -; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: vmov r1, s10 +; CHECK-MVE-NEXT: vins.f16 s2, s4 +; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s3 ; CHECK-MVE-NEXT: vmov r0, s4 -; CHECK-MVE-NEXT: vseleq.f16 s10, s8, s6 +; CHECK-MVE-NEXT: lsls r1, r1, #31 +; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s6 ; CHECK-MVE-NEXT: lsls r0, r0, #31 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f16 s3, s8, s6 -; CHECK-MVE-NEXT: vins.f16 s3, s10 +; CHECK-MVE-NEXT: vins.f16 s3, s4 ; CHECK-MVE-NEXT: bx lr ; CHECK-MVE-NEXT: .p2align 1 ; CHECK-MVE-NEXT: @ %bb.1: diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-or.ll b/llvm/test/CodeGen/Thumb2/mve-pred-or.ll index 46406aeebfa4e..ff5ee8929aae8 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-or.ll +++ 
b/llvm/test/CodeGen/Thumb2/mve-pred-or.ll @@ -378,23 +378,21 @@ entry: define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: cmpeqz_v2i1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r0, r1, d2 ; CHECK-NEXT: orrs r0, r1 -; CHECK-NEXT: vmov r1, r2, d2 +; CHECK-NEXT: vmov r1, r2, d0 +; CHECK-NEXT: cset r0, eq ; CHECK-NEXT: orrs r1, r2 -; CHECK-NEXT: cset r1, eq -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csinc r0, r1, zr, ne +; CHECK-NEXT: csinc r0, r0, zr, ne ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: bfi r1, r0, #0, #8 -; CHECK-NEXT: vmov r0, r2, d1 +; CHECK-NEXT: vmov r0, r2, d3 ; CHECK-NEXT: orrs r0, r2 -; CHECK-NEXT: vmov r2, r3, d3 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: cset r0, eq ; CHECK-NEXT: orrs r2, r3 -; CHECK-NEXT: cset r2, eq -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csinc r0, r2, zr, ne +; CHECK-NEXT: csinc r0, r0, zr, ne ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll index bf6468baac22b..9400f24e7192c 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-vctpvpsel.ll @@ -4,15 +4,15 @@ define void @arm_min_helium_f32(ptr %pSrc, i32 %blockSize, ptr nocapture %pResult, ptr nocapture %pIndex) { ; CHECK-LABEL: arm_min_helium_f32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r6, r7, lr} -; CHECK-NEXT: push {r4, r6, r7, lr} -; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: movs r4, #0 ; CHECK-NEXT: mov.w r12, #4 -; CHECK-NEXT: vidup.u32 q2, r6, #1 -; CHECK-NEXT: movw r4, #54437 -; CHECK-NEXT: movt r4, #21352 +; CHECK-NEXT: vidup.u32 q2, r4, #1 +; CHECK-NEXT: movw r5, #54437 +; CHECK-NEXT: movt r5, #21352 +; CHECK-NEXT: vdup.32 q1, r5 ; CHECK-NEXT: vmov.i32 q0, #0x0 -; CHECK-NEXT: vdup.32 q1, 
r4 ; CHECK-NEXT: dlstp.32 lr, r1 ; CHECK-NEXT: .LBB0_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 @@ -33,7 +33,7 @@ define void @arm_min_helium_f32(ptr %pSrc, i32 %blockSize, ptr nocapture %pResul ; CHECK-NEXT: vminv.u32 r1, q0 ; CHECK-NEXT: str r1, [r3] ; CHECK-NEXT: vstr s8, [r2] -; CHECK-NEXT: pop {r4, r6, r7, pc} +; CHECK-NEXT: pop {r4, r5, r7, pc} ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.3: ; CHECK-NEXT: .LCPI0_0: diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll b/llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll index f70af5661f4c9..a990cee1f5fb3 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-vselect.ll @@ -70,29 +70,27 @@ entry: define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { ; CHECK-LABEL: cmpeqz_v2i1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov r0, r1, d4 +; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: orrs r0, r1 -; CHECK-NEXT: vmov r1, r2, d0 +; CHECK-NEXT: vmov r1, r2, d2 +; CHECK-NEXT: cset r0, eq ; CHECK-NEXT: orrs r1, r2 -; CHECK-NEXT: vmov r2, r3, d2 +; CHECK-NEXT: vmov r2, r3, d4 ; CHECK-NEXT: cset r1, eq ; CHECK-NEXT: orrs r2, r3 -; CHECK-NEXT: cset r2, eq -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csel r0, r1, r2, eq +; CHECK-NEXT: csel r0, r0, r1, eq ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: bfi r1, r0, #0, #8 -; CHECK-NEXT: vmov r0, r2, d5 -; CHECK-NEXT: orr.w r12, r0, r2 -; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: vmov r0, r2, d1 +; CHECK-NEXT: orrs r0, r2 +; CHECK-NEXT: vmov r2, r3, d3 +; CHECK-NEXT: cset r12, eq ; CHECK-NEXT: orrs r2, r3 -; CHECK-NEXT: vmov r3, r0, d3 +; CHECK-NEXT: vmov r3, r0, d5 ; CHECK-NEXT: cset r2, eq ; CHECK-NEXT: orrs r0, r3 -; CHECK-NEXT: cset r0, eq -; CHECK-NEXT: cmp.w r12, #0 -; CHECK-NEXT: csel r0, r2, r0, eq +; CHECK-NEXT: csel r0, r12, r2, eq ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 @@ -176,29 +174,27 @@ entry: define 
arm_aapcs_vfpcc <2 x i64> @cmpnez_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { ; CHECK-LABEL: cmpnez_v2i1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov r0, r1, d4 +; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: orrs r0, r1 -; CHECK-NEXT: vmov r1, r2, d0 +; CHECK-NEXT: vmov r1, r2, d2 +; CHECK-NEXT: cset r0, ne ; CHECK-NEXT: orrs r1, r2 -; CHECK-NEXT: vmov r2, r3, d2 +; CHECK-NEXT: vmov r2, r3, d4 ; CHECK-NEXT: cset r1, ne ; CHECK-NEXT: orrs r2, r3 -; CHECK-NEXT: cset r2, ne -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csel r0, r1, r2, ne +; CHECK-NEXT: csel r0, r0, r1, ne ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: bfi r1, r0, #0, #8 -; CHECK-NEXT: vmov r0, r2, d5 -; CHECK-NEXT: orr.w r12, r0, r2 -; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: vmov r0, r2, d1 +; CHECK-NEXT: orrs r0, r2 +; CHECK-NEXT: vmov r2, r3, d3 +; CHECK-NEXT: cset r12, ne ; CHECK-NEXT: orrs r2, r3 -; CHECK-NEXT: vmov r3, r0, d3 +; CHECK-NEXT: vmov r3, r0, d5 ; CHECK-NEXT: cset r2, ne ; CHECK-NEXT: orrs r0, r3 -; CHECK-NEXT: cset r0, ne -; CHECK-NEXT: cmp.w r12, #0 -; CHECK-NEXT: csel r0, r2, r0, ne +; CHECK-NEXT: csel r0, r12, r2, ne ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 @@ -284,19 +280,19 @@ entry: define arm_aapcs_vfpcc <2 x i64> @cmpsltz_v2i1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { ; CHECK-LABEL: cmpsltz_v2i1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov r0, s9 +; CHECK-NEXT: vmov r2, s9 ; CHECK-NEXT: movs r3, #0 -; CHECK-NEXT: vmov r1, s1 -; CHECK-NEXT: vmov r2, s5 -; CHECK-NEXT: cmp.w r3, r0, lsr #31 -; CHECK-NEXT: csel r0, r1, r2, ne -; CHECK-NEXT: vmov r1, s11 +; CHECK-NEXT: vmov r0, s1 +; CHECK-NEXT: vmov r1, s5 +; CHECK-NEXT: cmp.w r3, r2, lsr #31 +; CHECK-NEXT: vmov r2, s7 +; CHECK-NEXT: csel r0, r0, r1, ne +; CHECK-NEXT: vmov r1, s3 ; CHECK-NEXT: asr.w r12, r0, #31 -; CHECK-NEXT: vmov r2, s3 -; CHECK-NEXT: vmov r0, s7 -; CHECK-NEXT: cmp.w r3, r1, lsr #31 +; CHECK-NEXT: vmov r0, s11 +; CHECK-NEXT: cmp.w 
r3, r0, lsr #31 ; CHECK-NEXT: bfi r3, r12, #0, #8 -; CHECK-NEXT: csel r0, r2, r0, ne +; CHECK-NEXT: csel r0, r1, r2, ne ; CHECK-NEXT: asrs r0, r0, #31 ; CHECK-NEXT: bfi r3, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r3 @@ -381,34 +377,34 @@ entry: define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1_i1(<2 x i64> %a, <2 x i64> %b, i64 %c) { ; CHECK-LABEL: cmpeqz_v2i1_i1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: orr.w r3, r0, r1 -; CHECK-NEXT: vmov r0, r1, d2 -; CHECK-NEXT: orrs r0, r1 -; CHECK-NEXT: vmov r1, r2, d3 +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: vmov r2, r3, d2 +; CHECK-NEXT: orrs r2, r3 +; CHECK-NEXT: vmov r3, r4, d3 ; CHECK-NEXT: csetm r12, eq -; CHECK-NEXT: movs r0, #0 -; CHECK-NEXT: orrs r1, r2 -; CHECK-NEXT: vmov r1, r2, d0 -; CHECK-NEXT: csetm r4, eq -; CHECK-NEXT: orrs r1, r2 -; CHECK-NEXT: vmov r1, r2, d1 +; CHECK-NEXT: movs r2, #0 +; CHECK-NEXT: orrs r3, r4 +; CHECK-NEXT: vmov r4, r3, d0 +; CHECK-NEXT: csetm r5, eq +; CHECK-NEXT: orrs r3, r4 +; CHECK-NEXT: vmov r3, r4, d1 ; CHECK-NEXT: csetm lr, eq -; CHECK-NEXT: orrs r1, r2 -; CHECK-NEXT: csetm r1, eq -; CHECK-NEXT: cbz r3, .LBB15_2 +; CHECK-NEXT: orrs r3, r4 +; CHECK-NEXT: csetm r4, eq +; CHECK-NEXT: orrs r0, r1 +; CHECK-NEXT: beq .LBB15_2 ; CHECK-NEXT: @ %bb.1: @ %select.false -; CHECK-NEXT: bfi r0, r12, #0, #8 -; CHECK-NEXT: bfi r0, r4, #8, #8 +; CHECK-NEXT: bfi r2, r12, #0, #8 +; CHECK-NEXT: bfi r2, r5, #8, #8 ; CHECK-NEXT: b .LBB15_3 ; CHECK-NEXT: .LBB15_2: -; CHECK-NEXT: bfi r0, lr, #0, #8 -; CHECK-NEXT: bfi r0, r1, #8, #8 +; CHECK-NEXT: bfi r2, lr, #0, #8 +; CHECK-NEXT: bfi r2, r4, #8, #8 ; CHECK-NEXT: .LBB15_3: @ %select.end -; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vmsr p0, r2 ; CHECK-NEXT: vpsel q0, q0, q1 -; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: pop {r4, r5, r7, pc} entry: %c1 = icmp eq <2 x i64> %a, zeroinitializer %c2 = icmp eq <2 x i64> %b, zeroinitializer diff --git 
a/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll b/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll index 0ff262e6b53ab..f4a0d5120305a 100644 --- a/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-xor.ll @@ -458,26 +458,24 @@ entry: define arm_aapcs_vfpcc <2 x i64> @cmpeqz_v2i1(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: cmpeqz_v2i1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r0, r1, d2 ; CHECK-NEXT: orrs r0, r1 -; CHECK-NEXT: vmov r1, r2, d2 +; CHECK-NEXT: vmov r1, r2, d0 +; CHECK-NEXT: cset r0, eq ; CHECK-NEXT: orrs r1, r2 -; CHECK-NEXT: cset r1, eq -; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it eq -; CHECK-NEXT: eoreq r1, r1, #1 -; CHECK-NEXT: rsbs r0, r1, #0 +; CHECK-NEXT: eoreq r0, r0, #1 +; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: bfi r1, r0, #0, #8 -; CHECK-NEXT: vmov r0, r2, d1 +; CHECK-NEXT: vmov r0, r2, d3 ; CHECK-NEXT: orrs r0, r2 -; CHECK-NEXT: vmov r2, r3, d3 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: cset r0, eq ; CHECK-NEXT: orrs r2, r3 -; CHECK-NEXT: cset r2, eq -; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: it eq -; CHECK-NEXT: eoreq r2, r2, #1 -; CHECK-NEXT: rsbs r0, r2, #0 +; CHECK-NEXT: eoreq r0, r0, #1 +; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: bfi r1, r0, #8, #8 ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpsel q0, q0, q1 diff --git a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll index 8eb941371f993..29b56639bd769 100644 --- a/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-satmul-loops.ll @@ -6,102 +6,99 @@ define arm_aapcs_vfpcc void @ssatmul_s_q31(ptr nocapture readonly %pSrcA, ptr no ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #12 -; CHECK-NEXT: sub sp, #12 +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 ; CHECK-NEXT: cmp r3, #0 ; CHECK-NEXT: beq.w .LBB0_8 ; CHECK-NEXT: @ 
%bb.1: @ %entry +; CHECK-NEXT: mov r11, r2 ; CHECK-NEXT: cmp r3, #1 ; CHECK-NEXT: bne .LBB0_3 ; CHECK-NEXT: @ %bb.2: -; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: movs r2, #0 ; CHECK-NEXT: mov r12, r0 ; CHECK-NEXT: mov r8, r1 -; CHECK-NEXT: mov r10, r2 +; CHECK-NEXT: mov r10, r11 ; CHECK-NEXT: b .LBB0_6 ; CHECK-NEXT: .LBB0_3: @ %vector.ph -; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: bic r3, r3, #1 -; CHECK-NEXT: subs r7, r3, #2 -; CHECK-NEXT: movs r6, #1 +; CHECK-NEXT: bic r2, r3, #1 ; CHECK-NEXT: adr r4, .LCPI0_0 -; CHECK-NEXT: str r3, [sp] @ 4-byte Spill +; CHECK-NEXT: subs r7, r2, #2 +; CHECK-NEXT: movs r6, #1 +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: add.w r10, r11, r2, lsl #2 ; CHECK-NEXT: add.w lr, r6, r7, lsr #1 -; CHECK-NEXT: add.w r10, r2, r3, lsl #2 -; CHECK-NEXT: add.w r8, r1, r3, lsl #2 -; CHECK-NEXT: add.w r12, r0, r3, lsl #2 +; CHECK-NEXT: str r2, [sp] @ 4-byte Spill +; CHECK-NEXT: add.w r8, r1, r2, lsl #2 +; CHECK-NEXT: add.w r12, r0, r2, lsl #2 ; CHECK-NEXT: vldrw.u32 q0, [r4] ; CHECK-NEXT: vmvn.i32 q1, #0x80000000 ; CHECK-NEXT: .LBB0_4: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrd r4, r3, [r0], #8 +; CHECK-NEXT: ldrd r4, r2, [r0], #8 ; CHECK-NEXT: movs r5, #0 ; CHECK-NEXT: ldrd r7, r6, [r1], #8 -; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: smull r4, r11, r7, r4 -; CHECK-NEXT: asrl r4, r11, #31 +; CHECK-NEXT: smull r4, r7, r7, r4 +; CHECK-NEXT: asrl r4, r7, #31 ; CHECK-NEXT: rsbs.w r9, r4, #-2147483648 ; CHECK-NEXT: mov.w r9, #-1 -; CHECK-NEXT: sbcs.w r3, r9, r11 +; CHECK-NEXT: sbcs.w r3, r9, r7 ; CHECK-NEXT: csetm r3, lt ; CHECK-NEXT: bfi r5, r3, #0, #8 -; CHECK-NEXT: ldr r3, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: smull r6, r3, r6, r3 -; CHECK-NEXT: asrl r6, r3, #31 -; CHECK-NEXT: rsbs.w r7, r6, #-2147483648 -; CHECK-NEXT: vmov q2[2], q2[0], r4, r6 -; CHECK-NEXT: sbcs.w r7, r9, r3 -; CHECK-NEXT: vmov q2[3], q2[1], r11, r3 -; CHECK-NEXT: csetm r7, lt -; 
CHECK-NEXT: mvn r6, #-2147483648 -; CHECK-NEXT: bfi r5, r7, #8, #8 +; CHECK-NEXT: smull r2, r3, r6, r2 +; CHECK-NEXT: asrl r2, r3, #31 +; CHECK-NEXT: rsbs.w r6, r2, #-2147483648 +; CHECK-NEXT: vmov q2[2], q2[0], r4, r2 +; CHECK-NEXT: sbcs.w r6, r9, r3 +; CHECK-NEXT: vmov q2[3], q2[1], r7, r3 +; CHECK-NEXT: csetm r6, lt +; CHECK-NEXT: bfi r5, r6, #8, #8 ; CHECK-NEXT: vmsr p0, r5 +; CHECK-NEXT: mvn r5, #-2147483648 ; CHECK-NEXT: vpsel q2, q2, q0 -; CHECK-NEXT: vmov r3, r4, d4 -; CHECK-NEXT: subs r3, r3, r6 -; CHECK-NEXT: sbcs r3, r4, #0 -; CHECK-NEXT: mov.w r4, #0 -; CHECK-NEXT: csetm r3, lt -; CHECK-NEXT: bfi r4, r3, #0, #8 -; CHECK-NEXT: vmov r3, r5, d5 -; CHECK-NEXT: subs r3, r3, r6 -; CHECK-NEXT: sbcs r3, r5, #0 -; CHECK-NEXT: csetm r3, lt -; CHECK-NEXT: bfi r4, r3, #8, #8 -; CHECK-NEXT: vmsr p0, r4 +; CHECK-NEXT: vmov r2, r3, d4 +; CHECK-NEXT: subs r2, r2, r5 +; CHECK-NEXT: sbcs r2, r3, #0 +; CHECK-NEXT: mov.w r3, #0 +; CHECK-NEXT: csetm r2, lt +; CHECK-NEXT: bfi r3, r2, #0, #8 +; CHECK-NEXT: vmov r2, r4, d5 +; CHECK-NEXT: subs r2, r2, r5 +; CHECK-NEXT: sbcs r2, r4, #0 +; CHECK-NEXT: csetm r2, lt +; CHECK-NEXT: bfi r3, r2, #8, #8 +; CHECK-NEXT: vmsr p0, r3 ; CHECK-NEXT: vpsel q2, q2, q1 -; CHECK-NEXT: vmov r3, s10 -; CHECK-NEXT: vmov r4, s8 -; CHECK-NEXT: strd r4, r3, [r2], #8 +; CHECK-NEXT: vmov r2, s10 +; CHECK-NEXT: vmov r3, s8 +; CHECK-NEXT: strd r3, r2, [r11], #8 ; CHECK-NEXT: le lr, .LBB0_4 ; CHECK-NEXT: @ %bb.5: @ %middle.block -; CHECK-NEXT: ldrd r7, r3, [sp] @ 8-byte Folded Reload -; CHECK-NEXT: cmp r7, r3 +; CHECK-NEXT: ldrd r2, r3, [sp] @ 8-byte Folded Reload +; CHECK-NEXT: cmp r2, r3 ; CHECK-NEXT: beq .LBB0_8 ; CHECK-NEXT: .LBB0_6: @ %for.body.preheader -; CHECK-NEXT: sub.w lr, r3, r7 +; CHECK-NEXT: sub.w lr, r3, r2 ; CHECK-NEXT: mov.w r0, #-1 ; CHECK-NEXT: mov.w r1, #-2147483648 -; CHECK-NEXT: mvn r2, #-2147483648 +; CHECK-NEXT: mvn r3, #-2147483648 ; CHECK-NEXT: .LBB0_7: @ %for.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: 
ldr r3, [r12], #4 +; CHECK-NEXT: ldr r2, [r12], #4 ; CHECK-NEXT: ldr r4, [r8], #4 -; CHECK-NEXT: smull r4, r3, r4, r3 -; CHECK-NEXT: asrl r4, r3, #31 -; CHECK-NEXT: subs r5, r1, r4 -; CHECK-NEXT: sbcs.w r5, r0, r3 -; CHECK-NEXT: cset r5, lt -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r4, r4, r1, ne -; CHECK-NEXT: csel r3, r3, r0, ne -; CHECK-NEXT: subs r5, r4, r2 -; CHECK-NEXT: sbcs r3, r3, #0 -; CHECK-NEXT: csel r3, r4, r2, lt -; CHECK-NEXT: str r3, [r10], #4 +; CHECK-NEXT: smull r2, r5, r4, r2 +; CHECK-NEXT: asrl r2, r5, #31 +; CHECK-NEXT: subs r4, r1, r2 +; CHECK-NEXT: sbcs.w r4, r0, r5 +; CHECK-NEXT: csel r2, r2, r1, lt +; CHECK-NEXT: csel r4, r5, r0, lt +; CHECK-NEXT: subs r5, r2, r3 +; CHECK-NEXT: sbcs r4, r4, #0 +; CHECK-NEXT: csel r2, r2, r3, lt +; CHECK-NEXT: str r2, [r10], #4 ; CHECK-NEXT: le lr, .LBB0_7 ; CHECK-NEXT: .LBB0_8: @ %for.cond.cleanup -; CHECK-NEXT: add sp, #12 +; CHECK-NEXT: add sp, #8 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.9: @@ -322,10 +319,8 @@ define arm_aapcs_vfpcc void @ssatmul_4_q31(ptr nocapture readonly %pSrcA, ptr no ; CHECK-NEXT: asrl r4, r1, #31 ; CHECK-NEXT: subs r5, r3, r4 ; CHECK-NEXT: sbcs.w r5, r0, r1 -; CHECK-NEXT: cset r5, lt -; CHECK-NEXT: cmp r5, #0 -; CHECK-NEXT: csel r4, r4, r3, ne -; CHECK-NEXT: csel r1, r1, r0, ne +; CHECK-NEXT: csel r4, r4, r3, lt +; CHECK-NEXT: csel r1, r1, r0, lt ; CHECK-NEXT: subs r5, r4, r2 ; CHECK-NEXT: sbcs r1, r1, #0 ; CHECK-NEXT: csel r1, r4, r2, lt diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll index 6f2539e3cad9a..cc856730d90cf 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll @@ -5,25 +5,25 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_oeq_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_oeq_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s5 -; CHECK-MVE-NEXT: vmrs 
APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: cset r0, eq ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s7 +; CHECK-MVE-NEXT: cset r0, eq +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: cset r1, eq ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s6 ; CHECK-MVE-NEXT: cset r2, eq ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, eq -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -41,13 +41,13 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_one_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s5 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s3, s7 ; CHECK-MVE-NEXT: cset r0, mi ; CHECK-MVE-NEXT: csinc r0, r0, zr, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s3, s7 +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: cset r1, mi ; CHECK-MVE-NEXT: csinc r1, r1, zr, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr @@ -57,13 +57,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, <4 x float> ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, mi ; CHECK-MVE-NEXT: csinc r3, r3, zr, le -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; 
CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -82,25 +82,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ogt_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ogt_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s5 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: cset r0, gt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s7 +; CHECK-MVE-NEXT: cset r0, gt +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: cset r1, gt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s6 ; CHECK-MVE-NEXT: cset r2, gt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, gt -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -118,25 +118,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_oge_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_oge_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s5 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: cset r0, ge ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s7 +; CHECK-MVE-NEXT: cset r0, ge +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: cset r1, ge ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 
s2, s6 ; CHECK-MVE-NEXT: cset r2, ge ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, ge -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -154,25 +154,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_olt_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_olt_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s5 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: cset r0, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s7 +; CHECK-MVE-NEXT: cset r0, mi +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: cset r1, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s6 ; CHECK-MVE-NEXT: cset r2, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, mi -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -190,25 +190,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ole_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ole_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s5 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 
s0, s4 -; CHECK-MVE-NEXT: cset r0, ls ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s7 +; CHECK-MVE-NEXT: cset r0, ls +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: cset r1, ls ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s6 ; CHECK-MVE-NEXT: cset r2, ls ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, ls -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -226,13 +226,13 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ueq_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ueq_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s5 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s3, s7 ; CHECK-MVE-NEXT: cset r0, eq ; CHECK-MVE-NEXT: csinc r0, r0, zr, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s3, s7 +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: cset r1, eq ; CHECK-MVE-NEXT: csinc r1, r1, zr, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr @@ -242,13 +242,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ueq_v4f32(<4 x float> %src, <4 x float> ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, eq ; CHECK-MVE-NEXT: csinc r3, r3, zr, vc -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; 
CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -295,25 +295,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ugt_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ugt_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s5 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: cset r0, hi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s7 +; CHECK-MVE-NEXT: cset r0, hi +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: cset r1, hi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s6 ; CHECK-MVE-NEXT: cset r2, hi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, hi -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -331,25 +331,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_uge_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_uge_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s5 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: cset r0, pl ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s7 +; CHECK-MVE-NEXT: cset r0, pl +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: cset r1, pl ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s6 ; CHECK-MVE-NEXT: cset r2, pl ; 
CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, pl -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -367,25 +367,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ult_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ult_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s5 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: cset r0, lt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s7 +; CHECK-MVE-NEXT: cset r0, lt +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: cset r1, lt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s6 ; CHECK-MVE-NEXT: cset r2, lt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, lt -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -403,25 +403,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ule_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ule_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s5 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: cset r0, le ; 
CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s7 +; CHECK-MVE-NEXT: cset r0, le +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: cset r1, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s6 ; CHECK-MVE-NEXT: cset r2, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, le -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -439,25 +439,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ord_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ord_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s5 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: cset r0, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s7 +; CHECK-MVE-NEXT: cset r0, vc +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: cset r1, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s6 ; CHECK-MVE-NEXT: cset r2, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, vc -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -476,25 +476,25 @@ entry: define 
arm_aapcs_vfpcc <4 x float> @vcmp_uno_v4f32(<4 x float> %src, <4 x float> %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_uno_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s5 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: cset r0, vs ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s7 +; CHECK-MVE-NEXT: cset r0, vs +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s5 ; CHECK-MVE-NEXT: cset r1, vs ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s6 ; CHECK-MVE-NEXT: cset r2, vs ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, vs -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll index d42c393743f4f..586b731c934be 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll @@ -5,25 +5,25 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_oeq_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_oeq_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: cset r0, eq ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s4 +; CHECK-MVE-NEXT: cset r0, eq +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: cset r1, eq ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s4 ; CHECK-MVE-NEXT: cset r2, 
eq ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, eq -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -44,13 +44,13 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_one_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s3, s4 ; CHECK-MVE-NEXT: cset r0, mi ; CHECK-MVE-NEXT: csinc r0, r0, zr, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s3, s4 +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: cset r1, mi ; CHECK-MVE-NEXT: csinc r1, r1, zr, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr @@ -60,13 +60,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, float %src2 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, mi ; CHECK-MVE-NEXT: csinc r3, r3, zr, le -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -88,25 +88,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ogt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ogt_v4f32: ; CHECK-MVE: @ %bb.0: @ 
%entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: cset r0, gt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s4 +; CHECK-MVE-NEXT: cset r0, gt +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: cset r1, gt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s4 ; CHECK-MVE-NEXT: cset r2, gt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, gt -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -127,25 +127,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_oge_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_oge_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: cset r0, ge ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s4 +; CHECK-MVE-NEXT: cset r0, ge +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: cset r1, ge ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s4 ; CHECK-MVE-NEXT: cset r2, ge ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, ge -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, 
s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -166,25 +166,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_olt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_olt_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: cset r0, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s4 +; CHECK-MVE-NEXT: cset r0, mi +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: cset r1, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s4 ; CHECK-MVE-NEXT: cset r2, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, mi -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -205,25 +205,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ole_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ole_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: cset r0, ls ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s4 +; CHECK-MVE-NEXT: cset r0, ls +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: cset r1, ls ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s4 ; CHECK-MVE-NEXT: cset r2, ls ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, ls -; 
CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -244,13 +244,13 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ueq_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ueq_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s3, s4 ; CHECK-MVE-NEXT: cset r0, eq ; CHECK-MVE-NEXT: csinc r0, r0, zr, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s3, s4 +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: cset r1, eq ; CHECK-MVE-NEXT: csinc r1, r1, zr, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr @@ -260,13 +260,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ueq_v4f32(<4 x float> %src, float %src2 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, eq ; CHECK-MVE-NEXT: csinc r3, r3, zr, vc -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -319,25 +319,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ugt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ugt_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s4 -; CHECK-MVE-NEXT: vmrs 
APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: cset r0, hi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s4 +; CHECK-MVE-NEXT: cset r0, hi +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: cset r1, hi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s4 ; CHECK-MVE-NEXT: cset r2, hi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, hi -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -358,25 +358,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_uge_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_uge_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: cset r0, pl ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s4 +; CHECK-MVE-NEXT: cset r0, pl +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: cset r1, pl ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s4 ; CHECK-MVE-NEXT: cset r2, pl ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, pl -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, 
s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -397,25 +397,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ult_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ult_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: cset r0, lt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s4 +; CHECK-MVE-NEXT: cset r0, lt +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: cset r1, lt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s4 ; CHECK-MVE-NEXT: cset r2, lt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, lt -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -436,25 +436,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ule_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ule_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: cset r0, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s4 +; CHECK-MVE-NEXT: cset r0, le +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: cset r1, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s4 ; CHECK-MVE-NEXT: cset r2, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, le -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; 
CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -475,25 +475,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ord_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ord_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: cset r0, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s4 +; CHECK-MVE-NEXT: cset r0, vc +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: cset r1, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s4 ; CHECK-MVE-NEXT: cset r2, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, vc -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -515,25 +515,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_uno_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_uno_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s4 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s4 -; CHECK-MVE-NEXT: cset r0, vs ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s4 +; CHECK-MVE-NEXT: cset r0, vs +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; 
CHECK-MVE-NEXT: vcmp.f32 s1, s4 ; CHECK-MVE-NEXT: cset r1, vs ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s4 ; CHECK-MVE-NEXT: cset r2, vs ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, vs -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -1614,25 +1614,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_oeq_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_oeq_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s4, s1 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s0 -; CHECK-MVE-NEXT: cset r0, eq ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s3 +; CHECK-MVE-NEXT: cset r0, eq +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s4, s1 ; CHECK-MVE-NEXT: cset r1, eq ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s2 ; CHECK-MVE-NEXT: cset r2, eq ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, eq -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -1653,13 +1653,13 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_one_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_one_v4f32: ; 
CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s4, s1 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s4, s3 ; CHECK-MVE-NEXT: cset r0, mi ; CHECK-MVE-NEXT: csinc r0, r0, zr, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s4, s3 +; CHECK-MVE-NEXT: vcmp.f32 s4, s1 ; CHECK-MVE-NEXT: cset r1, mi ; CHECK-MVE-NEXT: csinc r1, r1, zr, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr @@ -1669,13 +1669,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_one_v4f32(<4 x float> %src, float %sr ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, mi ; CHECK-MVE-NEXT: csinc r3, r3, zr, le -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -1697,25 +1697,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_ogt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_ogt_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s4, s1 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s0 -; CHECK-MVE-NEXT: cset r0, gt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s3 +; CHECK-MVE-NEXT: cset r0, gt +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s4, s1 ; CHECK-MVE-NEXT: cset r1, gt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s2 ; CHECK-MVE-NEXT: cset r2, gt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, gt -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: 
vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -1736,25 +1736,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_oge_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_oge_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s4, s1 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s0 -; CHECK-MVE-NEXT: cset r0, ge ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s3 +; CHECK-MVE-NEXT: cset r0, ge +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s4, s1 ; CHECK-MVE-NEXT: cset r1, ge ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s2 ; CHECK-MVE-NEXT: cset r2, ge ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, ge -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -1775,25 +1775,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_olt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_olt_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s4, s1 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s0 -; CHECK-MVE-NEXT: cset r0, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s3 +; CHECK-MVE-NEXT: cset r0, mi +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s4, s1 ; 
CHECK-MVE-NEXT: cset r1, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s2 ; CHECK-MVE-NEXT: cset r2, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, mi -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -1814,25 +1814,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_ole_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_ole_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s4, s1 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s0 -; CHECK-MVE-NEXT: cset r0, ls ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s3 +; CHECK-MVE-NEXT: cset r0, ls +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s4, s1 ; CHECK-MVE-NEXT: cset r1, ls ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s2 ; CHECK-MVE-NEXT: cset r2, ls ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, ls -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -1853,13 +1853,13 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_ueq_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_ueq_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; 
CHECK-MVE-NEXT: vcmp.f32 s4, s1 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s4, s3 ; CHECK-MVE-NEXT: cset r0, eq ; CHECK-MVE-NEXT: csinc r0, r0, zr, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s4, s3 +; CHECK-MVE-NEXT: vcmp.f32 s4, s1 ; CHECK-MVE-NEXT: cset r1, eq ; CHECK-MVE-NEXT: csinc r1, r1, zr, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr @@ -1869,13 +1869,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_ueq_v4f32(<4 x float> %src, float %sr ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, eq ; CHECK-MVE-NEXT: csinc r3, r3, zr, vc -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -1928,25 +1928,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_ugt_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_ugt_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s4, s1 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s0 -; CHECK-MVE-NEXT: cset r0, hi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s3 +; CHECK-MVE-NEXT: cset r0, hi +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s4, s1 ; CHECK-MVE-NEXT: cset r1, hi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s2 ; CHECK-MVE-NEXT: cset r2, hi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, hi -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; 
CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -1967,25 +1967,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_uge_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_uge_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s4, s1 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s0 -; CHECK-MVE-NEXT: cset r0, pl ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s3 +; CHECK-MVE-NEXT: cset r0, pl +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s4, s1 ; CHECK-MVE-NEXT: cset r1, pl ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s2 ; CHECK-MVE-NEXT: cset r2, pl ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, pl -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -2006,25 +2006,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_ult_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_ult_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s4, s1 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s0 -; CHECK-MVE-NEXT: cset r0, lt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s3 +; CHECK-MVE-NEXT: cset r0, lt +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s4, s1 ; CHECK-MVE-NEXT: cset r1, lt ; 
CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s2 ; CHECK-MVE-NEXT: cset r2, lt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, lt -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -2045,25 +2045,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_ule_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_ule_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s4, s1 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s0 -; CHECK-MVE-NEXT: cset r0, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s3 +; CHECK-MVE-NEXT: cset r0, le +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s4, s1 ; CHECK-MVE-NEXT: cset r1, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s2 ; CHECK-MVE-NEXT: cset r2, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, le -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -2084,25 +2084,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_ord_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_ord_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s4, s1 -; 
CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s0 -; CHECK-MVE-NEXT: cset r0, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s3 +; CHECK-MVE-NEXT: cset r0, vc +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s4, s1 ; CHECK-MVE-NEXT: cset r1, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s2 ; CHECK-MVE-NEXT: cset r2, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, vc -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; @@ -2124,25 +2124,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_uno_v4f32(<4 x float> %src, float %src2, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_uno_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s4, s1 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s0 -; CHECK-MVE-NEXT: cset r0, vs ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s3 +; CHECK-MVE-NEXT: cset r0, vs +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s4, s1 ; CHECK-MVE-NEXT: cset r1, vs ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s4, s2 ; CHECK-MVE-NEXT: cset r2, vs ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, vs -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s14, s10 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s13, s9 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s15, s11 +; CHECK-MVE-NEXT: cmp r0, #0 ; 
CHECK-MVE-NEXT: vseleq.f32 s0, s12, s8 ; CHECK-MVE-NEXT: bx lr ; diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll index 718657839d38d..de8b413bf24e5 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll @@ -5,25 +5,25 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_oeq_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_oeq_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 -; CHECK-MVE-NEXT: cset r0, eq ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: cset r0, eq +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, eq ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: cset r2, eq ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, eq -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -41,13 +41,13 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_one_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 ; CHECK-MVE-NEXT: cset r0, mi ; CHECK-MVE-NEXT: csinc r0, r0, zr, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, 
mi ; CHECK-MVE-NEXT: csinc r1, r1, zr, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr @@ -57,13 +57,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_one_v4f32(<4 x float> %src, <4 x float> ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, mi ; CHECK-MVE-NEXT: csinc r3, r3, zr, le -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -82,25 +82,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ogt_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ogt_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 -; CHECK-MVE-NEXT: cset r0, gt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: cset r0, gt +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, gt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: cset r2, gt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, gt -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -118,25 +118,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_oge_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_oge_v4f32: 
; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 -; CHECK-MVE-NEXT: cset r0, ge ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: cset r0, ge +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, ge ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: cset r2, ge ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, ge -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -154,25 +154,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_olt_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_olt_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 -; CHECK-MVE-NEXT: cset r0, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: cset r0, mi +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: cset r2, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, mi -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, 
s7 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -190,25 +190,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ole_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ole_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 -; CHECK-MVE-NEXT: cset r0, ls ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: cset r0, ls +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, ls ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: cset r2, ls ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, ls -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -226,13 +226,13 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ueq_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ueq_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 ; CHECK-MVE-NEXT: cset r0, eq ; CHECK-MVE-NEXT: csinc r0, r0, zr, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, eq ; CHECK-MVE-NEXT: csinc r1, r1, zr, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr @@ -242,13 +242,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_ueq_v4f32(<4 x float> %src, <4 x 
float> ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, eq ; CHECK-MVE-NEXT: csinc r3, r3, zr, vc -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -295,25 +295,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ugt_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ugt_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 -; CHECK-MVE-NEXT: cset r0, hi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: cset r0, hi +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, hi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: cset r2, hi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, hi -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -331,25 +331,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_uge_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_uge_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 -; CHECK-MVE-NEXT: cset r0, pl ; 
CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: cset r0, pl +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, pl ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: cset r2, pl ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, pl -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -367,25 +367,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ult_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ult_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 -; CHECK-MVE-NEXT: cset r0, lt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: cset r0, lt +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, lt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: cset r2, lt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, lt -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -403,25 +403,25 @@ entry: define arm_aapcs_vfpcc <4 x float> 
@vcmp_ule_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ule_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 -; CHECK-MVE-NEXT: cset r0, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: cset r0, le +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: cset r2, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, le -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -439,25 +439,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_ord_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_ord_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s1 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s0 -; CHECK-MVE-NEXT: cset r0, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s3 +; CHECK-MVE-NEXT: cset r0, vc +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s1 ; CHECK-MVE-NEXT: cset r1, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s2 ; CHECK-MVE-NEXT: cset r2, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, vc -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; 
CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -476,25 +476,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_uno_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_uno_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s1 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s0 -; CHECK-MVE-NEXT: cset r0, vs ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s3 +; CHECK-MVE-NEXT: cset r0, vs +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s1 ; CHECK-MVE-NEXT: cset r1, vs ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s2 ; CHECK-MVE-NEXT: cset r2, vs ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, vs -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -1530,25 +1530,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_oeq_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_oeq_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 -; CHECK-MVE-NEXT: cset r0, eq ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: cset r0, eq +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, eq ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: cset r2, eq ; 
CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, eq -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -1566,13 +1566,13 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_one_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_one_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 ; CHECK-MVE-NEXT: cset r0, mi ; CHECK-MVE-NEXT: csinc r0, r0, zr, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, mi ; CHECK-MVE-NEXT: csinc r1, r1, zr, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr @@ -1582,13 +1582,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_one_v4f32(<4 x float> %src, <4 x floa ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, mi ; CHECK-MVE-NEXT: csinc r3, r3, zr, le -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -1607,25 +1607,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_ogt_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_ogt_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; 
CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 -; CHECK-MVE-NEXT: cset r0, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: cset r0, mi +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: cset r2, mi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, mi -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -1643,25 +1643,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_oge_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_oge_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 -; CHECK-MVE-NEXT: cset r0, ls ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: cset r0, ls +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, ls ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: cset r2, ls ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, ls -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: cmp 
r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -1679,25 +1679,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_olt_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_olt_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 -; CHECK-MVE-NEXT: cset r0, gt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: cset r0, gt +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, gt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: cset r2, gt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, gt -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -1715,25 +1715,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_ole_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_ole_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 -; CHECK-MVE-NEXT: cset r0, ge ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: cset r0, ge +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, ge ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: cset r2, ge ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, ge -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: 
vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -1751,13 +1751,13 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_ueq_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_ueq_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s3, #0 ; CHECK-MVE-NEXT: cset r0, eq ; CHECK-MVE-NEXT: csinc r0, r0, zr, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, eq ; CHECK-MVE-NEXT: csinc r1, r1, zr, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr @@ -1767,13 +1767,13 @@ define arm_aapcs_vfpcc <4 x float> @vcmp_r_ueq_v4f32(<4 x float> %src, <4 x floa ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, eq ; CHECK-MVE-NEXT: csinc r3, r3, zr, vc -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -1820,25 +1820,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_ugt_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_ugt_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 -; CHECK-MVE-NEXT: cset 
r0, lt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: cset r0, lt +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, lt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: cset r2, lt ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, lt -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -1856,25 +1856,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_uge_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_uge_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 -; CHECK-MVE-NEXT: cset r0, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: cset r0, le +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: cset r2, le ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, le -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -1892,25 +1892,25 @@ entry: define arm_aapcs_vfpcc <4 x 
float> @vcmp_r_ult_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_ult_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 -; CHECK-MVE-NEXT: cset r0, hi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: cset r0, hi +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, hi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: cset r2, hi ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, hi -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -1928,25 +1928,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_ule_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_ule_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, #0 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, #0 -; CHECK-MVE-NEXT: cset r0, pl ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, #0 +; CHECK-MVE-NEXT: cset r0, pl +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, #0 ; CHECK-MVE-NEXT: cset r1, pl ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, #0 ; CHECK-MVE-NEXT: cset r2, pl ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, pl -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; 
CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -1964,25 +1964,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_ord_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_ord_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s1 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s0 -; CHECK-MVE-NEXT: cset r0, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s3 +; CHECK-MVE-NEXT: cset r0, vc +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s1 ; CHECK-MVE-NEXT: cset r1, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, s2 ; CHECK-MVE-NEXT: cset r2, vc ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, vc -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; @@ -2001,25 +2001,25 @@ entry: define arm_aapcs_vfpcc <4 x float> @vcmp_r_uno_v4f32(<4 x float> %src, <4 x float> %a, <4 x float> %b) { ; CHECK-MVE-LABEL: vcmp_r_uno_v4f32: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vcmp.f32 s1, s1 -; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s0, s0 -; CHECK-MVE-NEXT: cset r0, vs ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s3, s3 +; CHECK-MVE-NEXT: cset r0, vs +; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-MVE-NEXT: vcmp.f32 s1, s1 ; CHECK-MVE-NEXT: cset r1, vs ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: vcmp.f32 s2, 
s2 ; CHECK-MVE-NEXT: cset r2, vs ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-MVE-NEXT: cset r3, vs -; CHECK-MVE-NEXT: cmp r2, #0 -; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 ; CHECK-MVE-NEXT: cmp r3, #0 ; CHECK-MVE-NEXT: vseleq.f32 s2, s10, s6 -; CHECK-MVE-NEXT: cmp r0, #0 +; CHECK-MVE-NEXT: cmp r2, #0 ; CHECK-MVE-NEXT: vseleq.f32 s1, s9, s5 ; CHECK-MVE-NEXT: cmp r1, #0 +; CHECK-MVE-NEXT: vseleq.f32 s3, s11, s7 +; CHECK-MVE-NEXT: cmp r0, #0 ; CHECK-MVE-NEXT: vseleq.f32 s0, s8, s4 ; CHECK-MVE-NEXT: bx lr ; diff --git a/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll b/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll index 898380760bd4d..35e578e425e74 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmaxv-vminv-scalar.ll @@ -499,16 +499,12 @@ define arm_aapcs_vfpcc i64 @uminv2i64(<2 x i64> %vec, i64 %min) { ; CHECK-NEXT: vmov r2, r3, d0 ; CHECK-NEXT: subs.w r4, r2, r12 ; CHECK-NEXT: sbcs.w r4, r3, lr -; CHECK-NEXT: cset r4, lo -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r2, r2, r12, ne -; CHECK-NEXT: csel r3, r3, lr, ne +; CHECK-NEXT: csel r2, r2, r12, lo +; CHECK-NEXT: csel r3, r3, lr, lo ; CHECK-NEXT: subs r4, r2, r0 ; CHECK-NEXT: sbcs.w r4, r3, r1 -; CHECK-NEXT: cset r4, lo -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r0, r2, r0, ne -; CHECK-NEXT: csel r1, r3, r1, ne +; CHECK-NEXT: csel r0, r2, r0, lo +; CHECK-NEXT: csel r1, r3, r1, lo ; CHECK-NEXT: pop {r4, pc} %x = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %vec) %cmp = icmp ult i64 %x, %min @@ -525,16 +521,12 @@ define arm_aapcs_vfpcc i64 @sminv2i64(<2 x i64> %vec, i64 %min) { ; CHECK-NEXT: vmov r2, r3, d0 ; CHECK-NEXT: subs.w r4, r2, r12 ; CHECK-NEXT: sbcs.w r4, r3, lr -; CHECK-NEXT: cset r4, lt -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r2, r2, r12, ne -; CHECK-NEXT: csel r3, r3, lr, ne +; CHECK-NEXT: csel r2, r2, r12, lt +; CHECK-NEXT: csel r3, r3, lr, lt ; CHECK-NEXT: subs r4, r2, r0 ; CHECK-NEXT: sbcs.w r4, r3, r1 -; CHECK-NEXT: cset r4, 
lt -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r0, r2, r0, ne -; CHECK-NEXT: csel r1, r3, r1, ne +; CHECK-NEXT: csel r0, r2, r0, lt +; CHECK-NEXT: csel r1, r3, r1, lt ; CHECK-NEXT: pop {r4, pc} %x = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %vec) %cmp = icmp slt i64 %x, %min @@ -551,16 +543,12 @@ define arm_aapcs_vfpcc i64 @umaxv2i64(<2 x i64> %vec, i64 %max) { ; CHECK-NEXT: vmov r2, r3, d1 ; CHECK-NEXT: subs.w r4, r2, r12 ; CHECK-NEXT: sbcs.w r4, r3, lr -; CHECK-NEXT: cset r4, lo -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r2, r12, r2, ne -; CHECK-NEXT: csel r3, lr, r3, ne +; CHECK-NEXT: csel r2, r12, r2, lo +; CHECK-NEXT: csel r3, lr, r3, lo ; CHECK-NEXT: subs r4, r0, r2 ; CHECK-NEXT: sbcs.w r4, r1, r3 -; CHECK-NEXT: cset r4, lo -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r0, r2, r0, ne -; CHECK-NEXT: csel r1, r3, r1, ne +; CHECK-NEXT: csel r0, r2, r0, lo +; CHECK-NEXT: csel r1, r3, r1, lo ; CHECK-NEXT: pop {r4, pc} %x = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %vec) %cmp = icmp ugt i64 %x, %max @@ -577,16 +565,12 @@ define arm_aapcs_vfpcc i64 @smaxv2i64(<2 x i64> %vec, i64 %max) { ; CHECK-NEXT: vmov r2, r3, d1 ; CHECK-NEXT: subs.w r4, r2, r12 ; CHECK-NEXT: sbcs.w r4, r3, lr -; CHECK-NEXT: cset r4, lt -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r2, r12, r2, ne -; CHECK-NEXT: csel r3, lr, r3, ne +; CHECK-NEXT: csel r2, r12, r2, lt +; CHECK-NEXT: csel r3, lr, r3, lt ; CHECK-NEXT: subs r4, r0, r2 ; CHECK-NEXT: sbcs.w r4, r1, r3 -; CHECK-NEXT: cset r4, lt -; CHECK-NEXT: cmp r4, #0 -; CHECK-NEXT: csel r0, r2, r0, ne -; CHECK-NEXT: csel r1, r3, r1, ne +; CHECK-NEXT: csel r0, r2, r0, lt +; CHECK-NEXT: csel r1, r3, r1, lt ; CHECK-NEXT: pop {r4, pc} %x = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %vec) %cmp = icmp sgt i64 %x, %max diff --git a/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll b/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll index 642ff69ded33f..bf0d92b5e0303 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll +++ 
b/llvm/test/CodeGen/Thumb2/mve-vqdmulh.ll @@ -502,8 +502,8 @@ define <2 x i64> @large_i128(<2 x double> %x) { ; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: csel r0, r0, r7, ne ; CHECK-NEXT: csel r3, r3, r7, ne -; CHECK-NEXT: csel r2, r2, r9, ne ; CHECK-NEXT: csel r1, r1, r7, ne +; CHECK-NEXT: csel r2, r2, r9, ne ; CHECK-NEXT: rsbs r7, r0, #0 ; CHECK-NEXT: sbcs.w r7, r4, r1 ; CHECK-NEXT: sbcs.w r2, r4, r2 @@ -521,8 +521,8 @@ define <2 x i64> @large_i128(<2 x double> %x) { ; CHECK-NEXT: cmp r5, #0 ; CHECK-NEXT: csel r0, r0, r5, ne ; CHECK-NEXT: csel r3, r3, r5, ne -; CHECK-NEXT: csel r2, r2, r9, ne ; CHECK-NEXT: csel r1, r1, r5, ne +; CHECK-NEXT: csel r2, r2, r9, ne ; CHECK-NEXT: rsbs r5, r0, #0 ; CHECK-NEXT: sbcs.w r5, r4, r1 ; CHECK-NEXT: sbcs.w r2, r4, r2 diff --git a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll index cff16c300e703..fe1d06cb39e16 100644 --- a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll @@ -8,121 +8,119 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; THUMBV7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; THUMBV7-NEXT: .pad #44 ; THUMBV7-NEXT: sub sp, #44 -; THUMBV7-NEXT: ldr.w lr, [sp, #88] -; THUMBV7-NEXT: mov r11, r0 -; THUMBV7-NEXT: ldr r4, [sp, #96] -; THUMBV7-NEXT: ldr.w r12, [sp, #80] -; THUMBV7-NEXT: umull r1, r5, r2, lr -; THUMBV7-NEXT: umull r7, r6, r3, r4 -; THUMBV7-NEXT: str r1, [sp, #40] @ 4-byte Spill -; THUMBV7-NEXT: ldr r1, [sp, #100] -; THUMBV7-NEXT: umull r4, r0, r4, r2 -; THUMBV7-NEXT: str r7, [sp, #32] @ 4-byte Spill -; THUMBV7-NEXT: umull r7, r1, r1, r2 -; THUMBV7-NEXT: str r4, [sp, #24] @ 4-byte Spill -; THUMBV7-NEXT: str r0, [sp, #12] @ 4-byte Spill -; THUMBV7-NEXT: ldr r0, [sp, #84] -; THUMBV7-NEXT: str r7, [sp, #20] @ 4-byte Spill -; THUMBV7-NEXT: ldr r7, [sp, #92] -; THUMBV7-NEXT: umull r10, r8, r0, lr -; THUMBV7-NEXT: umull r4, r9, r7, r12 
-; THUMBV7-NEXT: str r4, [sp, #8] @ 4-byte Spill -; THUMBV7-NEXT: umull r4, r0, r12, lr -; THUMBV7-NEXT: mov.w r12, #0 -; THUMBV7-NEXT: umlal r5, r12, r3, lr -; THUMBV7-NEXT: str r4, [sp, #16] @ 4-byte Spill +; THUMBV7-NEXT: ldr.w r8, [sp, #88] +; THUMBV7-NEXT: mov r9, r0 +; THUMBV7-NEXT: ldr r7, [sp, #96] +; THUMBV7-NEXT: ldr.w lr, [sp, #100] +; THUMBV7-NEXT: umull r0, r5, r2, r8 +; THUMBV7-NEXT: ldr r4, [sp, #80] +; THUMBV7-NEXT: str r0, [sp, #32] @ 4-byte Spill +; THUMBV7-NEXT: umull r1, r0, r3, r7 ; THUMBV7-NEXT: str r0, [sp, #4] @ 4-byte Spill -; THUMBV7-NEXT: umull r4, r2, r2, r7 -; THUMBV7-NEXT: ldr r0, [sp, #40] @ 4-byte Reload -; THUMBV7-NEXT: str r4, [sp, #28] @ 4-byte Spill +; THUMBV7-NEXT: umull r0, r11, lr, r2 +; THUMBV7-NEXT: str r1, [sp, #20] @ 4-byte Spill +; THUMBV7-NEXT: ldr r1, [sp, #92] +; THUMBV7-NEXT: str r0, [sp] @ 4-byte Spill +; THUMBV7-NEXT: umull r0, r10, r7, r2 +; THUMBV7-NEXT: mov r7, r1 +; THUMBV7-NEXT: umull r6, r12, r1, r4 +; THUMBV7-NEXT: str r0, [sp, #40] @ 4-byte Spill +; THUMBV7-NEXT: ldr r0, [sp, #84] +; THUMBV7-NEXT: str r6, [sp, #24] @ 4-byte Spill +; THUMBV7-NEXT: umull r6, r1, r0, r8 +; THUMBV7-NEXT: str r6, [sp, #16] @ 4-byte Spill +; THUMBV7-NEXT: umull r6, r2, r2, r7 +; THUMBV7-NEXT: mov r7, r4 +; THUMBV7-NEXT: strd r6, r2, [sp, #8] @ 8-byte Folded Spill +; THUMBV7-NEXT: umull r2, r6, r4, r8 ; THUMBV7-NEXT: str r2, [sp, #36] @ 4-byte Spill -; THUMBV7-NEXT: str.w r0, [r11] -; THUMBV7-NEXT: ldr r0, [sp, #32] @ 4-byte Reload +; THUMBV7-NEXT: ldr r2, [sp, #32] @ 4-byte Reload +; THUMBV7-NEXT: str r6, [sp, #28] @ 4-byte Spill +; THUMBV7-NEXT: movs r6, #0 +; THUMBV7-NEXT: str.w r2, [r9] +; THUMBV7-NEXT: umlal r5, r6, r3, r8 ; THUMBV7-NEXT: ldr r2, [sp, #20] @ 4-byte Reload -; THUMBV7-NEXT: add r2, r0 -; THUMBV7-NEXT: ldr r0, [sp, #12] @ 4-byte Reload -; THUMBV7-NEXT: adds.w lr, r0, r2 +; THUMBV7-NEXT: ldr r4, [sp] @ 4-byte Reload +; THUMBV7-NEXT: add r4, r2 +; THUMBV7-NEXT: adds.w r2, r10, r4 +; THUMBV7-NEXT: str r2, [sp, #20] 
@ 4-byte Spill ; THUMBV7-NEXT: mov.w r2, #0 -; THUMBV7-NEXT: adc r0, r2, #0 -; THUMBV7-NEXT: str r0, [sp, #32] @ 4-byte Spill -; THUMBV7-NEXT: ldr r0, [sp, #8] @ 4-byte Reload -; THUMBV7-NEXT: add.w r4, r10, r0 -; THUMBV7-NEXT: ldr r0, [sp, #4] @ 4-byte Reload -; THUMBV7-NEXT: adds r4, r4, r0 -; THUMBV7-NEXT: adc r0, r2, #0 -; THUMBV7-NEXT: str r0, [sp, #40] @ 4-byte Spill -; THUMBV7-NEXT: ldr r0, [sp, #24] @ 4-byte Reload -; THUMBV7-NEXT: ldr r2, [sp, #16] @ 4-byte Reload -; THUMBV7-NEXT: adds.w r10, r2, r0 -; THUMBV7-NEXT: mov r2, r3 -; THUMBV7-NEXT: adc.w r0, r4, lr -; THUMBV7-NEXT: ldr.w lr, [sp, #100] +; THUMBV7-NEXT: adc r2, r2, #0 +; THUMBV7-NEXT: cmp.w r12, #0 +; THUMBV7-NEXT: str r2, [sp, #32] @ 4-byte Spill +; THUMBV7-NEXT: it ne +; THUMBV7-NEXT: movne.w r12, #1 ; THUMBV7-NEXT: cmp r1, #0 -; THUMBV7-NEXT: str r0, [sp, #24] @ 4-byte Spill +; THUMBV7-NEXT: ldr r2, [sp, #96] ; THUMBV7-NEXT: it ne ; THUMBV7-NEXT: movne r1, #1 -; THUMBV7-NEXT: cmp r3, #0 -; THUMBV7-NEXT: mov r0, lr +; THUMBV7-NEXT: orrs.w r10, r7, r0 ; THUMBV7-NEXT: it ne -; THUMBV7-NEXT: movne r2, #1 -; THUMBV7-NEXT: cmp.w lr, #0 +; THUMBV7-NEXT: movne.w r10, #1 +; THUMBV7-NEXT: orrs.w r7, r2, lr +; THUMBV7-NEXT: ldr r2, [sp, #92] +; THUMBV7-NEXT: it ne +; THUMBV7-NEXT: movne r7, #1 +; THUMBV7-NEXT: cmp r0, #0 ; THUMBV7-NEXT: it ne ; THUMBV7-NEXT: movne r0, #1 -; THUMBV7-NEXT: ldr r4, [sp, #28] @ 4-byte Reload -; THUMBV7-NEXT: ands r0, r2 -; THUMBV7-NEXT: orrs r1, r0 -; THUMBV7-NEXT: adds r5, r5, r4 -; THUMBV7-NEXT: str.w r5, [r11, #4] -; THUMBV7-NEXT: ldr r0, [sp, #36] @ 4-byte Reload -; THUMBV7-NEXT: mov.w r5, #0 -; THUMBV7-NEXT: adcs.w r0, r0, r12 -; THUMBV7-NEXT: adc r2, r5, #0 -; THUMBV7-NEXT: cmp r6, #0 +; THUMBV7-NEXT: cmp r2, #0 +; THUMBV7-NEXT: mov r4, r2 +; THUMBV7-NEXT: mov r8, r2 ; THUMBV7-NEXT: it ne -; THUMBV7-NEXT: movne r6, #1 -; THUMBV7-NEXT: orrs r1, r6 -; THUMBV7-NEXT: ldr r6, [sp, #84] -; THUMBV7-NEXT: umlal r0, r2, r3, r7 -; THUMBV7-NEXT: ldr r3, [sp, #32] @ 4-byte Reload 
-; THUMBV7-NEXT: cmp r7, #0 +; THUMBV7-NEXT: movne r4, #1 +; THUMBV7-NEXT: ldr r2, [sp, #8] @ 4-byte Reload +; THUMBV7-NEXT: ands r0, r4 +; THUMBV7-NEXT: movs r4, #0 +; THUMBV7-NEXT: adds r5, r5, r2 +; THUMBV7-NEXT: str.w r5, [r9, #4] +; THUMBV7-NEXT: orr.w r0, r0, r1 +; THUMBV7-NEXT: ldr r1, [sp, #24] @ 4-byte Reload +; THUMBV7-NEXT: ldr r2, [sp, #16] @ 4-byte Reload +; THUMBV7-NEXT: and.w r5, r10, r7 +; THUMBV7-NEXT: orr.w r0, r0, r12 +; THUMBV7-NEXT: mov.w r12, #0 +; THUMBV7-NEXT: add r1, r2 +; THUMBV7-NEXT: ldr r2, [sp, #12] @ 4-byte Reload +; THUMBV7-NEXT: adcs r2, r6 +; THUMBV7-NEXT: ldr r6, [sp, #28] @ 4-byte Reload +; THUMBV7-NEXT: adc r7, r4, #0 +; THUMBV7-NEXT: adds r1, r1, r6 +; THUMBV7-NEXT: umlal r2, r7, r3, r8 +; THUMBV7-NEXT: adc r4, r4, #0 +; THUMBV7-NEXT: orrs r0, r4 +; THUMBV7-NEXT: orrs r0, r5 +; THUMBV7-NEXT: ldrd r5, r4, [sp, #36] @ 8-byte Folded Reload +; THUMBV7-NEXT: adds r5, r5, r4 +; THUMBV7-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; THUMBV7-NEXT: adcs r1, r4 +; THUMBV7-NEXT: ldr r4, [sp, #4] @ 4-byte Reload +; THUMBV7-NEXT: cmp r4, #0 ; THUMBV7-NEXT: it ne -; THUMBV7-NEXT: movne r7, #1 -; THUMBV7-NEXT: orrs r1, r3 -; THUMBV7-NEXT: mov r3, r6 -; THUMBV7-NEXT: cmp r6, #0 +; THUMBV7-NEXT: movne r4, #1 +; THUMBV7-NEXT: cmp r3, #0 ; THUMBV7-NEXT: it ne ; THUMBV7-NEXT: movne r3, #1 -; THUMBV7-NEXT: cmp.w r8, #0 -; THUMBV7-NEXT: and.w r3, r3, r7 -; THUMBV7-NEXT: ldr r7, [sp, #80] -; THUMBV7-NEXT: it ne -; THUMBV7-NEXT: movne.w r8, #1 -; THUMBV7-NEXT: cmp.w r9, #0 -; THUMBV7-NEXT: it ne -; THUMBV7-NEXT: movne.w r9, #1 -; THUMBV7-NEXT: orrs r7, r6 -; THUMBV7-NEXT: ldr r6, [sp, #96] +; THUMBV7-NEXT: cmp.w lr, #0 ; THUMBV7-NEXT: it ne -; THUMBV7-NEXT: movne r7, #1 -; THUMBV7-NEXT: orr.w r3, r3, r8 -; THUMBV7-NEXT: orrs.w r6, r6, lr -; THUMBV7-NEXT: orr.w r3, r3, r9 +; THUMBV7-NEXT: movne.w lr, #1 +; THUMBV7-NEXT: cmp.w r11, #0 ; THUMBV7-NEXT: it ne -; THUMBV7-NEXT: movne r6, #1 -; THUMBV7-NEXT: adds.w r0, r0, r10 -; THUMBV7-NEXT: str.w r0, [r11, #8] 
-; THUMBV7-NEXT: ldr r0, [sp, #24] @ 4-byte Reload -; THUMBV7-NEXT: adcs r0, r2 -; THUMBV7-NEXT: str.w r0, [r11, #12] -; THUMBV7-NEXT: ldr r0, [sp, #40] @ 4-byte Reload -; THUMBV7-NEXT: and.w r2, r7, r6 -; THUMBV7-NEXT: orr.w r0, r0, r3 -; THUMBV7-NEXT: orr.w r0, r0, r2 +; THUMBV7-NEXT: movne.w r11, #1 +; THUMBV7-NEXT: adds r2, r2, r5 +; THUMBV7-NEXT: and.w r3, r3, lr +; THUMBV7-NEXT: str.w r2, [r9, #8] +; THUMBV7-NEXT: adcs r1, r7 +; THUMBV7-NEXT: str.w r1, [r9, #12] +; THUMBV7-NEXT: orr.w r1, r3, r11 +; THUMBV7-NEXT: ldr r2, [sp, #32] @ 4-byte Reload +; THUMBV7-NEXT: orr.w r1, r1, r4 +; THUMBV7-NEXT: orr.w r1, r1, r2 ; THUMBV7-NEXT: orr.w r0, r0, r1 -; THUMBV7-NEXT: adc r1, r5, #0 +; THUMBV7-NEXT: adc r1, r12, #0 ; THUMBV7-NEXT: orrs r0, r1 ; THUMBV7-NEXT: and r0, r0, #1 -; THUMBV7-NEXT: strb.w r0, [r11, #16] +; THUMBV7-NEXT: strb.w r0, [r9, #16] ; THUMBV7-NEXT: add sp, #44 ; THUMBV7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} start: diff --git a/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll index 161adf7e7d763..55e917159fce9 100644 --- a/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll +++ b/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll @@ -4,33 +4,32 @@ define { i64, i8 } @mulodi_test(i64 %l, i64 %r) unnamed_addr #0 { ; THUMBV7-LABEL: mulodi_test: ; THUMBV7: @ %bb.0: @ %start -; THUMBV7-NEXT: .save {r4, r5, r6, lr} -; THUMBV7-NEXT: push {r4, r5, r6, lr} +; THUMBV7-NEXT: .save {r4, r5, r7, lr} +; THUMBV7-NEXT: push {r4, r5, r7, lr} ; THUMBV7-NEXT: umull r12, lr, r3, r0 -; THUMBV7-NEXT: movs r6, #0 -; THUMBV7-NEXT: umull r4, r5, r1, r2 -; THUMBV7-NEXT: umull r0, r2, r0, r2 -; THUMBV7-NEXT: add r4, r12 -; THUMBV7-NEXT: adds.w r12, r2, r4 -; THUMBV7-NEXT: adc r2, r6, #0 ; THUMBV7-NEXT: cmp r3, #0 ; THUMBV7-NEXT: it ne ; THUMBV7-NEXT: movne r3, #1 ; THUMBV7-NEXT: cmp r1, #0 +; THUMBV7-NEXT: umull r0, r4, r0, r2 +; THUMBV7-NEXT: umull r2, r5, r1, r2 ; THUMBV7-NEXT: 
it ne ; THUMBV7-NEXT: movne r1, #1 +; THUMBV7-NEXT: ands r1, r3 ; THUMBV7-NEXT: cmp r5, #0 -; THUMBV7-NEXT: and.w r1, r1, r3 ; THUMBV7-NEXT: it ne ; THUMBV7-NEXT: movne r5, #1 ; THUMBV7-NEXT: orrs r1, r5 ; THUMBV7-NEXT: cmp.w lr, #0 ; THUMBV7-NEXT: it ne ; THUMBV7-NEXT: movne.w lr, #1 -; THUMBV7-NEXT: orr.w r1, r1, lr -; THUMBV7-NEXT: orrs r2, r1 -; THUMBV7-NEXT: mov r1, r12 -; THUMBV7-NEXT: pop {r4, r5, r6, pc} +; THUMBV7-NEXT: orr.w r3, r1, lr +; THUMBV7-NEXT: add.w r1, r2, r12 +; THUMBV7-NEXT: movs r2, #0 +; THUMBV7-NEXT: adds r1, r1, r4 +; THUMBV7-NEXT: adc r2, r2, #0 +; THUMBV7-NEXT: orrs r2, r3 +; THUMBV7-NEXT: pop {r4, r5, r7, pc} start: %0 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %l, i64 %r) #2 %1 = extractvalue { i64, i1 } %0, 0