diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 2c9ba203fbf3d..80b0be016da77 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2217,6 +2217,31 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
     }
   }
 
+  // Check that each user of each old PHI node is something that we can
+  // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
+  for (auto *OldPN : OldPhiNodes) {
+    for (User *V : OldPN->users()) {
+      if (auto *SI = dyn_cast<StoreInst>(V)) {
+        if (!SI->isSimple() || SI->getOperand(0) != OldPN)
+          return nullptr;
+      } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
+        // Verify it's a B->A cast.
+        Type *TyB = BCI->getOperand(0)->getType();
+        Type *TyA = BCI->getType();
+        if (TyA != DestTy || TyB != SrcTy)
+          return nullptr;
+      } else if (auto *PHI = dyn_cast<PHINode>(V)) {
+        // As long as the user is another old PHI node, then even if we don't
+        // rewrite it, the PHI web we're considering won't have any users
+        // outside itself, so it'll be dead.
+        if (OldPhiNodes.count(PHI) == 0)
+          return nullptr;
+      } else {
+        return nullptr;
+      }
+    }
+  }
+
   // For each old PHI node, create a corresponding new PHI node with a type A.
   SmallDenseMap<PHINode *, PHINode *> NewPNodes;
   for (auto *OldPN : OldPhiNodes) {
@@ -2234,9 +2259,14 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
       if (auto *C = dyn_cast<Constant>(V)) {
         NewV = ConstantExpr::getBitCast(C, DestTy);
       } else if (auto *LI = dyn_cast<LoadInst>(V)) {
-        Builder.SetInsertPoint(LI->getNextNode());
-        NewV = Builder.CreateBitCast(LI, DestTy);
-        Worklist.Add(LI);
+        // Explicitly perform load combine to make sure no opposing transform
+        // can remove the bitcast in the meantime and trigger an infinite loop.
+        Builder.SetInsertPoint(LI);
+        NewV = combineLoadToNewType(*LI, DestTy);
+        // Remove the old load and its use in the old phi, which itself becomes
+        // dead once the whole transform finishes.
+        replaceInstUsesWith(*LI, UndefValue::get(LI->getType()));
+        eraseInstFromFunction(*LI);
       } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
         NewV = BCI->getOperand(0);
       } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
@@ -2259,26 +2289,33 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
 
   Instruction *RetVal = nullptr;
   for (auto *OldPN : OldPhiNodes) {
     PHINode *NewPN = NewPNodes[OldPN];
-    for (User *V : OldPN->users()) {
+    for (auto It = OldPN->user_begin(), End = OldPN->user_end(); It != End; ) {
+      User *V = *It;
+      // We may remove this user, advance to avoid iterator invalidation.
+      ++It;
       if (auto *SI = dyn_cast<StoreInst>(V)) {
-        if (SI->isSimple() && SI->getOperand(0) == OldPN) {
-          Builder.SetInsertPoint(SI);
-          auto *NewBC =
-            cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
-          SI->setOperand(0, NewBC);
-          Worklist.Add(SI);
-          assert(hasStoreUsersOnly(*NewBC));
-        }
+        assert(SI->isSimple() && SI->getOperand(0) == OldPN);
+        Builder.SetInsertPoint(SI);
+        auto *NewBC =
+          cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
+        SI->setOperand(0, NewBC);
+        Worklist.Add(SI);
+        assert(hasStoreUsersOnly(*NewBC));
       } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
-        // Verify it's a B->A cast.
         Type *TyB = BCI->getOperand(0)->getType();
         Type *TyA = BCI->getType();
-        if (TyA == DestTy && TyB == SrcTy) {
-          Instruction *I = replaceInstUsesWith(*BCI, NewPN);
-          if (BCI == &CI)
-            RetVal = I;
-        }
+        assert(TyA == DestTy && TyB == SrcTy);
+        (void) TyA;
+        (void) TyB;
+        Instruction *I = replaceInstUsesWith(*BCI, NewPN);
+        if (BCI == &CI)
+          RetVal = I;
+      } else if (auto *PHI = dyn_cast<PHINode>(V)) {
+        assert(OldPhiNodes.count(PHI) > 0);
+        (void) PHI;
+      } else {
+        llvm_unreachable("all uses should be handled");
       }
     }
   }
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 434b0d5912157..c2951cc6f4c4f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -405,6 +405,9 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
   /// \return true if successful.
   bool replacePointer(Instruction &I, Value *V);
 
+  LoadInst *combineLoadToNewType(LoadInst &LI, Type *NewTy,
+                                 const Twine &Suffix = "");
+
 private:
   bool shouldChangeType(unsigned FromBitWidth, unsigned ToBitWidth) const;
   bool shouldChangeType(Type *From, Type *To) const;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 054fb7da09a22..8a80efa729b19 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -448,8 +448,8 @@ static bool isSupportedAtomicType(Type *Ty) {
 ///
 /// Note that this will create all of the instructions with whatever insert
 /// point the \c InstCombiner currently is using.
-static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy,
-                                      const Twine &Suffix = "") {
+LoadInst *InstCombiner::combineLoadToNewType(LoadInst &LI, Type *NewTy,
+                                             const Twine &Suffix) {
   assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
          "can't fold an atomic load to requested type");
 
@@ -462,9 +462,9 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
   if (!(match(Ptr, m_BitCast(m_Value(NewPtr))) &&
         NewPtr->getType()->getPointerElementType() == NewTy &&
         NewPtr->getType()->getPointerAddressSpace() == AS))
-    NewPtr = IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS));
+    NewPtr = Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS));
 
-  LoadInst *NewLoad = IC.Builder.CreateAlignedLoad(
+  LoadInst *NewLoad = Builder.CreateAlignedLoad(
       NewTy, NewPtr, LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
   NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
   MDBuilder MDB(NewLoad->getContext());
@@ -505,7 +505,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
       NewLoad->setMetadata(ID, N);
       break;
     case LLVMContext::MD_range:
-      copyRangeMetadata(IC.getDataLayout(), LI, N, *NewLoad);
+      copyRangeMetadata(getDataLayout(), LI, N, *NewLoad);
       break;
     }
   }
@@ -639,9 +639,8 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
         return SI && SI->getPointerOperand() != &LI &&
                !SI->getPointerOperand()->isSwiftError();
       })) {
-    LoadInst *NewLoad = combineLoadToNewType(
-        IC, LI,
-        Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
+    LoadInst *NewLoad = IC.combineLoadToNewType(
+        LI, Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
     // Replace all the stores with stores of the newly loaded value.
     for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
       auto *SI = cast<StoreInst>(*UI++);
@@ -663,7 +662,7 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
   if (auto* CI = dyn_cast<CastInst>(LI.user_back()))
     if (CI->isNoopCast(DL))
       if (!LI.isAtomic() || isSupportedAtomicType(CI->getDestTy())) {
-        LoadInst *NewLoad = combineLoadToNewType(IC, LI, CI->getDestTy());
+        LoadInst *NewLoad = IC.combineLoadToNewType(LI, CI->getDestTy());
         CI->replaceAllUsesWith(NewLoad);
         IC.eraseInstFromFunction(*CI);
         return &LI;
@@ -691,8 +690,8 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
     // If the struct only have one element, we unpack.
     auto NumElements = ST->getNumElements();
     if (NumElements == 1) {
-      LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
-                                               ".unpack");
+      LoadInst *NewLoad = IC.combineLoadToNewType(LI, ST->getTypeAtIndex(0U),
+                                                  ".unpack");
       AAMDNodes AAMD;
       LI.getAAMetadata(AAMD);
       NewLoad->setAAMetadata(AAMD);
@@ -741,7 +740,7 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
     auto *ET = AT->getElementType();
     auto NumElements = AT->getNumElements();
     if (NumElements == 1) {
-      LoadInst *NewLoad = combineLoadToNewType(IC, LI, ET, ".unpack");
+      LoadInst *NewLoad = IC.combineLoadToNewType(LI, ET, ".unpack");
       AAMDNodes AAMD;
       LI.getAAMetadata(AAMD);
       NewLoad->setAAMetadata(AAMD);
@@ -1377,8 +1376,8 @@ static bool removeBitcastsFromLoadStoreOnMinMax(InstCombiner &IC,
     return false;
 
   IC.Builder.SetInsertPoint(LI);
-  LoadInst *NewLI = combineLoadToNewType(
-      IC, *LI, LoadAddr->getType()->getPointerElementType());
+  LoadInst *NewLI = IC.combineLoadToNewType(
+      *LI, LoadAddr->getType()->getPointerElementType());
   // Replace all the stores with stores of the newly loaded value.
   for (auto *UI : LI->users()) {
     auto *USI = cast<StoreInst>(UI);
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 385f4926b845a..8e8a302c56fe0 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -121,6 +121,9 @@ STATISTIC(NumReassoc  , "Number of reassociations");
 DEBUG_COUNTER(VisitCounter, "instcombine-visit",
               "Controls which instructions are visited");
 
+static constexpr unsigned InstCombineDefaultMaxIterations = 1000;
+static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 1000;
+
 static cl::opt<bool>
 EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
                                               cl::init(true));
@@ -129,6 +132,12 @@ static cl::opt<bool>
 EnableExpensiveCombines("expensive-combines",
                         cl::desc("Enable expensive instruction combines"));
 
+static cl::opt<unsigned> InfiniteLoopDetectionThreshold(
+    "instcombine-infinite-loop-threshold",
+    cl::desc("Number of instruction combining iterations considered an "
+             "infinite loop"),
+    cl::init(InstCombineDefaultInfiniteLoopThreshold), cl::Hidden);
+
 static cl::opt<unsigned>
 MaxArraySize("instcombine-maxarray-size", cl::init(1024),
              cl::desc("Maximum array size considered when doing a combine"));
@@ -3508,9 +3517,16 @@ static bool combineInstructionsOverFunction(
     MadeIRChange = LowerDbgDeclare(F);
 
   // Iterate while there is work to do.
-  int Iteration = 0;
+  unsigned Iteration = 0;
   while (true) {
     ++Iteration;
+
+    if (Iteration > InfiniteLoopDetectionThreshold) {
+      report_fatal_error(
+          "Instruction Combining seems stuck in an infinite loop after " +
+          Twine(InfiniteLoopDetectionThreshold) + " iterations.");
+    }
+
     LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
                       << F.getName() << "\n");
diff --git a/llvm/test/Transforms/InstCombine/bitcast-phi-uselistorder.ll b/llvm/test/Transforms/InstCombine/bitcast-phi-uselistorder.ll
new file mode 100644
index 0000000000000..d5489484bddf2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/bitcast-phi-uselistorder.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+@Q = internal unnamed_addr global double 1.000000e+00, align 8
+
+define double @test(i1 %c, i64* %p) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF:%.*]], label [[END:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    [[LOAD1:%.*]] = load double, double* @Q, align 8
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       end:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[LOAD1]], [[IF]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to double*
+; CHECK-NEXT:    store double [[TMP0]], double* [[TMP1]], align 8
+; CHECK-NEXT:    ret double [[TMP0]]
+;
+entry:
+  br i1 %c, label %if, label %end
+
+if:
+  %load = load i64, i64* bitcast (double* @Q to i64*), align 8
+  br label %end
+
+end:
+  %phi = phi i64 [ 0, %entry ], [ %load, %if ]
+  store i64 %phi, i64* %p, align 8
+  %cast = bitcast i64 %phi to double
+  ret double %cast
+
+  uselistorder i64 %phi, { 1, 0 }
+}
diff --git a/llvm/test/Transforms/InstCombine/pr44242.ll b/llvm/test/Transforms/InstCombine/pr44242.ll
new file mode 100644
index 0000000000000..5e783af734785
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr44242.ll
@@ -0,0 +1,190 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; Check that we don't create two redundant phi nodes when %val is used in a
+; form where we can't rewrite it in terms of the new phi node.
+
+; Use %val in an instruction type not supported by optimizeBitCastFromPhi.
+define float @sitofp(float %x) {
+; CHECK-LABEL: @sitofp(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop_header:
+; CHECK-NEXT:    [[VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VAL_INCR_CASTED:%.*]], [[LOOP:%.*]] ]
+; CHECK-NEXT:    [[VAL_CASTED:%.*]] = bitcast i32 [[VAL]] to float
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[VAL_CASTED]], [[X:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[LOOP]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[VAL_INCR:%.*]] = fadd float [[VAL_CASTED]], 1.000000e+00
+; CHECK-NEXT:    [[VAL_INCR_CASTED]] = bitcast float [[VAL_INCR]] to i32
+; CHECK-NEXT:    br label [[LOOP_HEADER]]
+; CHECK:       end:
+; CHECK-NEXT:    [[RESULT:%.*]] = sitofp i32 [[VAL]] to float
+; CHECK-NEXT:    ret float [[RESULT]]
+;
+entry:
+  br label %loop_header
+loop_header:
+  %val = phi i32 [ 0, %entry ], [ %val_incr_casted, %loop ]
+  %val_casted = bitcast i32 %val to float
+  %cmp = fcmp ogt float %val_casted, %x
+  br i1 %cmp, label %end, label %loop
+loop:
+  %val_incr = fadd float %val_casted, 1.0
+  %val_incr_casted = bitcast float %val_incr to i32
+  br label %loop_header
+end:
+  %result = sitofp i32 %val to float
+  ret float %result
+}
+
+; Use %val in an incompatible bitcast.
+define <2 x i16> @bitcast(float %x) {
+; CHECK-LABEL: @bitcast(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop_header:
+; CHECK-NEXT:    [[VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VAL_INCR_CASTED:%.*]], [[LOOP:%.*]] ]
+; CHECK-NEXT:    [[VAL_CASTED:%.*]] = bitcast i32 [[VAL]] to float
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[VAL_CASTED]], [[X:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[LOOP]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[VAL_INCR:%.*]] = fadd float [[VAL_CASTED]], 1.000000e+00
+; CHECK-NEXT:    [[VAL_INCR_CASTED]] = bitcast float [[VAL_INCR]] to i32
+; CHECK-NEXT:    br label [[LOOP_HEADER]]
+; CHECK:       end:
+; CHECK-NEXT:    [[RESULT:%.*]] = bitcast i32 [[VAL]] to <2 x i16>
+; CHECK-NEXT:    ret <2 x i16> [[RESULT]]
+;
+entry:
+  br label %loop_header
+loop_header:
+  %val = phi i32 [ 0, %entry ], [ %val_incr_casted, %loop ]
+  %val_casted = bitcast i32 %val to float
+  %cmp = fcmp ogt float %val_casted, %x
+  br i1 %cmp, label %end, label %loop
+loop:
+  %val_incr = fadd float %val_casted, 1.0
+  %val_incr_casted = bitcast float %val_incr to i32
+  br label %loop_header
+end:
+  %result = bitcast i32 %val to <2 x i16>
+  ret <2 x i16> %result
+}
+
+@global = global i32 0
+
+; Use %val with a volatile store.
+define void @store_volatile(float %x) {
+; CHECK-LABEL: @store_volatile(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop_header:
+; CHECK-NEXT:    [[VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VAL_INCR_CASTED:%.*]], [[LOOP:%.*]] ]
+; CHECK-NEXT:    [[VAL_CASTED:%.*]] = bitcast i32 [[VAL]] to float
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[VAL_CASTED]], [[X:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[LOOP]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[VAL_INCR:%.*]] = fadd float [[VAL_CASTED]], 1.000000e+00
+; CHECK-NEXT:    [[VAL_INCR_CASTED]] = bitcast float [[VAL_INCR]] to i32
+; CHECK-NEXT:    br label [[LOOP_HEADER]]
+; CHECK:       end:
+; CHECK-NEXT:    store volatile i32 [[VAL]], i32* @global, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop_header
+loop_header:
+  %val = phi i32 [ 0, %entry ], [ %val_incr_casted, %loop ]
+  %val_casted = bitcast i32 %val to float
+  %cmp = fcmp ogt float %val_casted, %x
+  br i1 %cmp, label %end, label %loop
+loop:
+  %val_incr = fadd float %val_casted, 1.0
+  %val_incr_casted = bitcast float %val_incr to i32
+  br label %loop_header
+end:
+  store volatile i32 %val, i32* @global
+  ret void
+}
+
+; Use %val with a store where it's actually the address.
+define void @store_address(i32 %x) {
+; CHECK-LABEL: @store_address(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop_header:
+; CHECK-NEXT:    [[VAL:%.*]] = phi i32* [ @global, [[ENTRY:%.*]] ], [ [[VAL_INCR1:%.*]], [[LOOP:%.*]] ]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[LOOP]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[VAL_INCR1]] = getelementptr i32, i32* [[VAL]], i64 1
+; CHECK-NEXT:    br label [[LOOP_HEADER]]
+; CHECK:       end:
+; CHECK-NEXT:    store i32 0, i32* [[VAL]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop_header
+loop_header:
+  %val = phi i32* [ @global, %entry ], [ %val_incr_casted, %loop ]
+  %i = phi i32 [ 0, %entry ], [ %i_incr, %loop ]
+  %val_casted = bitcast i32* %val to float*
+  %cmp = icmp sgt i32 %i, %x
+  br i1 %cmp, label %end, label %loop
+loop:
+  %i_incr = add i32 %i, 0
+  %val_incr = getelementptr float, float* %val_casted, i32 1
+  %val_incr_casted = bitcast float* %val_incr to i32*
+  br label %loop_header
+end:
+  store i32 0, i32* %val
+  ret void
+}
+
+; Test where a phi (%val2) other than the original one (%val) has an
+; incompatible use.
+define i32 @multiple_phis(float %x) {
+; CHECK-LABEL: @multiple_phis(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop_header:
+; CHECK-NEXT:    [[VAL:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VAL2:%.*]], [[LOOP_END:%.*]] ]
+; CHECK-NEXT:    [[VAL_CASTED:%.*]] = bitcast i32 [[VAL]] to float
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[VAL_CASTED]], [[X:%.*]]
+; CHECK-NEXT:    br i1 [[CMP]], label [[END:%.*]], label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[CMP2:%.*]] = fcmp ogt float [[VAL_CASTED]], 2.000000e+00
+; CHECK-NEXT:    br i1 [[CMP2]], label [[IF:%.*]], label [[LOOP_END]]
+; CHECK:       if:
+; CHECK-NEXT:    [[VAL_INCR:%.*]] = fadd float [[VAL_CASTED]], 1.000000e+00
+; CHECK-NEXT:    [[VAL_INCR_CASTED:%.*]] = bitcast float [[VAL_INCR]] to i32
+; CHECK-NEXT:    br label [[LOOP_END]]
+; CHECK:       loop_end:
+; CHECK-NEXT:    [[VAL2]] = phi i32 [ [[VAL]], [[LOOP]] ], [ [[VAL_INCR_CASTED]], [[IF]] ]
+; CHECK-NEXT:    store volatile i32 [[VAL2]], i32* @global, align 4
+; CHECK-NEXT:    br label [[LOOP_HEADER]]
+; CHECK:       end:
+; CHECK-NEXT:    ret i32 [[VAL]]
+;
+entry:
+  br label %loop_header
+loop_header:
+  %val = phi i32 [ 0, %entry ], [ %val2, %loop_end ]
+  %val_casted = bitcast i32 %val to float
+  %cmp = fcmp ogt float %val_casted, %x
+  br i1 %cmp, label %end, label %loop
+loop:
+  %cmp2 = fcmp ogt float %val_casted, 2.0
+  br i1 %cmp2, label %if, label %loop_end
+if:
+  %val_incr = fadd float %val_casted, 1.0
+  %val_incr_casted = bitcast float %val_incr to i32
+  br label %loop_end
+loop_end:
+  %val2 = phi i32 [ %val, %loop ], [ %val_incr_casted, %if ]
+  store volatile i32 %val2, i32* @global ; the incompatible use
+  br label %loop_header
+end:
+  ret i32 %val
+}
diff --git a/llvm/test/Transforms/InstCombine/pr44245.ll b/llvm/test/Transforms/InstCombine/pr44245.ll
new file mode 100644
index 0000000000000..f75e26e87add8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr44245.ll
@@ -0,0 +1,192 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine -instcombine-infinite-loop-threshold=2 < %s | FileCheck %s
+
+; This used to cause an infinite instcombine loop.
+
+define void @test(i1 %c) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  bb16:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[BB17:%.*]], label [[BB24:%.*]]
+; CHECK:       bb17:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi i8* [ [[TMP1:%.*]], [[BB47:%.*]] ], [ undef, [[BB16:%.*]] ]
+; CHECK-NEXT:    store i8* [[TMP0]], i8** undef, align 8
+; CHECK-NEXT:    ret void
+; CHECK:       bb24:
+; CHECK-NEXT:    br i1 [[C]], label [[BB44:%.*]], label [[BB49:%.*]]
+; CHECK:       bb44:
+; CHECK-NEXT:    [[TMP467:%.*]] = load i8*, i8** inttoptr (i64 16 to i8**), align 16
+; CHECK-NEXT:    br label [[BB47]]
+; CHECK:       bb47:
+; CHECK-NEXT:    [[TMP1]] = phi i8* [ [[TMP2:%.*]], [[BB150:%.*]] ], [ [[TMP1221:%.*]], [[BB119:%.*]] ], [ [[TMP1032:%.*]], [[BB101:%.*]] ], [ [[TMP933:%.*]], [[BB91:%.*]] ], [ [[TMP834:%.*]], [[BB81:%.*]] ], [ [[TMP705:%.*]], [[BB67:%.*]] ], [ [[TMP586:%.*]], [[BB56:%.*]] ], [ [[TMP467]], [[BB44]] ]
+; CHECK-NEXT:    br label [[BB17]]
+; CHECK:       bb49:
+; CHECK-NEXT:    br i1 [[C]], label [[BB56]], label [[BB59:%.*]]
+; CHECK:       bb56:
+; CHECK-NEXT:    [[TMP586]] = load i8*, i8** inttoptr (i64 16 to i8**), align 16
+; CHECK-NEXT:    br label [[BB47]]
+; CHECK:       bb59:
+; CHECK-NEXT:    br i1 [[C]], label [[BB67]], label [[BB71:%.*]]
+; CHECK:       bb67:
+; CHECK-NEXT:    [[TMP705]] = load i8*, i8** inttoptr (i64 16 to i8**), align 16
+; CHECK-NEXT:    br label [[BB47]]
+; CHECK:       bb71:
+; CHECK-NEXT:    br i1 [[C]], label [[BB81]], label [[BB84:%.*]]
+; CHECK:       bb81:
+; CHECK-NEXT:    [[TMP834]] = load i8*, i8** inttoptr (i64 16 to i8**), align 16
+; CHECK-NEXT:    br label [[BB47]]
+; CHECK:       bb84:
+; CHECK-NEXT:    br i1 [[C]], label [[BB91]], label [[BB94:%.*]]
+; CHECK:       bb91:
+; CHECK-NEXT:    [[TMP933]] = load i8*, i8** inttoptr (i64 16 to i8**), align 16
+; CHECK-NEXT:    br label [[BB47]]
+; CHECK:       bb94:
+; CHECK-NEXT:    br i1 [[C]], label [[BB101]], label [[BB104:%.*]]
+; CHECK:       bb101:
+; CHECK-NEXT:    [[TMP1032]] = load i8*, i8** inttoptr (i64 16 to i8**), align 16
+; CHECK-NEXT:    br label [[BB47]]
+; CHECK:       bb104:
+; CHECK-NEXT:    br i1 [[C]], label [[BB119]], label [[BB123:%.*]]
+; CHECK:       bb119:
+; CHECK-NEXT:    [[TMP1221]] = load i8*, i8** inttoptr (i64 16 to i8**), align 16
+; CHECK-NEXT:    br label [[BB47]]
+; CHECK:       bb123:
+; CHECK-NEXT:    br i1 [[C]], label [[BB147:%.*]], label [[BB152:%.*]]
+; CHECK:       bb147:
+; CHECK-NEXT:    [[TMP1499:%.*]] = load i8*, i8** inttoptr (i64 16 to i8**), align 16
+; CHECK-NEXT:    br label [[BB150]]
+; CHECK:       bb150:
+; CHECK-NEXT:    [[TMP2]] = phi i8* [ [[TMP1848:%.*]], [[BB152]] ], [ [[TMP1499]], [[BB147]] ]
+; CHECK-NEXT:    br label [[BB47]]
+; CHECK:       bb152:
+; CHECK-NEXT:    [[TMP1848]] = load i8*, i8** inttoptr (i64 16 to i8**), align 16
+; CHECK-NEXT:    call void undef()
+; CHECK-NEXT:    br label [[BB150]]
+;
+bb16:                                             ; preds = %bb
+  br i1 %c, label %bb17, label %bb24
+
+bb17:                                             ; preds = %bb47, %bb17
+  %0 = phi i8* [ %1, %bb47 ], [ undef, %bb16 ]
+  store i8* %0, i8** undef, align 8
+  ret void
+
+bb24:                                             ; preds = %bb24
+  br i1 %c, label %bb44, label %bb49
+
+bb44:                                             ; preds = %bb43
+  %tmp46 = load i64*, i64** inttoptr (i64 16 to i64**), align 16
+  br label %bb47
+
+bb47:                                             ; preds = %bb150, %bb119, %bb101, %bb91, %bb81, %bb67, %bb56, %bb44
+  %.in1 = phi i64* [ %.in, %bb150 ], [ %tmp122, %bb119 ], [ %tmp103, %bb101 ], [ %tmp93, %bb91 ], [ %tmp83, %bb81 ], [ %tmp70, %bb67 ], [ %tmp58, %bb56 ], [ %tmp46, %bb44 ]
+  %1 = bitcast i64* %.in1 to i8*
+  br label %bb17
+
+bb49:                                             ; preds = %bb49
+  br i1 %c, label %bb56, label %bb59
+
+bb56:                                             ; preds = %bb55
+  %tmp58 = load i64*, i64** inttoptr (i64 16 to i64**), align 16
+  br label %bb47
+
+bb59:                                             ; preds = %bb59
+  br i1 %c, label %bb67, label %bb71
+
+bb67:                                             ; preds = %bb66
+  %tmp70 = load i64*, i64** inttoptr (i64 16 to i64**), align 16
+  br label %bb47
+
+bb71:                                             ; preds = %bb71
+  br i1 %c, label %bb81, label %bb84
+
+bb81:                                             ; preds = %bb80
+  %tmp83 = load i64*, i64** inttoptr (i64 16 to i64**), align 16
+  br label %bb47
+
+bb84:                                             ; preds = %bb84
+  br i1 %c, label %bb91, label %bb94
+
+bb91:                                             ; preds = %bb90
+  %tmp93 = load i64*, i64** inttoptr (i64 16 to i64**), align 16
+  br label %bb47
+
+bb94:                                             ; preds = %bb94
+  br i1 %c, label %bb101, label %bb104
+
+bb101:                                            ; preds = %bb100
+  %tmp103 = load i64*, i64** inttoptr (i64 16 to i64**), align 16
+  br label %bb47
+
+bb104:                                            ; preds = %bb104
+  br i1 %c, label %bb119, label %bb123
+
+bb119:                                            ; preds = %bb118
+  %tmp122 = load i64*, i64** inttoptr (i64 16 to i64**), align 16
+  br label %bb47
+
+bb123:                                            ; preds = %bb123
+  br i1 %c, label %bb147, label %bb152
+
+bb147:                                            ; preds = %bb146
+  %tmp149 = load i64*, i64** inttoptr (i64 16 to i64**), align 16
+  br label %bb150
+
+bb150:                                            ; preds = %bb152, %bb147
+  %.in = phi i64* [ %tmp184, %bb152 ], [ %tmp149, %bb147 ]
+  br label %bb47
+
+bb152:                                            ; preds = %bb146
+  %tmp184 = load i64*, i64** inttoptr (i64 16 to i64**), align 16
+  call void undef()
+  br label %bb150
+}
+
+; This used to cause an instcombine loop when the problem above was
+; addressed in a non-robust fashion.
+
+%type_1 = type {}
+%type_2 = type {}
+%type_3 = type {}
+
+define void @test_2(i1 %c) local_unnamed_addr {
+; CHECK-LABEL: @test_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[WHILE_COND:%.*]]
+; CHECK:       while.cond:
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[COND_TRUE133:%.*]], label [[COND_FALSE138:%.*]]
+; CHECK:       cond.true133:
+; CHECK-NEXT:    store %type_3* undef, %type_3** null, align 536870912
+; CHECK-NEXT:    br label [[COND_END144:%.*]]
+; CHECK:       cond.false138:
+; CHECK-NEXT:    store %type_3* undef, %type_3** null, align 536870912
+; CHECK-NEXT:    br label [[COND_END144]]
+; CHECK:       cond.end144:
+; CHECK-NEXT:    br label [[WHILE_COND]]
+;
+entry:
+  br label %while.cond
+
+while.cond:                                       ; preds = %cond.end144, %entry
+  %link.0 = phi %type_2* [ undef, %entry ], [ %cond145, %cond.end144 ]
+  %os115 = bitcast %type_2* %link.0 to %type_3*
+  %ou116 = getelementptr inbounds %type_3, %type_3* %os115, i32 0
+  %os1117 = bitcast %type_3* %ou116 to %type_1*
+  br label %for.cond
+
+for.cond:                                         ; preds = %while.cond
+  br i1 %c, label %cond.true133, label %cond.false138
+
+cond.true133:                                     ; preds = %sw.epilog
+  %0 = load %type_2*, %type_2** undef, align 8
+  br label %cond.end144
+
+cond.false138:                                    ; preds = %sw.epilog
+  %1 = load %type_2*, %type_2** undef, align 8
+  br label %cond.end144
+
+cond.end144:                                      ; preds = %cond.false138, %cond.true133
+  %cond145 = phi %type_2* [ %0, %cond.true133 ], [ %1, %cond.false138 ]
+  br label %while.cond
+}
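
For reference, a minimal before/after sketch of the rewrite that optimizeBitCastFromPhi performs and that the tests above exercise. This is illustrative only, not part of the patch; the function and value names are invented. A phi of type B whose only users are simple stores, B->A bitcasts, and other phis in the same web is rewritten as a phi of type A, and a feeding load is recreated at the new type via combineLoadToNewType.

; Before: an i64 phi that is only ever consumed as a double.
define double @sketch(i1 %c, double* %p) {
entry:
  br i1 %c, label %if, label %end

if:
  %pi = bitcast double* %p to i64*
  %load = load i64, i64* %pi, align 8
  br label %end

end:
  %phi = phi i64 [ 0, %entry ], [ %load, %if ]
  %cast = bitcast i64 %phi to double
  ret double %cast
}

; After: the load is recreated at the new type and the phi is rewritten at
; type double, so the whole i64 web becomes dead and is removed.
define double @sketch.rewritten(i1 %c, double* %p) {
entry:
  br i1 %c, label %if, label %end

if:
  %load1 = load double, double* %p, align 8
  br label %end

end:
  %phi = phi double [ 0.000000e+00, %entry ], [ %load1, %if ]
  ret double %phi
}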