From 447ccd294b097e23a044de543de77d627e5363fa Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Fri, 4 Apr 2025 23:51:46 +0800 Subject: [PATCH 1/6] [JumpThreading] Copy metadata when inserting preload into preds --- llvm/lib/Transforms/Scalar/JumpThreading.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index ba598d8415b18..a681036ea78ee 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -1410,6 +1410,13 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { if (AATags) NewVal->setAAMetadata(AATags); + if (auto *MD = LoadI->getMetadata(LLVMContext::MD_invariant_load)) + NewVal->setMetadata(LLVMContext::MD_invariant_load, MD); + if (auto *InvGroupMD = LoadI->getMetadata(LLVMContext::MD_invariant_group)) + NewVal->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD); + if (auto *RangeMD = LoadI->getMetadata(LLVMContext::MD_range)) + NewVal->setMetadata(LLVMContext::MD_range, RangeMD); + AvailablePreds.emplace_back(UnavailablePred, NewVal); } From 763c5847882246ee281e8206517c811eeb15f285 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Thu, 17 Apr 2025 17:48:59 +0800 Subject: [PATCH 2/6] [JumpThreading][GVN] Use `copyMetadataForLoad` --- llvm/lib/Transforms/Scalar/GVN.cpp | 21 +------------------- llvm/lib/Transforms/Scalar/JumpThreading.cpp | 10 +--------- llvm/lib/Transforms/Utils/Local.cpp | 1 + 3 files changed, 3 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index 6233e8e2ee681..31bdbc95fc277 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -1516,26 +1516,7 @@ void GVNPass::eliminatePartiallyRedundantLoad( MSSAU->insertUse(cast(NewAccess), /*RenameUses=*/true); } - // Transfer the old load's AA tags to the new load. 
- AAMDNodes Tags = Load->getAAMetadata(); - if (Tags) - NewLoad->setAAMetadata(Tags); - - if (auto *MD = Load->getMetadata(LLVMContext::MD_invariant_load)) - NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD); - if (auto *InvGroupMD = Load->getMetadata(LLVMContext::MD_invariant_group)) - NewLoad->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD); - if (auto *RangeMD = Load->getMetadata(LLVMContext::MD_range)) - NewLoad->setMetadata(LLVMContext::MD_range, RangeMD); - if (auto *AccessMD = Load->getMetadata(LLVMContext::MD_access_group)) - if (LI->getLoopFor(Load->getParent()) == LI->getLoopFor(UnavailableBlock)) - NewLoad->setMetadata(LLVMContext::MD_access_group, AccessMD); - - // We do not propagate the old load's debug location, because the new - // load now lives in a different BB, and we want to avoid a jumpy line - // table. - // FIXME: How do we retain source locations without causing poor debugging - // behavior? + copyMetadataForLoad(*NewLoad, *Load); // Add the newly created load. 
ValuesPerBlock.push_back( diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index a681036ea78ee..8d790648c2ef9 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -1407,15 +1407,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { LoadI->getOrdering(), LoadI->getSyncScopeID(), UnavailablePred->getTerminator()->getIterator()); NewVal->setDebugLoc(LoadI->getDebugLoc()); - if (AATags) - NewVal->setAAMetadata(AATags); - - if (auto *MD = LoadI->getMetadata(LLVMContext::MD_invariant_load)) - NewVal->setMetadata(LLVMContext::MD_invariant_load, MD); - if (auto *InvGroupMD = LoadI->getMetadata(LLVMContext::MD_invariant_group)) - NewVal->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD); - if (auto *RangeMD = LoadI->getMetadata(LLVMContext::MD_range)) - NewVal->setMetadata(LLVMContext::MD_range, RangeMD); + copyMetadataForLoad(*NewVal, *LoadI); AvailablePreds.emplace_back(UnavailablePred, NewVal); } diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 2f3ea2266e07f..fab45b1dfb088 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3468,6 +3468,7 @@ void llvm::copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source) { case LLVMContext::MD_fpmath: case LLVMContext::MD_tbaa_struct: case LLVMContext::MD_invariant_load: + case LLVMContext::MD_invariant_group: case LLVMContext::MD_alias_scope: case LLVMContext::MD_noalias: case LLVMContext::MD_nontemporal: From 673ab49f50a981e59d23eee13ce0b2af320836d1 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Fri, 18 Apr 2025 16:06:14 +0800 Subject: [PATCH 3/6] [JumpThreading] Add tests. NFC. 
--- .../test/Transforms/JumpThreading/pre-load.ll | 52 ++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/llvm/test/Transforms/JumpThreading/pre-load.ll b/llvm/test/Transforms/JumpThreading/pre-load.ll index d9a2dc20a4189..eef7735acf448 100644 --- a/llvm/test/Transforms/JumpThreading/pre-load.ll +++ b/llvm/test/Transforms/JumpThreading/pre-load.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt -passes=jump-threading -S < %s | FileCheck %s @x = global i32 0 @@ -7,6 +7,10 @@ declare void @f() declare void @g() +;. +; CHECK: @x = global i32 0 +; CHECK: @y = global i32 0 +;. define i32 @pre(i1 %cond, i32 %n) { ; CHECK-LABEL: @pre( ; CHECK-NEXT: br i1 [[COND:%.*]], label [[C_THREAD:%.*]], label [[C:%.*]] @@ -82,3 +86,49 @@ NO: call void @g() ret i32 1 } + +define i32 @pre_metadata(i1 %cond) { +; CHECK-LABEL: @pre_metadata( +; CHECK-NEXT: br i1 [[COND:%.*]], label [[C_THREAD:%.*]], label [[C:%.*]] +; CHECK: C.thread: +; CHECK-NEXT: store i32 0, ptr @x, align 4 +; CHECK-NEXT: br label [[YES:%.*]] +; CHECK: C: +; CHECK-NEXT: [[A_PR:%.*]] = load i32, ptr @y, align 4, !range [[RNG0:![0-9]+]], !noundef [[META1:![0-9]+]] +; CHECK-NEXT: [[COND2:%.*]] = icmp eq i32 [[A_PR]], 0 +; CHECK-NEXT: br i1 [[COND2]], label [[YES]], label [[NO:%.*]] +; CHECK: YES: +; CHECK-NEXT: [[A4:%.*]] = phi i32 [ 0, [[C_THREAD]] ], [ [[A_PR]], [[C]] ] +; CHECK-NEXT: call void @f() +; CHECK-NEXT: ret i32 [[A4]] +; CHECK: NO: +; CHECK-NEXT: call void @g() +; CHECK-NEXT: ret i32 1 +; + br i1 %cond, label %A, label %B + +A: + store i32 0, ptr @x, align 4 + br label %C + +B: + br label %C + +C: + %ptr = phi ptr [@x, %A], [@y, %B] + %a = load i32, ptr %ptr, align 4, !range !{i32 0, i32 2}, !noundef !{} + %cond2 = icmp eq i32 %a, 0 + br i1 %cond2, label %YES, label %NO + +YES: + call void @f() + ret i32 %a + +NO: + call void 
@g() + ret i32 1 +} +;. +; CHECK: [[RNG0]] = !{i32 0, i32 2} +; CHECK: [[META1]] = !{} +;. From 89b09c80ebc1a198bf2eab81b5fcb9ba71a0f066 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Wed, 23 Apr 2025 15:01:45 +0800 Subject: [PATCH 4/6] [GVN][JumpThreading] Address review comments --- llvm/lib/Transforms/Scalar/GVN.cpp | 5 +- llvm/lib/Transforms/Scalar/JumpThreading.cpp | 11 ++++- llvm/lib/Transforms/Utils/Local.cpp | 1 - .../MemoryDependenceAnalysis/InvariantLoad.ll | 12 ++--- .../invariant.group-bug.ll | 2 +- .../test/Transforms/GVN/PRE/invariant-load.ll | 2 +- llvm/test/Transforms/GVN/PRE/load-metadata.ll | 2 +- .../PRE/load-pre-metadata-accsess-group.ll | 10 ++-- llvm/test/Transforms/GVN/PRE/preserve-tbaa.ll | 2 +- llvm/test/Transforms/GVN/pr64598.ll | 18 +++---- .../test/Transforms/JumpThreading/pre-load.ll | 48 +++++++++++++++++++ .../PhaseOrdering/X86/spurious-peeling.ll | 4 +- 12 files changed, 85 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index 31bdbc95fc277..cdf2cfa311c65 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -1516,7 +1516,10 @@ void GVNPass::eliminatePartiallyRedundantLoad( MSSAU->insertUse(cast(NewAccess), /*RenameUses=*/true); } - copyMetadataForLoad(*NewLoad, *Load); + NewLoad->copyMetadata(*Load); + // Drop UB-implying metadata as we do not know if it is guaranteed to + // transfer the execution to the original load. + NewLoad->dropUBImplyingAttrsAndMetadata(); // Add the newly created load. 
ValuesPerBlock.push_back( diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 8d790648c2ef9..4bad53bc13e07 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -1362,11 +1362,14 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { // farther than to a predecessor, we need to reuse the code from GVN's PRE. // It requires domination tree analysis, so for this simple case it is an // overkill. + bool TransfersExecution = false; if (PredsScanned.size() != AvailablePreds.size() && - !isSafeToSpeculativelyExecute(LoadI)) + !isSafeToSpeculativelyExecute(LoadI)) { for (auto I = LoadBB->begin(); &*I != LoadI; ++I) if (!isGuaranteedToTransferExecutionToSuccessor(&*I)) return false; + TransfersExecution = true; + } // If there is exactly one predecessor where the value is unavailable, the // already computed 'OneUnavailablePred' block is it. If it ends in an @@ -1407,7 +1410,11 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { LoadI->getOrdering(), LoadI->getSyncScopeID(), UnavailablePred->getTerminator()->getIterator()); NewVal->setDebugLoc(LoadI->getDebugLoc()); - copyMetadataForLoad(*NewVal, *LoadI); + NewVal->copyMetadata(*LoadI); + // Drop UB-implying metadata if we do not know it is guaranteed to transfer + // the execution to the original load. 
+ if (!TransfersExecution) + NewVal->dropUBImplyingAttrsAndMetadata(); AvailablePreds.emplace_back(UnavailablePred, NewVal); } diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index fab45b1dfb088..2f3ea2266e07f 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3468,7 +3468,6 @@ void llvm::copyMetadataForLoad(LoadInst &Dest, const LoadInst &Source) { case LLVMContext::MD_fpmath: case LLVMContext::MD_tbaa_struct: case LLVMContext::MD_invariant_load: - case LLVMContext::MD_invariant_group: case LLVMContext::MD_alias_scope: case LLVMContext::MD_noalias: case LLVMContext::MD_nontemporal: diff --git a/llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll b/llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll index e49db3d8c3e8e..e98b635dc6086 100644 --- a/llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll +++ b/llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll @@ -10,8 +10,8 @@ declare void @foo(ptr) define i8 @test(i1 %cmp) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[P:%.*]] = alloca i8 -; CHECK-NEXT: store i8 5, ptr [[P]] +; CHECK-NEXT: [[P:%.*]] = alloca i8, align 1 +; CHECK-NEXT: store i8 5, ptr [[P]], align 1 ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: ; CHECK-NEXT: [[V:%.*]] = phi i8 [ 5, [[ENTRY:%.*]] ], [ -5, [[ALIVE:%.*]] ] @@ -23,7 +23,7 @@ define i8 @test(i1 %cmp) { ; CHECK-NEXT: br label [[ALIVE]] ; CHECK: alive: ; CHECK-NEXT: [[I_2:%.*]] = phi i8 [ [[I]], [[HEADER]] ], [ [[I_1]], [[DEAD]] ] -; CHECK-NEXT: store i8 -5, ptr [[P]] +; CHECK-NEXT: store i8 -5, ptr [[P]], align 1 ; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[P]], i8 0, i32 1, i1 false) ; CHECK-NEXT: [[I_INC]] = add i8 [[I_2]], 1 ; CHECK-NEXT: [[CMP_LOOP:%.*]] = icmp ugt i8 [[I_INC]], 100 @@ -67,7 +67,7 @@ define i8 @test2(i1 %cmp, ptr %p) { ; CHECK-NEXT: call void @foo(ptr [[P]]) ; CHECK-NEXT: br i1 [[CMP:%.*]], label [[B2:%.*]], label 
[[B1:%.*]] ; CHECK: b1: -; CHECK-NEXT: [[RES2:%.*]] = load i8, ptr [[P]] +; CHECK-NEXT: [[RES2:%.*]] = load i8, ptr [[P]], align 1 ; CHECK-NEXT: [[RES3:%.*]] = add i8 [[RES1]], [[RES2]] ; CHECK-NEXT: br label [[ALIVE:%.*]] ; CHECK: b2: @@ -105,7 +105,7 @@ define i8 @test3(i1 %cmp, ptr %p) { ; CHECK-NEXT: call void @foo(ptr [[P]]) ; CHECK-NEXT: br i1 [[CMP:%.*]], label [[B1:%.*]], label [[B2:%.*]] ; CHECK: b1: -; CHECK-NEXT: [[RES2:%.*]] = load i8, ptr [[P]] +; CHECK-NEXT: [[RES2:%.*]] = load i8, ptr [[P]], align 1 ; CHECK-NEXT: [[RES3:%.*]] = add i8 [[RES1]], [[RES2]] ; CHECK-NEXT: br label [[ALIVE:%.*]] ; CHECK: b2: @@ -148,7 +148,7 @@ define void @test4() null_pointer_is_valid { ; CHECK-NEXT: [[TMP4:%.*]] = fmul float [[TMP2]], [[TMP2]] ; CHECK-NEXT: [[INVAR_INC3]] = add nuw nsw i64 [[FUSION_INVAR_ADDRESS_DIM_0_03]], 1 ; CHECK-NEXT: [[DOTPHI_TRANS_INSERT:%.*]] = getelementptr inbounds [2 x [1 x [4 x float]]], ptr null, i64 0, i64 [[INVAR_INC3]], i64 0, i64 2 -; CHECK-NEXT: [[DOTPRE]] = load float, ptr [[DOTPHI_TRANS_INSERT]], align 4, !invariant.load !0 +; CHECK-NEXT: [[DOTPRE]] = load float, ptr [[DOTPHI_TRANS_INSERT]], align 4 ; CHECK-NEXT: br label [[FUSION_LOOP_HEADER_DIM_1_PREHEADER]] ; entry: diff --git a/llvm/test/Analysis/MemoryDependenceAnalysis/invariant.group-bug.ll b/llvm/test/Analysis/MemoryDependenceAnalysis/invariant.group-bug.ll index c11191ee96d01..802c969eae2fb 100644 --- a/llvm/test/Analysis/MemoryDependenceAnalysis/invariant.group-bug.ll +++ b/llvm/test/Analysis/MemoryDependenceAnalysis/invariant.group-bug.ll @@ -29,7 +29,7 @@ define void @fail(ptr noalias sret(i1) %arg, ptr %arg1, ptr %arg2, ptr %arg3, i1 ; CHECK-NEXT: br i1 [[ARG4:%.*]], label [[BB10:%.*]], label [[BB29:%.*]] ; CHECK: bb10: ; CHECK-NEXT: [[I14_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds ptr, ptr [[I4]], i64 22 -; CHECK-NEXT: [[I15_PRE:%.*]] = load ptr, ptr [[I14_PHI_TRANS_INSERT]], align 8, !invariant.load [[META6]] +; CHECK-NEXT: [[I15_PRE:%.*]] = load ptr, ptr 
[[I14_PHI_TRANS_INSERT]], align 8 ; CHECK-NEXT: br label [[BB12:%.*]] ; CHECK: bb12: ; CHECK-NEXT: [[I16:%.*]] = call i64 [[I15_PRE]](ptr nonnull [[ARG1]], ptr null, i64 0) #[[ATTR1]] diff --git a/llvm/test/Transforms/GVN/PRE/invariant-load.ll b/llvm/test/Transforms/GVN/PRE/invariant-load.ll index 71332bf0e0bdb..98df8b45951b3 100644 --- a/llvm/test/Transforms/GVN/PRE/invariant-load.ll +++ b/llvm/test/Transforms/GVN/PRE/invariant-load.ll @@ -162,7 +162,7 @@ define i32 @test8(i1 %cnd, ptr %p) { ; CHECK-NEXT: br i1 [[CND]], label [[TAKEN:%.*]], label [[MERGE:%.*]] ; CHECK: taken: ; CHECK-NEXT: [[P2:%.*]] = call ptr (...) @bar(ptr [[P]]) -; CHECK-NEXT: [[V2_PRE:%.*]] = load i32, ptr [[P2]], align 4, !invariant.load [[META0]] +; CHECK-NEXT: [[V2_PRE:%.*]] = load i32, ptr [[P2]], align 4 ; CHECK-NEXT: br label [[MERGE]] ; CHECK: merge: ; CHECK-NEXT: [[V2:%.*]] = phi i32 [ [[V1]], [[ENTRY:%.*]] ], [ [[V2_PRE]], [[TAKEN]] ] diff --git a/llvm/test/Transforms/GVN/PRE/load-metadata.ll b/llvm/test/Transforms/GVN/PRE/load-metadata.ll index 415812be95b3a..b97854d576383 100644 --- a/llvm/test/Transforms/GVN/PRE/load-metadata.ll +++ b/llvm/test/Transforms/GVN/PRE/load-metadata.ll @@ -8,7 +8,7 @@ block1: block2: br label %block4 ; CHECK: block2: -; CHECK-NEXT: load i32, ptr %p, align 4, !range !0, !invariant.group !1 +; CHECK-NEXT: load i32, ptr %p, align 4, !range !0 block3: store i32 0, ptr %p diff --git a/llvm/test/Transforms/GVN/PRE/load-pre-metadata-accsess-group.ll b/llvm/test/Transforms/GVN/PRE/load-pre-metadata-accsess-group.ll index 148e308b116ed..303064f6d8586 100644 --- a/llvm/test/Transforms/GVN/PRE/load-pre-metadata-accsess-group.ll +++ b/llvm/test/Transforms/GVN/PRE/load-pre-metadata-accsess-group.ll @@ -13,14 +13,14 @@ define dso_local void @test1(ptr nocapture readonly %aa, ptr nocapture %bb) loca ; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[TMP0]], [[ENTRY:%.*]] ], [ [[DOTPRE:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ] ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, 
[[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE]] ] ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, ptr [[AA]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[IDX4]], align 4, !llvm.access.group !0 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[IDX4]], align 4, !llvm.access.group [[ACC_GRP0:![0-9]+]] ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP1]], [[TMP2]] -; CHECK-NEXT: store i32 [[MUL]], ptr [[IDX4]], align 4, !llvm.access.group !0 +; CHECK-NEXT: store i32 [[MUL]], ptr [[IDX4]], align 4, !llvm.access.group [[ACC_GRP0]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 100 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]], label [[FOR_END:%.*]] ; CHECK: for.body.for.body_crit_edge: -; CHECK-NEXT: [[DOTPRE]] = load i32, ptr [[IDX]], align 4, !llvm.access.group !0 +; CHECK-NEXT: [[DOTPRE]] = load i32, ptr [[IDX]], align 4 ; CHECK-NEXT: br label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -65,10 +65,10 @@ define dso_local void @test2(ptr nocapture readonly %aa, ptr nocapture %bb) loca ; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_BODY]] ], [ [[MUL:%.*]], [[FOR_BODY2_FOR_BODY2_CRIT_EDGE]] ] ; CHECK-NEXT: [[INDVARS2_IV:%.*]] = phi i64 [ 0, [[FOR_BODY]] ], [ 1, [[FOR_BODY2_FOR_BODY2_CRIT_EDGE]] ] ; CHECK-NEXT: [[MUL]] = mul nsw i32 [[TMP1]], [[TMP2]] -; CHECK-NEXT: store i32 [[MUL]], ptr [[AA]], align 4, !llvm.access.group !1 +; CHECK-NEXT: store i32 [[MUL]], ptr [[AA]], align 4, !llvm.access.group [[ACC_GRP1:![0-9]+]] ; CHECK-NEXT: br i1 true, label [[FOR_BODY2_FOR_BODY2_CRIT_EDGE]], label [[FOR_END:%.*]] ; CHECK: for.body2.for.body2_crit_edge: -; CHECK-NEXT: [[DOTPRE1]] = load i32, ptr [[IDX]], align 4, !llvm.access.group !1 +; CHECK-NEXT: [[DOTPRE1]] = load i32, ptr [[IDX]], align 4 ; CHECK-NEXT: br label [[FOR_BODY2]] ; CHECK: for.end: ; CHECK-NEXT: br i1 false, label 
[[FOR_END_FOR_BODY_CRIT_EDGE:%.*]], label [[END:%.*]] diff --git a/llvm/test/Transforms/GVN/PRE/preserve-tbaa.ll b/llvm/test/Transforms/GVN/PRE/preserve-tbaa.ll index 3df63beefea68..6f1a645ca0c84 100644 --- a/llvm/test/Transforms/GVN/PRE/preserve-tbaa.ll +++ b/llvm/test/Transforms/GVN/PRE/preserve-tbaa.ll @@ -5,7 +5,7 @@ target datalayout = "e-p:64:64:64" ; GVN should preserve the TBAA tag on loads when doing PRE. ; CHECK-LABEL: @test( -; CHECK: %tmp33.pre = load i16, ptr %P, align 2, !tbaa !0 +; CHECK: %tmp33.pre = load i16, ptr %P, align 2 ; CHECK: br label %for.body define void @test(ptr %P, ptr %Q, i1 %arg) nounwind { entry: diff --git a/llvm/test/Transforms/GVN/pr64598.ll b/llvm/test/Transforms/GVN/pr64598.ll index 902af984bce2b..e8e70a0edbd2e 100644 --- a/llvm/test/Transforms/GVN/pr64598.ll +++ b/llvm/test/Transforms/GVN/pr64598.ll @@ -6,23 +6,20 @@ define i32 @main(i64 %x, ptr %d, ptr noalias %p) { ; CHECK-SAME: (i64 [[X:%.*]], ptr [[D:%.*]], ptr noalias [[P:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[T1_PRE_PRE_PRE:%.*]] = load ptr, ptr [[P]], align 8 -; CHECK-NEXT: [[T2_PRE_PRE_PRE:%.*]] = load ptr, ptr [[T1_PRE_PRE_PRE]], align 8, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[T2_PRE_PRE_PRE:%.*]] = load ptr, ptr [[T1_PRE_PRE_PRE]], align 8 ; CHECK-NEXT: [[T3_PRE_PRE_PRE:%.*]] = load ptr, ptr [[T2_PRE_PRE_PRE]], align 8 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[T2_PRE_PRE:%.*]] = phi ptr [ [[T2_PRE_PRE23:%.*]], [[LOOP_LATCH:%.*]] ], [ [[T2_PRE_PRE_PRE]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[T1_PRE_PRE:%.*]] = phi ptr [ [[T1_PRE_PRE19:%.*]], [[LOOP_LATCH]] ], [ [[T1_PRE_PRE_PRE]], [[ENTRY]] ] +; CHECK-NEXT: [[T1_PRE_PRE:%.*]] = phi ptr [ [[T1_PRE_PRE19:%.*]], [[LOOP_LATCH:%.*]] ], [ [[T1_PRE_PRE_PRE]], [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP2:%.*]] ; CHECK: loop2: -; CHECK-NEXT: [[T2_PRE_PRE25:%.*]] = phi ptr [ [[T2_PRE_PRE23]], [[LOOP2_LATCH_LOOP2_CRIT_EDGE:%.*]] ], [ [[T2_PRE_PRE]], [[LOOP]] ] -; CHECK-NEXT: [[T1_PRE_PRE21:%.*]] = 
phi ptr [ [[T1_PRE_PRE19]], [[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T1_PRE_PRE]], [[LOOP]] ] +; CHECK-NEXT: [[T1_PRE_PRE21:%.*]] = phi ptr [ [[T1_PRE_PRE19]], [[LOOP2_LATCH_LOOP2_CRIT_EDGE:%.*]] ], [ [[T1_PRE_PRE]], [[LOOP]] ] ; CHECK-NEXT: [[T3_PRE:%.*]] = phi ptr [ [[T3_PRE16:%.*]], [[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T3_PRE_PRE_PRE]], [[LOOP]] ] -; CHECK-NEXT: [[T2_PRE:%.*]] = phi ptr [ [[T2_PRE13:%.*]], [[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T2_PRE_PRE]], [[LOOP]] ] +; CHECK-NEXT: [[T2_PRE:%.*]] = phi ptr [ [[T2_PRE13:%.*]], [[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T2_PRE_PRE_PRE]], [[LOOP]] ] ; CHECK-NEXT: [[T1_PRE:%.*]] = phi ptr [ [[T1_PRE10:%.*]], [[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T1_PRE_PRE]], [[LOOP]] ] ; CHECK-NEXT: br label [[LOOP3:%.*]] ; CHECK: loop3: -; CHECK-NEXT: [[T2_PRE_PRE24:%.*]] = phi ptr [ [[T2_PRE_PRE23]], [[LOOP3_LATCH:%.*]] ], [ [[T2_PRE_PRE25]], [[LOOP2]] ] -; CHECK-NEXT: [[T1_PRE_PRE20:%.*]] = phi ptr [ [[T1_PRE_PRE19]], [[LOOP3_LATCH]] ], [ [[T1_PRE_PRE21]], [[LOOP2]] ] +; CHECK-NEXT: [[T1_PRE_PRE20:%.*]] = phi ptr [ [[T1_PRE_PRE19]], [[LOOP3_LATCH:%.*]] ], [ [[T1_PRE_PRE21]], [[LOOP2]] ] ; CHECK-NEXT: [[T3_PRE17:%.*]] = phi ptr [ [[T3_PRE16]], [[LOOP3_LATCH]] ], [ [[T3_PRE]], [[LOOP2]] ] ; CHECK-NEXT: [[T2_PRE14:%.*]] = phi ptr [ [[T2_PRE13]], [[LOOP3_LATCH]] ], [ [[T2_PRE]], [[LOOP2]] ] ; CHECK-NEXT: [[T1_PRE11:%.*]] = phi ptr [ [[T1_PRE10]], [[LOOP3_LATCH]] ], [ [[T1_PRE]], [[LOOP2]] ] @@ -36,11 +33,10 @@ define i32 @main(i64 %x, ptr %d, ptr noalias %p) { ; CHECK: for.body.lr.ph.i: ; CHECK-NEXT: store i32 0, ptr [[P]], align 4 ; CHECK-NEXT: [[T5_PRE:%.*]] = load ptr, ptr [[P]], align 8 -; CHECK-NEXT: [[T6_PRE:%.*]] = load ptr, ptr [[T5_PRE]], align 8, !tbaa [[TBAA0]] +; CHECK-NEXT: [[T6_PRE:%.*]] = load ptr, ptr [[T5_PRE]], align 8 ; CHECK-NEXT: [[T7_PRE:%.*]] = load ptr, ptr [[T6_PRE]], align 8 ; CHECK-NEXT: br label [[LOOP3_LATCH]] ; CHECK: loop3.latch: -; CHECK-NEXT: [[T2_PRE_PRE23]] = phi ptr [ [[T2_PRE_PRE24]], 
[[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T6_PRE]], [[FOR_BODY_LR_PH_I]] ] ; CHECK-NEXT: [[T1_PRE_PRE19]] = phi ptr [ [[T1_PRE_PRE20]], [[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T5_PRE]], [[FOR_BODY_LR_PH_I]] ] ; CHECK-NEXT: [[T3_PRE16]] = phi ptr [ [[T3_PRE17]], [[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T7_PRE]], [[FOR_BODY_LR_PH_I]] ] ; CHECK-NEXT: [[T2_PRE13]] = phi ptr [ [[T2_PRE14]], [[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T6_PRE]], [[FOR_BODY_LR_PH_I]] ] @@ -54,7 +50,7 @@ define i32 @main(i64 %x, ptr %d, ptr noalias %p) { ; CHECK: loop2.latch.loop2_crit_edge: ; CHECK-NEXT: br label [[LOOP2]] ; CHECK: loop.latch: -; CHECK-NEXT: store i32 0, ptr [[D]], align 4, !tbaa [[TBAA4:![0-9]+]] +; CHECK-NEXT: store i32 0, ptr [[D]], align 4, !tbaa [[TBAA0:![0-9]+]] ; CHECK-NEXT: br label [[LOOP]] ; entry: diff --git a/llvm/test/Transforms/JumpThreading/pre-load.ll b/llvm/test/Transforms/JumpThreading/pre-load.ll index eef7735acf448..4a3a80372973a 100644 --- a/llvm/test/Transforms/JumpThreading/pre-load.ll +++ b/llvm/test/Transforms/JumpThreading/pre-load.ll @@ -128,6 +128,54 @@ NO: call void @g() ret i32 1 } + +declare void @callee() memory(none) + +define i32 @pre_metadata_may_throw_speculative(i1 %cond) { +; CHECK-LABEL: @pre_metadata_may_throw_speculative( +; CHECK-NEXT: br i1 [[COND:%.*]], label [[A:%.*]], label [[C:%.*]] +; CHECK: C.thread: +; CHECK-NEXT: store i32 0, ptr @x, align 4 +; CHECK-NEXT: call void @callee() +; CHECK-NEXT: br label [[YES:%.*]] +; CHECK: C: +; CHECK-NEXT: [[A_PR:%.*]] = load i32, ptr @x, align 4, !range [[RNG0]] +; CHECK-NEXT: call void @callee() +; CHECK-NEXT: [[COND2:%.*]] = icmp eq i32 [[A_PR]], 0 +; CHECK-NEXT: br i1 [[COND2]], label [[YES]], label [[NO:%.*]] +; CHECK: YES: +; CHECK-NEXT: [[A3:%.*]] = phi i32 [ 0, [[A]] ], [ [[A_PR]], [[C]] ] +; CHECK-NEXT: call void @f() +; CHECK-NEXT: ret i32 [[A3]] +; CHECK: NO: +; CHECK-NEXT: call void @g() +; CHECK-NEXT: ret i32 1 +; + br i1 %cond, label %A, label %B + +A: + store i32 0, ptr @x, align 4 + br 
label %C + +B: + br label %C + +C: + call void @callee() + %a = load i32, ptr @x, align 4, !range !{i32 0, i32 2}, !noundef !{} + %cond2 = icmp eq i32 %a, 0 + br i1 %cond2, label %YES, label %NO + +YES: + call void @f() + ret i32 %a + +NO: + call void @g() + ret i32 1 +} +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { memory(none) } ;. ; CHECK: [[RNG0]] = !{i32 0, i32 2} ; CHECK: [[META1]] = !{} diff --git a/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll b/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll index 438a93c735796..80e7afafc9790 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll @@ -60,12 +60,12 @@ define dso_local void @_Z13vecIncFromPtrP12FloatVecPair(ptr %FVP) { ; O23-NEXT: [[ARRAYIDX_I4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], ptr [[TMP3]], i64 undef ; O23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX_I4_I]], align 8, !tbaa [[TBAA8]] ; O23-NEXT: [[ARRAYIDX_I5_I:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 undef -; O23-NEXT: [[DOTPRE_I:%.*]] = load float, ptr [[ARRAYIDX_I5_I]], align 4, !tbaa [[TBAA9:![0-9]+]] +; O23-NEXT: [[DOTPRE_I:%.*]] = load float, ptr [[ARRAYIDX_I5_I]], align 4 ; O23-NEXT: br label [[FOR_BODY7_I:%.*]] ; O23: for.body7.i: ; O23-NEXT: [[TMP5:%.*]] = phi float [ [[DOTPRE_I]], [[FOR_BODY7_LR_PH_I]] ], [ [[ADD_I:%.*]], [[FOR_BODY7_I]] ] ; O23-NEXT: [[J_07_I:%.*]] = phi i32 [ 0, [[FOR_BODY7_LR_PH_I]] ], [ [[INC_I:%.*]], [[FOR_BODY7_I]] ] -; O23-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX_I3_I]], align 4, !tbaa [[TBAA9]] +; O23-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX_I3_I]], align 4, !tbaa [[TBAA9:![0-9]+]] ; O23-NEXT: [[ADD_I]] = fadd float [[TMP5]], [[TMP6]] ; O23-NEXT: store float [[ADD_I]], ptr [[ARRAYIDX_I5_I]], align 4, !tbaa [[TBAA9]] ; O23-NEXT: [[INC_I]] = add nuw i32 [[J_07_I]], 1 From 544f796aaf9c89e474928a3cca24c95647c09eb1 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: 
Wed, 23 Apr 2025 16:53:16 +0800 Subject: [PATCH 5/6] [GVN][JumpThreading] Try to salvage UB-implying metadata --- llvm/lib/Transforms/Scalar/GVN.cpp | 20 +++++++++++++--- llvm/lib/Transforms/Scalar/JumpThreading.cpp | 24 +++++++++++++------- llvm/test/Transforms/GVN/pr64598.ll | 18 +++++++++------ 3 files changed, 44 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index cdf2cfa311c65..1414f11e2412f 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -1517,9 +1517,23 @@ void GVNPass::eliminatePartiallyRedundantLoad( } NewLoad->copyMetadata(*Load); - // Drop UB-implying metadata as we do not know if it is guaranteed to - // transfer the execution to the original load. - NewLoad->dropUBImplyingAttrsAndMetadata(); + std::optional<bool> TransfersExecution = std::nullopt; + NewLoad->eraseMetadataIf([&](unsigned Kind, const MDNode *MD) { + if (Kind == LLVMContext::MD_dbg || Kind == LLVMContext::MD_annotation) + return false; + if (is_contained(Metadata::PoisonGeneratingIDs, Kind)) + return false; + // Try to salvage UB-implying metadata if we know it is guaranteed to + // transfer the execution to the original load. + if (!TransfersExecution.has_value()) { + // TEST ONLY + assert( + is_contained(successors(NewLoad->getParent()), Load->getParent())); + TransfersExecution = isGuaranteedToTransferExecutionToSuccessor( + Load->getParent()->begin(), Load->getIterator()); + } + return !*TransfersExecution; + }); // Add the newly created load. 
ValuesPerBlock.push_back( diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 4bad53bc13e07..e7f7f811febf6 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -1362,12 +1362,12 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { // farther than to a predecessor, we need to reuse the code from GVN's PRE. // It requires domination tree analysis, so for this simple case it is an // overkill. - bool TransfersExecution = false; + std::optional<bool> TransfersExecution = std::nullopt; if (PredsScanned.size() != AvailablePreds.size() && !isSafeToSpeculativelyExecute(LoadI)) { - for (auto I = LoadBB->begin(); &*I != LoadI; ++I) - if (!isGuaranteedToTransferExecutionToSuccessor(&*I)) - return false; + if (!isGuaranteedToTransferExecutionToSuccessor(LoadBB->begin(), + LoadI->getIterator())) + return false; TransfersExecution = true; } @@ -1411,10 +1411,18 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { UnavailablePred->getTerminator()->getIterator()); NewVal->setDebugLoc(LoadI->getDebugLoc()); NewVal->copyMetadata(*LoadI); - // Drop UB-implying metadata if we do not know it is guaranteed to transfer - // the execution to the original load. - if (!TransfersExecution) - NewVal->dropUBImplyingAttrsAndMetadata(); + NewVal->eraseMetadataIf([&](unsigned Kind, const MDNode *MD) { + if (Kind == LLVMContext::MD_dbg || Kind == LLVMContext::MD_annotation) + return false; + if (is_contained(Metadata::PoisonGeneratingIDs, Kind)) + return false; + // Try to salvage UB-implying metadata if we know it is guaranteed to + // transfer the execution to the original load. 
+ if (!TransfersExecution.has_value()) + TransfersExecution = isGuaranteedToTransferExecutionToSuccessor( + LoadBB->begin(), LoadI->getIterator()); + return !*TransfersExecution; + }); AvailablePreds.emplace_back(UnavailablePred, NewVal); } diff --git a/llvm/test/Transforms/GVN/pr64598.ll b/llvm/test/Transforms/GVN/pr64598.ll index e8e70a0edbd2e..902af984bce2b 100644 --- a/llvm/test/Transforms/GVN/pr64598.ll +++ b/llvm/test/Transforms/GVN/pr64598.ll @@ -6,20 +6,23 @@ define i32 @main(i64 %x, ptr %d, ptr noalias %p) { ; CHECK-SAME: (i64 [[X:%.*]], ptr [[D:%.*]], ptr noalias [[P:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[T1_PRE_PRE_PRE:%.*]] = load ptr, ptr [[P]], align 8 -; CHECK-NEXT: [[T2_PRE_PRE_PRE:%.*]] = load ptr, ptr [[T1_PRE_PRE_PRE]], align 8 +; CHECK-NEXT: [[T2_PRE_PRE_PRE:%.*]] = load ptr, ptr [[T1_PRE_PRE_PRE]], align 8, !tbaa [[TBAA0:![0-9]+]] ; CHECK-NEXT: [[T3_PRE_PRE_PRE:%.*]] = load ptr, ptr [[T2_PRE_PRE_PRE]], align 8 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[T1_PRE_PRE:%.*]] = phi ptr [ [[T1_PRE_PRE19:%.*]], [[LOOP_LATCH:%.*]] ], [ [[T1_PRE_PRE_PRE]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[T2_PRE_PRE:%.*]] = phi ptr [ [[T2_PRE_PRE23:%.*]], [[LOOP_LATCH:%.*]] ], [ [[T2_PRE_PRE_PRE]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[T1_PRE_PRE:%.*]] = phi ptr [ [[T1_PRE_PRE19:%.*]], [[LOOP_LATCH]] ], [ [[T1_PRE_PRE_PRE]], [[ENTRY]] ] ; CHECK-NEXT: br label [[LOOP2:%.*]] ; CHECK: loop2: -; CHECK-NEXT: [[T1_PRE_PRE21:%.*]] = phi ptr [ [[T1_PRE_PRE19]], [[LOOP2_LATCH_LOOP2_CRIT_EDGE:%.*]] ], [ [[T1_PRE_PRE]], [[LOOP]] ] +; CHECK-NEXT: [[T2_PRE_PRE25:%.*]] = phi ptr [ [[T2_PRE_PRE23]], [[LOOP2_LATCH_LOOP2_CRIT_EDGE:%.*]] ], [ [[T2_PRE_PRE]], [[LOOP]] ] +; CHECK-NEXT: [[T1_PRE_PRE21:%.*]] = phi ptr [ [[T1_PRE_PRE19]], [[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T1_PRE_PRE]], [[LOOP]] ] ; CHECK-NEXT: [[T3_PRE:%.*]] = phi ptr [ [[T3_PRE16:%.*]], [[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T3_PRE_PRE_PRE]], [[LOOP]] ] -; CHECK-NEXT: [[T2_PRE:%.*]] = phi ptr [ 
[[T2_PRE13:%.*]], [[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T2_PRE_PRE_PRE]], [[LOOP]] ] +; CHECK-NEXT: [[T2_PRE:%.*]] = phi ptr [ [[T2_PRE13:%.*]], [[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T2_PRE_PRE]], [[LOOP]] ] ; CHECK-NEXT: [[T1_PRE:%.*]] = phi ptr [ [[T1_PRE10:%.*]], [[LOOP2_LATCH_LOOP2_CRIT_EDGE]] ], [ [[T1_PRE_PRE]], [[LOOP]] ] ; CHECK-NEXT: br label [[LOOP3:%.*]] ; CHECK: loop3: -; CHECK-NEXT: [[T1_PRE_PRE20:%.*]] = phi ptr [ [[T1_PRE_PRE19]], [[LOOP3_LATCH:%.*]] ], [ [[T1_PRE_PRE21]], [[LOOP2]] ] +; CHECK-NEXT: [[T2_PRE_PRE24:%.*]] = phi ptr [ [[T2_PRE_PRE23]], [[LOOP3_LATCH:%.*]] ], [ [[T2_PRE_PRE25]], [[LOOP2]] ] +; CHECK-NEXT: [[T1_PRE_PRE20:%.*]] = phi ptr [ [[T1_PRE_PRE19]], [[LOOP3_LATCH]] ], [ [[T1_PRE_PRE21]], [[LOOP2]] ] ; CHECK-NEXT: [[T3_PRE17:%.*]] = phi ptr [ [[T3_PRE16]], [[LOOP3_LATCH]] ], [ [[T3_PRE]], [[LOOP2]] ] ; CHECK-NEXT: [[T2_PRE14:%.*]] = phi ptr [ [[T2_PRE13]], [[LOOP3_LATCH]] ], [ [[T2_PRE]], [[LOOP2]] ] ; CHECK-NEXT: [[T1_PRE11:%.*]] = phi ptr [ [[T1_PRE10]], [[LOOP3_LATCH]] ], [ [[T1_PRE]], [[LOOP2]] ] @@ -33,10 +36,11 @@ define i32 @main(i64 %x, ptr %d, ptr noalias %p) { ; CHECK: for.body.lr.ph.i: ; CHECK-NEXT: store i32 0, ptr [[P]], align 4 ; CHECK-NEXT: [[T5_PRE:%.*]] = load ptr, ptr [[P]], align 8 -; CHECK-NEXT: [[T6_PRE:%.*]] = load ptr, ptr [[T5_PRE]], align 8 +; CHECK-NEXT: [[T6_PRE:%.*]] = load ptr, ptr [[T5_PRE]], align 8, !tbaa [[TBAA0]] ; CHECK-NEXT: [[T7_PRE:%.*]] = load ptr, ptr [[T6_PRE]], align 8 ; CHECK-NEXT: br label [[LOOP3_LATCH]] ; CHECK: loop3.latch: +; CHECK-NEXT: [[T2_PRE_PRE23]] = phi ptr [ [[T2_PRE_PRE24]], [[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T6_PRE]], [[FOR_BODY_LR_PH_I]] ] ; CHECK-NEXT: [[T1_PRE_PRE19]] = phi ptr [ [[T1_PRE_PRE20]], [[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T5_PRE]], [[FOR_BODY_LR_PH_I]] ] ; CHECK-NEXT: [[T3_PRE16]] = phi ptr [ [[T3_PRE17]], [[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T7_PRE]], [[FOR_BODY_LR_PH_I]] ] ; CHECK-NEXT: [[T2_PRE13]] = phi ptr [ [[T2_PRE14]], 
[[LOOP3_LOOP3_LATCH_CRIT_EDGE]] ], [ [[T6_PRE]], [[FOR_BODY_LR_PH_I]] ] @@ -50,7 +54,7 @@ define i32 @main(i64 %x, ptr %d, ptr noalias %p) { ; CHECK: loop2.latch.loop2_crit_edge: ; CHECK-NEXT: br label [[LOOP2]] ; CHECK: loop.latch: -; CHECK-NEXT: store i32 0, ptr [[D]], align 4, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: store i32 0, ptr [[D]], align 4, !tbaa [[TBAA4:![0-9]+]] ; CHECK-NEXT: br label [[LOOP]] ; entry: From a88645be68315451a520b4d6d45a44f78e8b15d0 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Mon, 28 Apr 2025 17:59:42 +0800 Subject: [PATCH 6/6] [GVN] Revert gvn changes --- llvm/lib/Transforms/Scalar/GVN.cpp | 38 ++++++++++--------- .../MemoryDependenceAnalysis/InvariantLoad.ll | 12 +++--- .../invariant.group-bug.ll | 2 +- .../test/Transforms/GVN/PRE/invariant-load.ll | 2 +- llvm/test/Transforms/GVN/PRE/load-metadata.ll | 2 +- .../PRE/load-pre-metadata-accsess-group.ll | 10 ++--- llvm/test/Transforms/GVN/PRE/preserve-tbaa.ll | 2 +- .../PhaseOrdering/X86/spurious-peeling.ll | 4 +- 8 files changed, 37 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index 1414f11e2412f..6233e8e2ee681 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -1516,24 +1516,26 @@ void GVNPass::eliminatePartiallyRedundantLoad( MSSAU->insertUse(cast(NewAccess), /*RenameUses=*/true); } - NewLoad->copyMetadata(*Load); - std::optional TransfersExecution = std::nullopt; - NewLoad->eraseMetadataIf([&](unsigned Kind, const MDNode *MD) { - if (Kind == LLVMContext::MD_dbg || Kind == LLVMContext::MD_annotation) - return false; - if (is_contained(Metadata::PoisonGeneratingIDs, Kind)) - return false; - // Try to salvage UB-implying metadata if we know it is guaranteed to - // transfer the execution to the original load. 
- if (!TransfersExecution.has_value()) { - // TEST ONLY - assert( - is_contained(successors(NewLoad->getParent()), Load->getParent())); - TransfersExecution = isGuaranteedToTransferExecutionToSuccessor( - Load->getParent()->begin(), Load->getIterator()); - } - return !*TransfersExecution; - }); + // Transfer the old load's AA tags to the new load. + AAMDNodes Tags = Load->getAAMetadata(); + if (Tags) + NewLoad->setAAMetadata(Tags); + + if (auto *MD = Load->getMetadata(LLVMContext::MD_invariant_load)) + NewLoad->setMetadata(LLVMContext::MD_invariant_load, MD); + if (auto *InvGroupMD = Load->getMetadata(LLVMContext::MD_invariant_group)) + NewLoad->setMetadata(LLVMContext::MD_invariant_group, InvGroupMD); + if (auto *RangeMD = Load->getMetadata(LLVMContext::MD_range)) + NewLoad->setMetadata(LLVMContext::MD_range, RangeMD); + if (auto *AccessMD = Load->getMetadata(LLVMContext::MD_access_group)) + if (LI->getLoopFor(Load->getParent()) == LI->getLoopFor(UnavailableBlock)) + NewLoad->setMetadata(LLVMContext::MD_access_group, AccessMD); + + // We do not propagate the old load's debug location, because the new + // load now lives in a different BB, and we want to avoid a jumpy line + // table. + // FIXME: How do we retain source locations without causing poor debugging + // behavior? // Add the newly created load. 
ValuesPerBlock.push_back( diff --git a/llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll b/llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll index e98b635dc6086..e49db3d8c3e8e 100644 --- a/llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll +++ b/llvm/test/Analysis/MemoryDependenceAnalysis/InvariantLoad.ll @@ -10,8 +10,8 @@ declare void @foo(ptr) define i8 @test(i1 %cmp) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[P:%.*]] = alloca i8, align 1 -; CHECK-NEXT: store i8 5, ptr [[P]], align 1 +; CHECK-NEXT: [[P:%.*]] = alloca i8 +; CHECK-NEXT: store i8 5, ptr [[P]] ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: ; CHECK-NEXT: [[V:%.*]] = phi i8 [ 5, [[ENTRY:%.*]] ], [ -5, [[ALIVE:%.*]] ] @@ -23,7 +23,7 @@ define i8 @test(i1 %cmp) { ; CHECK-NEXT: br label [[ALIVE]] ; CHECK: alive: ; CHECK-NEXT: [[I_2:%.*]] = phi i8 [ [[I]], [[HEADER]] ], [ [[I_1]], [[DEAD]] ] -; CHECK-NEXT: store i8 -5, ptr [[P]], align 1 +; CHECK-NEXT: store i8 -5, ptr [[P]] ; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[P]], i8 0, i32 1, i1 false) ; CHECK-NEXT: [[I_INC]] = add i8 [[I_2]], 1 ; CHECK-NEXT: [[CMP_LOOP:%.*]] = icmp ugt i8 [[I_INC]], 100 @@ -67,7 +67,7 @@ define i8 @test2(i1 %cmp, ptr %p) { ; CHECK-NEXT: call void @foo(ptr [[P]]) ; CHECK-NEXT: br i1 [[CMP:%.*]], label [[B2:%.*]], label [[B1:%.*]] ; CHECK: b1: -; CHECK-NEXT: [[RES2:%.*]] = load i8, ptr [[P]], align 1 +; CHECK-NEXT: [[RES2:%.*]] = load i8, ptr [[P]] ; CHECK-NEXT: [[RES3:%.*]] = add i8 [[RES1]], [[RES2]] ; CHECK-NEXT: br label [[ALIVE:%.*]] ; CHECK: b2: @@ -105,7 +105,7 @@ define i8 @test3(i1 %cmp, ptr %p) { ; CHECK-NEXT: call void @foo(ptr [[P]]) ; CHECK-NEXT: br i1 [[CMP:%.*]], label [[B1:%.*]], label [[B2:%.*]] ; CHECK: b1: -; CHECK-NEXT: [[RES2:%.*]] = load i8, ptr [[P]], align 1 +; CHECK-NEXT: [[RES2:%.*]] = load i8, ptr [[P]] ; CHECK-NEXT: [[RES3:%.*]] = add i8 [[RES1]], [[RES2]] ; CHECK-NEXT: br label [[ALIVE:%.*]] ; CHECK: b2: @@ -148,7 +148,7 @@ define 
void @test4() null_pointer_is_valid { ; CHECK-NEXT: [[TMP4:%.*]] = fmul float [[TMP2]], [[TMP2]] ; CHECK-NEXT: [[INVAR_INC3]] = add nuw nsw i64 [[FUSION_INVAR_ADDRESS_DIM_0_03]], 1 ; CHECK-NEXT: [[DOTPHI_TRANS_INSERT:%.*]] = getelementptr inbounds [2 x [1 x [4 x float]]], ptr null, i64 0, i64 [[INVAR_INC3]], i64 0, i64 2 -; CHECK-NEXT: [[DOTPRE]] = load float, ptr [[DOTPHI_TRANS_INSERT]], align 4 +; CHECK-NEXT: [[DOTPRE]] = load float, ptr [[DOTPHI_TRANS_INSERT]], align 4, !invariant.load !0 ; CHECK-NEXT: br label [[FUSION_LOOP_HEADER_DIM_1_PREHEADER]] ; entry: diff --git a/llvm/test/Analysis/MemoryDependenceAnalysis/invariant.group-bug.ll b/llvm/test/Analysis/MemoryDependenceAnalysis/invariant.group-bug.ll index 802c969eae2fb..c11191ee96d01 100644 --- a/llvm/test/Analysis/MemoryDependenceAnalysis/invariant.group-bug.ll +++ b/llvm/test/Analysis/MemoryDependenceAnalysis/invariant.group-bug.ll @@ -29,7 +29,7 @@ define void @fail(ptr noalias sret(i1) %arg, ptr %arg1, ptr %arg2, ptr %arg3, i1 ; CHECK-NEXT: br i1 [[ARG4:%.*]], label [[BB10:%.*]], label [[BB29:%.*]] ; CHECK: bb10: ; CHECK-NEXT: [[I14_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds ptr, ptr [[I4]], i64 22 -; CHECK-NEXT: [[I15_PRE:%.*]] = load ptr, ptr [[I14_PHI_TRANS_INSERT]], align 8 +; CHECK-NEXT: [[I15_PRE:%.*]] = load ptr, ptr [[I14_PHI_TRANS_INSERT]], align 8, !invariant.load [[META6]] ; CHECK-NEXT: br label [[BB12:%.*]] ; CHECK: bb12: ; CHECK-NEXT: [[I16:%.*]] = call i64 [[I15_PRE]](ptr nonnull [[ARG1]], ptr null, i64 0) #[[ATTR1]] diff --git a/llvm/test/Transforms/GVN/PRE/invariant-load.ll b/llvm/test/Transforms/GVN/PRE/invariant-load.ll index 98df8b45951b3..71332bf0e0bdb 100644 --- a/llvm/test/Transforms/GVN/PRE/invariant-load.ll +++ b/llvm/test/Transforms/GVN/PRE/invariant-load.ll @@ -162,7 +162,7 @@ define i32 @test8(i1 %cnd, ptr %p) { ; CHECK-NEXT: br i1 [[CND]], label [[TAKEN:%.*]], label [[MERGE:%.*]] ; CHECK: taken: ; CHECK-NEXT: [[P2:%.*]] = call ptr (...) 
@bar(ptr [[P]]) -; CHECK-NEXT: [[V2_PRE:%.*]] = load i32, ptr [[P2]], align 4 +; CHECK-NEXT: [[V2_PRE:%.*]] = load i32, ptr [[P2]], align 4, !invariant.load [[META0]] ; CHECK-NEXT: br label [[MERGE]] ; CHECK: merge: ; CHECK-NEXT: [[V2:%.*]] = phi i32 [ [[V1]], [[ENTRY:%.*]] ], [ [[V2_PRE]], [[TAKEN]] ] diff --git a/llvm/test/Transforms/GVN/PRE/load-metadata.ll b/llvm/test/Transforms/GVN/PRE/load-metadata.ll index b97854d576383..415812be95b3a 100644 --- a/llvm/test/Transforms/GVN/PRE/load-metadata.ll +++ b/llvm/test/Transforms/GVN/PRE/load-metadata.ll @@ -8,7 +8,7 @@ block1: block2: br label %block4 ; CHECK: block2: -; CHECK-NEXT: load i32, ptr %p, align 4, !range !0 +; CHECK-NEXT: load i32, ptr %p, align 4, !range !0, !invariant.group !1 block3: store i32 0, ptr %p diff --git a/llvm/test/Transforms/GVN/PRE/load-pre-metadata-accsess-group.ll b/llvm/test/Transforms/GVN/PRE/load-pre-metadata-accsess-group.ll index 303064f6d8586..148e308b116ed 100644 --- a/llvm/test/Transforms/GVN/PRE/load-pre-metadata-accsess-group.ll +++ b/llvm/test/Transforms/GVN/PRE/load-pre-metadata-accsess-group.ll @@ -13,14 +13,14 @@ define dso_local void @test1(ptr nocapture readonly %aa, ptr nocapture %bb) loca ; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[TMP0]], [[ENTRY:%.*]] ], [ [[DOTPRE:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ] ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE]] ] ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, ptr [[AA]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[IDX4]], align 4, !llvm.access.group [[ACC_GRP0:![0-9]+]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[IDX4]], align 4, !llvm.access.group !0 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP1]], [[TMP2]] -; CHECK-NEXT: store i32 [[MUL]], ptr [[IDX4]], align 4, !llvm.access.group [[ACC_GRP0]] +; CHECK-NEXT: store i32 [[MUL]], ptr [[IDX4]], align 4, !llvm.access.group !0 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw 
i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 100 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]], label [[FOR_END:%.*]] ; CHECK: for.body.for.body_crit_edge: -; CHECK-NEXT: [[DOTPRE]] = load i32, ptr [[IDX]], align 4 +; CHECK-NEXT: [[DOTPRE]] = load i32, ptr [[IDX]], align 4, !llvm.access.group !0 ; CHECK-NEXT: br label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -65,10 +65,10 @@ define dso_local void @test2(ptr nocapture readonly %aa, ptr nocapture %bb) loca ; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_BODY]] ], [ [[MUL:%.*]], [[FOR_BODY2_FOR_BODY2_CRIT_EDGE]] ] ; CHECK-NEXT: [[INDVARS2_IV:%.*]] = phi i64 [ 0, [[FOR_BODY]] ], [ 1, [[FOR_BODY2_FOR_BODY2_CRIT_EDGE]] ] ; CHECK-NEXT: [[MUL]] = mul nsw i32 [[TMP1]], [[TMP2]] -; CHECK-NEXT: store i32 [[MUL]], ptr [[AA]], align 4, !llvm.access.group [[ACC_GRP1:![0-9]+]] +; CHECK-NEXT: store i32 [[MUL]], ptr [[AA]], align 4, !llvm.access.group !1 ; CHECK-NEXT: br i1 true, label [[FOR_BODY2_FOR_BODY2_CRIT_EDGE]], label [[FOR_END:%.*]] ; CHECK: for.body2.for.body2_crit_edge: -; CHECK-NEXT: [[DOTPRE1]] = load i32, ptr [[IDX]], align 4 +; CHECK-NEXT: [[DOTPRE1]] = load i32, ptr [[IDX]], align 4, !llvm.access.group !1 ; CHECK-NEXT: br label [[FOR_BODY2]] ; CHECK: for.end: ; CHECK-NEXT: br i1 false, label [[FOR_END_FOR_BODY_CRIT_EDGE:%.*]], label [[END:%.*]] diff --git a/llvm/test/Transforms/GVN/PRE/preserve-tbaa.ll b/llvm/test/Transforms/GVN/PRE/preserve-tbaa.ll index 6f1a645ca0c84..3df63beefea68 100644 --- a/llvm/test/Transforms/GVN/PRE/preserve-tbaa.ll +++ b/llvm/test/Transforms/GVN/PRE/preserve-tbaa.ll @@ -5,7 +5,7 @@ target datalayout = "e-p:64:64:64" ; GVN should preserve the TBAA tag on loads when doing PRE. 
; CHECK-LABEL: @test( -; CHECK: %tmp33.pre = load i16, ptr %P, align 2 +; CHECK: %tmp33.pre = load i16, ptr %P, align 2, !tbaa !0 ; CHECK: br label %for.body define void @test(ptr %P, ptr %Q, i1 %arg) nounwind { entry: diff --git a/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll b/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll index 80e7afafc9790..438a93c735796 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll @@ -60,12 +60,12 @@ define dso_local void @_Z13vecIncFromPtrP12FloatVecPair(ptr %FVP) { ; O23-NEXT: [[ARRAYIDX_I4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], ptr [[TMP3]], i64 undef ; O23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARRAYIDX_I4_I]], align 8, !tbaa [[TBAA8]] ; O23-NEXT: [[ARRAYIDX_I5_I:%.*]] = getelementptr inbounds float, ptr [[TMP4]], i64 undef -; O23-NEXT: [[DOTPRE_I:%.*]] = load float, ptr [[ARRAYIDX_I5_I]], align 4 +; O23-NEXT: [[DOTPRE_I:%.*]] = load float, ptr [[ARRAYIDX_I5_I]], align 4, !tbaa [[TBAA9:![0-9]+]] ; O23-NEXT: br label [[FOR_BODY7_I:%.*]] ; O23: for.body7.i: ; O23-NEXT: [[TMP5:%.*]] = phi float [ [[DOTPRE_I]], [[FOR_BODY7_LR_PH_I]] ], [ [[ADD_I:%.*]], [[FOR_BODY7_I]] ] ; O23-NEXT: [[J_07_I:%.*]] = phi i32 [ 0, [[FOR_BODY7_LR_PH_I]] ], [ [[INC_I:%.*]], [[FOR_BODY7_I]] ] -; O23-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX_I3_I]], align 4, !tbaa [[TBAA9:![0-9]+]] +; O23-NEXT: [[TMP6:%.*]] = load float, ptr [[ARRAYIDX_I3_I]], align 4, !tbaa [[TBAA9]] ; O23-NEXT: [[ADD_I]] = fadd float [[TMP5]], [[TMP6]] ; O23-NEXT: store float [[ADD_I]], ptr [[ARRAYIDX_I5_I]], align 4, !tbaa [[TBAA9]] ; O23-NEXT: [[INC_I]] = add nuw i32 [[J_07_I]], 1