Skip to content

Commit 9679735

Browse files
authored
[AMDGPU] Don't unify divergent exit nodes with musttail calls (llvm#126395)
Fixes SWDEV-512254.
1 parent 2e3729b commit 9679735

File tree

2 files changed: 57 additions and 1 deletion.

llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -215,7 +215,10 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
215215
PDT.roots(), [&](auto BB) { return !isUniformlyReached(UA, *BB); });
216216

217217
for (BasicBlock *BB : PDT.roots()) {
218-
if (isa<ReturnInst>(BB->getTerminator())) {
218+
if (auto *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
219+
auto *CI = dyn_cast_or_null<CallInst>(RI->getPrevNode());
220+
if (CI && CI->isMustTailCall())
221+
continue;
219222
if (HasDivergentExitBlock)
220223
ReturningBlocks.push_back(BB);
221224
} else if (isa<UnreachableInst>(BB->getTerminator())) {
Lines changed: 53 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,53 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -passes=amdgpu-unify-divergent-exit-nodes -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a %s -o - | FileCheck %s
3+
4+
declare void @foo(ptr)
5+
declare i1 @bar(ptr)
6+
7+
; Scenario: the entry block branches on an i1 loaded from %p to two returning
; blocks. %bb.0 performs a void musttail call to @foo immediately followed by
; `ret void`; %bb.1 is a plain `ret void`. The autogenerated assertions below
; expect the output to keep both return blocks exactly as written, i.e. the
; musttail block is not folded into a unified exit block.
define void @musttail_call_without_return_value(ptr %p) {
8+
; CHECK-LABEL: define void @musttail_call_without_return_value(
9+
; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] {
10+
; CHECK-NEXT: [[ENTRY:.*:]]
11+
; CHECK-NEXT: [[LOAD:%.*]] = load i1, ptr [[P]], align 1
12+
; CHECK-NEXT: br i1 [[LOAD]], label %[[BB_0:.*]], label %[[BB_1:.*]]
13+
; CHECK: [[BB_0]]:
14+
; CHECK-NEXT: musttail call void @foo(ptr [[P]])
15+
; CHECK-NEXT: ret void
16+
; CHECK: [[BB_1]]:
17+
; CHECK-NEXT: ret void
18+
;
19+
entry:
20+
%load = load i1, ptr %p, align 1
21+
br i1 %load, label %bb.0, label %bb.1
22+
23+
bb.0:
24+
musttail call void @foo(ptr %p)
25+
ret void
26+
27+
bb.1:
28+
ret void
29+
}
30+
31+
; Scenario: the entry block branches on an i1 loaded from %p to two returning
; blocks. %bb.0 returns the result of a musttail call to @bar directly, with
; the `ret` immediately after the call; %bb.1 returns the loaded value itself.
; The autogenerated assertions below expect both return blocks to remain in
; place, with the musttail call still directly followed by its `ret`.
define i1 @musttail_call_with_return_value(ptr %p) {
32+
; CHECK-LABEL: define i1 @musttail_call_with_return_value(
33+
; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
34+
; CHECK-NEXT: [[ENTRY:.*:]]
35+
; CHECK-NEXT: [[LOAD:%.*]] = load i1, ptr [[P]], align 1
36+
; CHECK-NEXT: br i1 [[LOAD]], label %[[BB_0:.*]], label %[[BB_1:.*]]
37+
; CHECK: [[BB_0]]:
38+
; CHECK-NEXT: [[RET:%.*]] = musttail call i1 @bar(ptr [[P]])
39+
; CHECK-NEXT: ret i1 [[RET]]
40+
; CHECK: [[BB_1]]:
41+
; CHECK-NEXT: ret i1 [[LOAD]]
42+
;
43+
entry:
44+
%load = load i1, ptr %p, align 1
45+
br i1 %load, label %bb.0, label %bb.1
46+
47+
bb.0:
48+
%ret = musttail call i1 @bar(ptr %p)
49+
ret i1 %ret
50+
51+
bb.1:
52+
ret i1 %load
53+
}

0 commit comments

Comments
 (0)