Skip to content

Commit d88cd35

Browse files
huaatiantstellar
authored andcommitted
[llvm][CodeGen] Fix the empty interval issue in Window Scheduler (#129204)
The interval of newly generated reg in ModuloScheduleExpander is empty. This will cause crash at some corner case. This patch recalculate the live intervals of these regs. (cherry picked from commit b09b9ac)
1 parent 73d1e85 commit d88cd35

File tree

3 files changed

+215
-21
lines changed

3 files changed

+215
-21
lines changed

llvm/include/llvm/CodeGen/ModuloSchedule.h

+4
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,9 @@ class ModuloScheduleExpander {
188188
/// Instructions to change when emitting the final schedule.
189189
InstrChangesTy InstrChanges;
190190

191+
/// Record the registers that need to compute live intervals.
192+
SmallVector<Register> NoIntervalRegs;
193+
191194
void generatePipelinedLoop();
192195
void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB,
193196
ValueMapTy *VRMap, MBBVectorTy &PrologBBs);
@@ -211,6 +214,7 @@ class ModuloScheduleExpander {
211214
void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs,
212215
MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
213216
ValueMapTy *VRMap);
217+
void calculateIntervals();
214218
bool computeDelta(MachineInstr &MI, unsigned &Delta);
215219
void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
216220
unsigned Num);

llvm/lib/CodeGen/ModuloSchedule.cpp

+54-21
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
141141
MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
142142
updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
143143
KernelBB->push_back(NewMI);
144+
LIS.InsertMachineInstrInMaps(*NewMI);
144145
InstrMap[NewMI] = CI;
145146
}
146147

@@ -150,6 +151,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
150151
MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
151152
updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
152153
KernelBB->push_back(NewMI);
154+
LIS.InsertMachineInstrInMaps(*NewMI);
153155
InstrMap[NewMI] = &MI;
154156
}
155157

@@ -179,6 +181,10 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
179181
// Add branches between prolog and epilog blocks.
180182
addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
181183

184+
// The intervals of newly created virtual registers are calculated after the
185+
// kernel expansion.
186+
calculateIntervals();
187+
182188
delete[] VRMap;
183189
delete[] VRMapPhi;
184190
}
@@ -226,6 +232,7 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage,
226232
cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
227233
updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
228234
NewBB->push_back(NewMI);
235+
LIS.InsertMachineInstrInMaps(*NewMI);
229236
InstrMap[NewMI] = &*BBI;
230237
}
231238
}
@@ -303,6 +310,7 @@ void ModuloScheduleExpander::generateEpilog(
303310
MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
304311
updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
305312
NewBB->push_back(NewMI);
313+
LIS.InsertMachineInstrInMaps(*NewMI);
306314
InstrMap[NewMI] = In;
307315
}
308316
}
@@ -343,14 +351,11 @@ void ModuloScheduleExpander::generateEpilog(
343351
/// basic block with ToReg.
344352
static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
345353
MachineBasicBlock *MBB,
346-
MachineRegisterInfo &MRI,
347-
LiveIntervals &LIS) {
354+
MachineRegisterInfo &MRI) {
348355
for (MachineOperand &O :
349356
llvm::make_early_inc_range(MRI.use_operands(FromReg)))
350357
if (O.getParent()->getParent() != MBB)
351358
O.setReg(ToReg);
352-
if (!LIS.hasInterval(ToReg))
353-
LIS.createEmptyInterval(ToReg);
354359
}
355360

356361
/// Return true if the register has a use that occurs outside the
@@ -541,8 +546,10 @@ void ModuloScheduleExpander::generateExistingPhis(
541546
if (VRMap[LastStageNum - np - 1].count(LoopVal))
542547
PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
543548

544-
if (IsLast && np == NumPhis - 1)
545-
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
549+
if (IsLast && np == NumPhis - 1) {
550+
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
551+
NoIntervalRegs.push_back(NewReg);
552+
}
546553
continue;
547554
}
548555
}
@@ -560,6 +567,7 @@ void ModuloScheduleExpander::generateExistingPhis(
560567
TII->get(TargetOpcode::PHI), NewReg);
561568
NewPhi.addReg(PhiOp1).addMBB(BB1);
562569
NewPhi.addReg(PhiOp2).addMBB(BB2);
570+
LIS.InsertMachineInstrInMaps(*NewPhi);
563571
if (np == 0)
564572
InstrMap[NewPhi] = &*BBI;
565573

@@ -581,8 +589,10 @@ void ModuloScheduleExpander::generateExistingPhis(
581589
// Check if we need to rename any uses that occurs after the loop. The
582590
// register to replace depends on whether the Phi is scheduled in the
583591
// epilog.
584-
if (IsLast && np == NumPhis - 1)
585-
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
592+
if (IsLast && np == NumPhis - 1) {
593+
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
594+
NoIntervalRegs.push_back(NewReg);
595+
}
586596

587597
// In the kernel, a dependent Phi uses the value from this Phi.
588598
if (InKernel)
@@ -600,9 +610,12 @@ void ModuloScheduleExpander::generateExistingPhis(
600610
// Check if we need to rename a Phi that has been eliminated due to
601611
// scheduling.
602612
if (NumStages == 0 && IsLast) {
603-
auto It = VRMap[CurStageNum].find(LoopVal);
604-
if (It != VRMap[CurStageNum].end())
605-
replaceRegUsesAfterLoop(Def, It->second, BB, MRI, LIS);
613+
auto &CurStageMap = VRMap[CurStageNum];
614+
auto It = CurStageMap.find(LoopVal);
615+
if (It != CurStageMap.end()) {
616+
replaceRegUsesAfterLoop(Def, It->second, BB, MRI);
617+
NoIntervalRegs.push_back(It->second);
618+
}
606619
}
607620
}
608621
}
@@ -702,6 +715,7 @@ void ModuloScheduleExpander::generatePhis(
702715
TII->get(TargetOpcode::PHI), NewReg);
703716
NewPhi.addReg(PhiOp1).addMBB(BB1);
704717
NewPhi.addReg(PhiOp2).addMBB(BB2);
718+
LIS.InsertMachineInstrInMaps(*NewPhi);
705719
if (np == 0)
706720
InstrMap[NewPhi] = &*BBI;
707721

@@ -721,8 +735,10 @@ void ModuloScheduleExpander::generatePhis(
721735
rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
722736
NewReg);
723737
}
724-
if (IsLast && np == NumPhis - 1)
725-
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
738+
if (IsLast && np == NumPhis - 1) {
739+
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
740+
NoIntervalRegs.push_back(NewReg);
741+
}
726742
}
727743
}
728744
}
@@ -831,9 +847,11 @@ void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
831847
// We split the lifetime when we find the first use.
832848
if (SplitReg == 0) {
833849
SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
834-
BuildMI(*KernelBB, MI, MI->getDebugLoc(),
835-
TII->get(TargetOpcode::COPY), SplitReg)
836-
.addReg(Def);
850+
MachineInstr *newCopy =
851+
BuildMI(*KernelBB, MI, MI->getDebugLoc(),
852+
TII->get(TargetOpcode::COPY), SplitReg)
853+
.addReg(Def);
854+
LIS.InsertMachineInstrInMaps(*newCopy);
837855
}
838856
BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
839857
}
@@ -901,13 +919,17 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
901919
removePhis(Epilog, LastEpi);
902920
// Remove the blocks that are no longer referenced.
903921
if (LastPro != LastEpi) {
922+
for (auto &MI : *LastEpi)
923+
LIS.RemoveMachineInstrFromMaps(MI);
904924
LastEpi->clear();
905925
LastEpi->eraseFromParent();
906926
}
907927
if (LastPro == KernelBB) {
908928
LoopInfo->disposed(&LIS);
909929
NewKernel = nullptr;
910930
}
931+
for (auto &MI : *LastPro)
932+
LIS.RemoveMachineInstrFromMaps(MI);
911933
LastPro->clear();
912934
LastPro->eraseFromParent();
913935
} else {
@@ -928,6 +950,14 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
928950
}
929951
}
930952

953+
/// Some registers are generated during the kernel expansion. We calculate the
954+
/// live intervals of these registers after the expansion.
955+
void ModuloScheduleExpander::calculateIntervals() {
956+
for (Register Reg : NoIntervalRegs)
957+
LIS.createAndComputeVirtRegInterval(Reg);
958+
NoIntervalRegs.clear();
959+
}
960+
931961
/// Return true if we can compute the amount the instruction changes
932962
/// during each iteration. Set Delta to the amount of the change.
933963
bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
@@ -1048,8 +1078,10 @@ void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
10481078
Register NewReg = MRI.createVirtualRegister(RC);
10491079
MO.setReg(NewReg);
10501080
VRMap[CurStageNum][reg] = NewReg;
1051-
if (LastDef)
1052-
replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
1081+
if (LastDef) {
1082+
replaceRegUsesAfterLoop(reg, NewReg, BB, MRI);
1083+
NoIntervalRegs.push_back(NewReg);
1084+
}
10531085
} else if (MO.isUse()) {
10541086
MachineInstr *Def = MRI.getVRegDef(reg);
10551087
// Compute the stage that contains the last definition for instruction.
@@ -1198,10 +1230,11 @@ void ModuloScheduleExpander::rewriteScheduledInstr(
11981230
UseOp.setReg(ReplaceReg);
11991231
else {
12001232
Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
1201-
BuildMI(*BB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY),
1202-
SplitReg)
1203-
.addReg(ReplaceReg);
1233+
MachineInstr *newCopy = BuildMI(*BB, UseMI, UseMI->getDebugLoc(),
1234+
TII->get(TargetOpcode::COPY), SplitReg)
1235+
.addReg(ReplaceReg);
12041236
UseOp.setReg(SplitReg);
1237+
LIS.InsertMachineInstrInMaps(*newCopy);
12051238
}
12061239
}
12071240
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc --mtriple=hexagon %s -run-pass=pipeliner -o -| FileCheck %s
3+
4+
--- |
5+
define void @test_swp_ws_live_intervals(i32 %.pre) {
6+
entry:
7+
%cgep9 = bitcast ptr null to ptr
8+
br label %for.body147
9+
10+
for.body147: ; preds = %for.body170, %entry
11+
%add11.i526 = or i32 %.pre, 1
12+
br label %for.body158
13+
14+
for.body158: ; preds = %for.body158, %for.body147
15+
%lsr.iv = phi i32 [ %lsr.iv.next, %for.body158 ], [ -1, %for.body147 ]
16+
%add11.i536602603 = phi i32 [ %add11.i526, %for.body147 ], [ 0, %for.body158 ]
17+
%and8.i534 = and i32 %add11.i536602603, 1
18+
%cgep7 = getelementptr [64 x i32], ptr %cgep9, i32 0, i32 %and8.i534
19+
store i32 0, ptr %cgep7, align 4
20+
%lsr.iv.next = add nsw i32 %lsr.iv, 1
21+
%cmp157.3 = icmp ult i32 %lsr.iv.next, 510
22+
br i1 %cmp157.3, label %for.body158, label %for.body170
23+
24+
for.body170: ; preds = %for.body170, %for.body158
25+
%lsr.iv3 = phi ptr [ %cgep6, %for.body170 ], [ inttoptr (i32 4 to ptr), %for.body158 ]
26+
%lsr.iv1 = phi i32 [ %lsr.iv.next2, %for.body170 ], [ -1, %for.body158 ]
27+
%add11.i556606607 = phi i32 [ 0, %for.body170 ], [ 1, %for.body158 ]
28+
%cgep5 = getelementptr i8, ptr %lsr.iv3, i32 -4
29+
store i32 0, ptr %cgep5, align 8
30+
%sub.i547.1 = add i32 %add11.i556606607, 1
31+
%and.i548.1 = and i32 %sub.i547.1, 1
32+
%cgep8 = getelementptr [64 x i32], ptr %cgep9, i32 0, i32 %and.i548.1
33+
%0 = load i32, ptr %cgep8, align 4
34+
store i32 %0, ptr %lsr.iv3, align 4
35+
%lsr.iv.next2 = add nsw i32 %lsr.iv1, 1
36+
%cmp169.1 = icmp ult i32 %lsr.iv.next2, 254
37+
%cgep6 = getelementptr i8, ptr %lsr.iv3, i32 2
38+
br i1 %cmp169.1, label %for.body170, label %for.body147
39+
}
40+
41+
...
42+
---
43+
name: test_swp_ws_live_intervals
44+
tracksRegLiveness: true
45+
body: |
46+
; CHECK-LABEL: name: test_swp_ws_live_intervals
47+
; CHECK: bb.0.entry:
48+
; CHECK-NEXT: successors: %bb.1(0x80000000)
49+
; CHECK-NEXT: liveins: $r0
50+
; CHECK-NEXT: {{ $}}
51+
; CHECK-NEXT: [[COPY:%[0-9]+]]:intregs = COPY $r0
52+
; CHECK-NEXT: [[S2_setbit_i:%[0-9]+]]:intregs = S2_setbit_i [[COPY]], 0
53+
; CHECK-NEXT: {{ $}}
54+
; CHECK-NEXT: bb.1:
55+
; CHECK-NEXT: successors: %bb.5(0x80000000)
56+
; CHECK-NEXT: {{ $}}
57+
; CHECK-NEXT: bb.5:
58+
; CHECK-NEXT: successors: %bb.6(0x80000000)
59+
; CHECK-NEXT: {{ $}}
60+
; CHECK-NEXT: [[A2_andir:%[0-9]+]]:intregs = A2_andir [[S2_setbit_i]], 1
61+
; CHECK-NEXT: [[S2_asl_i_r:%[0-9]+]]:intregs = S2_asl_i_r [[A2_andir]], 2
62+
; CHECK-NEXT: [[A2_tfrsi:%[0-9]+]]:intregs = A2_tfrsi 1
63+
; CHECK-NEXT: [[A2_tfrsi1:%[0-9]+]]:intregs = A2_tfrsi 4
64+
; CHECK-NEXT: [[A2_tfrsi2:%[0-9]+]]:intregs = A2_tfrsi 0
65+
; CHECK-NEXT: J2_loop0i %bb.6, 510, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
66+
; CHECK-NEXT: J2_jump %bb.6, implicit-def $pc
67+
; CHECK-NEXT: {{ $}}
68+
; CHECK-NEXT: bb.6:
69+
; CHECK-NEXT: successors: %bb.6(0x7c000000), %bb.7(0x04000000)
70+
; CHECK-NEXT: {{ $}}
71+
; CHECK-NEXT: [[PHI:%[0-9]+]]:intregs = PHI [[A2_tfrsi2]], %bb.5, %24, %bb.6
72+
; CHECK-NEXT: [[PHI1:%[0-9]+]]:intregs = PHI [[S2_asl_i_r]], %bb.5, %23, %bb.6
73+
; CHECK-NEXT: S4_storeiri_io [[PHI1]], 0, 0 :: (store (s32) into %ir.cgep7)
74+
; CHECK-NEXT: [[A2_andir1:%[0-9]+]]:intregs = A2_andir [[PHI]], 1
75+
; CHECK-NEXT: [[A2_tfrsi3:%[0-9]+]]:intregs = A2_tfrsi 1
76+
; CHECK-NEXT: [[A2_tfrsi4:%[0-9]+]]:intregs = A2_tfrsi 4
77+
; CHECK-NEXT: [[S2_asl_i_r1:%[0-9]+]]:intregs = S2_asl_i_r [[A2_andir1]], 2
78+
; CHECK-NEXT: [[A2_tfrsi5:%[0-9]+]]:intregs = A2_tfrsi 0
79+
; CHECK-NEXT: ENDLOOP0 %bb.6, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
80+
; CHECK-NEXT: J2_jump %bb.7, implicit-def $pc
81+
; CHECK-NEXT: {{ $}}
82+
; CHECK-NEXT: bb.7:
83+
; CHECK-NEXT: successors: %bb.3(0x80000000)
84+
; CHECK-NEXT: {{ $}}
85+
; CHECK-NEXT: [[PHI2:%[0-9]+]]:intregs = PHI [[S2_asl_i_r1]], %bb.6
86+
; CHECK-NEXT: [[PHI3:%[0-9]+]]:intregs = PHI [[A2_tfrsi3]], %bb.6
87+
; CHECK-NEXT: [[PHI4:%[0-9]+]]:intregs = PHI [[A2_tfrsi4]], %bb.6
88+
; CHECK-NEXT: S4_storeiri_io [[PHI2]], 0, 0 :: (store unknown-size into %ir.cgep7, align 4)
89+
; CHECK-NEXT: J2_jump %bb.3, implicit-def $pc
90+
; CHECK-NEXT: {{ $}}
91+
; CHECK-NEXT: bb.3:
92+
; CHECK-NEXT: successors: %bb.4(0x80000000)
93+
; CHECK-NEXT: {{ $}}
94+
; CHECK-NEXT: J2_loop0i %bb.4, 255, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
95+
; CHECK-NEXT: J2_jump %bb.4, implicit-def $pc
96+
; CHECK-NEXT: {{ $}}
97+
; CHECK-NEXT: bb.4:
98+
; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.1(0x04000000)
99+
; CHECK-NEXT: {{ $}}
100+
; CHECK-NEXT: [[PHI5:%[0-9]+]]:intregs = PHI [[PHI4]], %bb.3, %9, %bb.4
101+
; CHECK-NEXT: [[PHI6:%[0-9]+]]:intregs = PHI [[PHI3]], %bb.3, %11, %bb.4
102+
; CHECK-NEXT: [[A2_tfrsi6:%[0-9]+]]:intregs = A2_tfrsi 0
103+
; CHECK-NEXT: S2_storeri_io [[PHI5]], -4, [[A2_tfrsi6]] :: (store (s32) into %ir.cgep5, align 8)
104+
; CHECK-NEXT: [[A2_addi:%[0-9]+]]:intregs = A2_addi [[PHI6]], 1
105+
; CHECK-NEXT: [[S2_insert:%[0-9]+]]:intregs = S2_insert [[PHI2]], [[A2_addi]], 1, 2
106+
; CHECK-NEXT: [[L2_loadri_io:%[0-9]+]]:intregs = L2_loadri_io [[S2_insert]], 0 :: (load (s32) from %ir.cgep8)
107+
; CHECK-NEXT: S2_storeri_io [[PHI5]], 0, [[L2_loadri_io]] :: (store (s32) into %ir.lsr.iv3)
108+
; CHECK-NEXT: [[A2_addi1:%[0-9]+]]:intregs = A2_addi [[PHI5]], 2
109+
; CHECK-NEXT: ENDLOOP0 %bb.4, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
110+
; CHECK-NEXT: J2_jump %bb.1, implicit-def dead $pc
111+
bb.0.entry:
112+
successors: %bb.1(0x80000000)
113+
liveins: $r0
114+
115+
%0:intregs = COPY $r0
116+
%1:intregs = S2_setbit_i %0, 0
117+
118+
bb.1:
119+
successors: %bb.2(0x80000000)
120+
121+
J2_loop0i %bb.2, 511, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
122+
123+
bb.2:
124+
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
125+
126+
%2:intregs = PHI %1, %bb.1, %3, %bb.2
127+
%4:intregs = A2_andir %2, 1
128+
%5:intregs = S2_asl_i_r %4, 2
129+
S4_storeiri_io %5, 0, 0 :: (store (s32) into %ir.cgep7)
130+
%6:intregs = A2_tfrsi 1
131+
%7:intregs = A2_tfrsi 4
132+
%3:intregs = A2_tfrsi 0
133+
ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
134+
J2_jump %bb.3, implicit-def dead $pc
135+
136+
bb.3:
137+
successors: %bb.4(0x80000000)
138+
139+
J2_loop0i %bb.4, 255, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
140+
J2_jump %bb.4, implicit-def $pc
141+
142+
bb.4:
143+
successors: %bb.4(0x7c000000), %bb.1(0x04000000)
144+
145+
%8:intregs = PHI %7, %bb.3, %9, %bb.4
146+
%10:intregs = PHI %6, %bb.3, %11, %bb.4
147+
%11:intregs = A2_tfrsi 0
148+
S2_storeri_io %8, -4, %11 :: (store (s32) into %ir.cgep5, align 8)
149+
%12:intregs = A2_addi %10, 1
150+
%13:intregs = S2_insert %5, %12, 1, 2
151+
%14:intregs = L2_loadri_io %13, 0 :: (load (s32) from %ir.cgep8)
152+
S2_storeri_io %8, 0, %14 :: (store (s32) into %ir.lsr.iv3)
153+
%9:intregs = A2_addi %8, 2
154+
ENDLOOP0 %bb.4, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
155+
J2_jump %bb.1, implicit-def dead $pc
156+
157+
...

0 commit comments

Comments
 (0)