Skip to content

Commit a141e58

Browse files
huaatian authored and tstellar committed
[llvm][CodeGen] avoid repeated interval calculation in window scheduler (#132352)
Some new registers are reused when replacing old ones in certain use cases of ModuloScheduleExpander, so it is necessary to avoid repeated live-interval calculations for these registers. (cherry picked from commit 7e65944)
1 parent d88cd35 commit a141e58

File tree

3 files changed

+108
-31
lines changed

3 files changed

+108
-31
lines changed

llvm/include/llvm/CodeGen/ModuloSchedule.h

-4
Original file line numberDiff line numberDiff line change
@@ -188,9 +188,6 @@ class ModuloScheduleExpander {
188188
/// Instructions to change when emitting the final schedule.
189189
InstrChangesTy InstrChanges;
190190

191-
/// Record the registers that need to compute live intervals.
192-
SmallVector<Register> NoIntervalRegs;
193-
194191
void generatePipelinedLoop();
195192
void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB,
196193
ValueMapTy *VRMap, MBBVectorTy &PrologBBs);
@@ -214,7 +211,6 @@ class ModuloScheduleExpander {
214211
void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs,
215212
MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
216213
ValueMapTy *VRMap);
217-
void calculateIntervals();
218214
bool computeDelta(MachineInstr &MI, unsigned &Delta);
219215
void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
220216
unsigned Num);

llvm/lib/CodeGen/ModuloSchedule.cpp

+5-27
Original file line numberDiff line numberDiff line change
@@ -181,10 +181,6 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
181181
// Add branches between prolog and epilog blocks.
182182
addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
183183

184-
// The intervals of newly created virtual registers are calculated after the
185-
// kernel expansion.
186-
calculateIntervals();
187-
188184
delete[] VRMap;
189185
delete[] VRMapPhi;
190186
}
@@ -546,10 +542,8 @@ void ModuloScheduleExpander::generateExistingPhis(
546542
if (VRMap[LastStageNum - np - 1].count(LoopVal))
547543
PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
548544

549-
if (IsLast && np == NumPhis - 1) {
545+
if (IsLast && np == NumPhis - 1)
550546
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
551-
NoIntervalRegs.push_back(NewReg);
552-
}
553547
continue;
554548
}
555549
}
@@ -589,10 +583,8 @@ void ModuloScheduleExpander::generateExistingPhis(
589583
// Check if we need to rename any uses that occurs after the loop. The
590584
// register to replace depends on whether the Phi is scheduled in the
591585
// epilog.
592-
if (IsLast && np == NumPhis - 1) {
586+
if (IsLast && np == NumPhis - 1)
593587
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
594-
NoIntervalRegs.push_back(NewReg);
595-
}
596588

597589
// In the kernel, a dependent Phi uses the value from this Phi.
598590
if (InKernel)
@@ -612,10 +604,8 @@ void ModuloScheduleExpander::generateExistingPhis(
612604
if (NumStages == 0 && IsLast) {
613605
auto &CurStageMap = VRMap[CurStageNum];
614606
auto It = CurStageMap.find(LoopVal);
615-
if (It != CurStageMap.end()) {
607+
if (It != CurStageMap.end())
616608
replaceRegUsesAfterLoop(Def, It->second, BB, MRI);
617-
NoIntervalRegs.push_back(It->second);
618-
}
619609
}
620610
}
621611
}
@@ -735,10 +725,8 @@ void ModuloScheduleExpander::generatePhis(
735725
rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
736726
NewReg);
737727
}
738-
if (IsLast && np == NumPhis - 1) {
728+
if (IsLast && np == NumPhis - 1)
739729
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
740-
NoIntervalRegs.push_back(NewReg);
741-
}
742730
}
743731
}
744732
}
@@ -950,14 +938,6 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
950938
}
951939
}
952940

953-
/// Some registers are generated during the kernel expansion. We calculate the
954-
/// live intervals of these registers after the expansion.
955-
void ModuloScheduleExpander::calculateIntervals() {
956-
for (Register Reg : NoIntervalRegs)
957-
LIS.createAndComputeVirtRegInterval(Reg);
958-
NoIntervalRegs.clear();
959-
}
960-
961941
/// Return true if we can compute the amount the instruction changes
962942
/// during each iteration. Set Delta to the amount of the change.
963943
bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
@@ -1078,10 +1058,8 @@ void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
10781058
Register NewReg = MRI.createVirtualRegister(RC);
10791059
MO.setReg(NewReg);
10801060
VRMap[CurStageNum][reg] = NewReg;
1081-
if (LastDef) {
1061+
if (LastDef)
10821062
replaceRegUsesAfterLoop(reg, NewReg, BB, MRI);
1083-
NoIntervalRegs.push_back(NewReg);
1084-
}
10851063
} else if (MO.isUse()) {
10861064
MachineInstr *Def = MRI.getVRegDef(reg);
10871065
// Compute the stage that contains the last definition for instruction.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc --mtriple=aarch64 %s -run-pass=pipeliner -o - | FileCheck %s
3+
4+
...
5+
---
6+
name: foo
7+
tracksRegLiveness: true
8+
body: |
9+
; CHECK-LABEL: name: foo
10+
; CHECK: bb.0:
11+
; CHECK-NEXT: successors: %bb.3(0x80000000)
12+
; CHECK-NEXT: liveins: $x0
13+
; CHECK-NEXT: {{ $}}
14+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
15+
; CHECK-NEXT: [[FMOVD0_:%[0-9]+]]:fpr64 = FMOVD0
16+
; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
17+
; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64sp = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
18+
; CHECK-NEXT: {{ $}}
19+
; CHECK-NEXT: bb.3:
20+
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000)
21+
; CHECK-NEXT: {{ $}}
22+
; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = nofpexcept FADDDrr [[FMOVD0_]], [[FMOVD0_]], implicit $fpcr
23+
; CHECK-NEXT: [[SUBSXri:%[0-9]+]]:gpr64 = nsw SUBSXri [[SUBREG_TO_REG]], 1, 0, implicit-def $nzcv
24+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[SUBSXri]]
25+
; CHECK-NEXT: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 112
26+
; CHECK-NEXT: Bcc 0, %bb.7, implicit $nzcv
27+
; CHECK-NEXT: B %bb.4
28+
; CHECK-NEXT: {{ $}}
29+
; CHECK-NEXT: bb.4:
30+
; CHECK-NEXT: successors: %bb.5(0x80000000), %bb.6(0x00000000)
31+
; CHECK-NEXT: {{ $}}
32+
; CHECK-NEXT: [[FADDDrr1:%[0-9]+]]:fpr64 = nofpexcept FADDDrr [[FADDDrr]], [[FMOVD0_]], implicit $fpcr
33+
; CHECK-NEXT: [[FADDDrr2:%[0-9]+]]:fpr64 = nofpexcept FADDDrr [[FMOVD0_]], [[FMOVD0_]], implicit $fpcr
34+
; CHECK-NEXT: [[SUBSXri1:%[0-9]+]]:gpr64 = nsw SUBSXri [[COPY1]], 1, 0, implicit-def $nzcv
35+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64all = COPY [[SUBSXri1]]
36+
; CHECK-NEXT: [[FMOVDi1:%[0-9]+]]:fpr64 = FMOVDi 112
37+
; CHECK-NEXT: Bcc 0, %bb.6, implicit $nzcv
38+
; CHECK-NEXT: B %bb.5
39+
; CHECK-NEXT: {{ $}}
40+
; CHECK-NEXT: bb.5:
41+
; CHECK-NEXT: successors: %bb.6(0x04000000), %bb.5(0x7c000000)
42+
; CHECK-NEXT: {{ $}}
43+
; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr64sp = PHI [[COPY2]], %bb.4, %24, %bb.5
44+
; CHECK-NEXT: [[PHI1:%[0-9]+]]:fpr64 = PHI [[FMOVDi1]], %bb.4, %25, %bb.5
45+
; CHECK-NEXT: [[PHI2:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.4, [[PHI1]], %bb.5
46+
; CHECK-NEXT: [[PHI3:%[0-9]+]]:fpr64 = PHI [[FADDDrr2]], %bb.4, %22, %bb.5
47+
; CHECK-NEXT: [[PHI4:%[0-9]+]]:fpr64 = PHI [[FADDDrr1]], %bb.4, %23, %bb.5
48+
; CHECK-NEXT: [[SUBSXri2:%[0-9]+]]:gpr64 = nsw SUBSXri [[PHI]], 1, 0, implicit-def $nzcv
49+
; CHECK-NEXT: [[FADDDrr3:%[0-9]+]]:fpr64 = nofpexcept FADDDrr [[PHI2]], [[FMOVD0_]], implicit $fpcr
50+
; CHECK-NEXT: [[FADDDrr4:%[0-9]+]]:fpr64 = nofpexcept FADDDrr [[PHI3]], [[PHI2]], implicit $fpcr
51+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64all = COPY [[SUBSXri2]]
52+
; CHECK-NEXT: STRDui [[PHI4]], [[COPY]], 0
53+
; CHECK-NEXT: [[FMOVDi2:%[0-9]+]]:fpr64 = FMOVDi 112
54+
; CHECK-NEXT: Bcc 1, %bb.5, implicit $nzcv
55+
; CHECK-NEXT: B %bb.6
56+
; CHECK-NEXT: {{ $}}
57+
; CHECK-NEXT: bb.6:
58+
; CHECK-NEXT: successors: %bb.7(0x80000000)
59+
; CHECK-NEXT: {{ $}}
60+
; CHECK-NEXT: [[PHI5:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.4, [[PHI1]], %bb.5
61+
; CHECK-NEXT: [[PHI6:%[0-9]+]]:fpr64 = PHI [[FADDDrr2]], %bb.4, [[FADDDrr3]], %bb.5
62+
; CHECK-NEXT: [[PHI7:%[0-9]+]]:fpr64 = PHI [[FADDDrr1]], %bb.4, [[FADDDrr4]], %bb.5
63+
; CHECK-NEXT: STRDui [[PHI7]], [[COPY]], 0
64+
; CHECK-NEXT: {{ $}}
65+
; CHECK-NEXT: bb.7:
66+
; CHECK-NEXT: successors: %bb.2(0x80000000)
67+
; CHECK-NEXT: {{ $}}
68+
; CHECK-NEXT: [[PHI8:%[0-9]+]]:fpr64 = PHI [[FMOVD0_]], %bb.3, [[PHI5]], %bb.6
69+
; CHECK-NEXT: [[PHI9:%[0-9]+]]:fpr64 = PHI [[FADDDrr]], %bb.3, [[PHI6]], %bb.6
70+
; CHECK-NEXT: [[FADDDrr5:%[0-9]+]]:fpr64 = nofpexcept FADDDrr [[PHI9]], [[PHI8]], implicit $fpcr
71+
; CHECK-NEXT: STRDui [[FADDDrr5]], [[COPY]], 0
72+
; CHECK-NEXT: B %bb.2
73+
; CHECK-NEXT: {{ $}}
74+
; CHECK-NEXT: bb.2:
75+
; CHECK-NEXT: RET_ReallyLR
76+
bb.0:
77+
successors: %bb.1(0x80000000)
78+
liveins: $x0
79+
80+
%0:gpr64common = COPY $x0
81+
%1:fpr64 = FMOVD0
82+
%2:gpr32 = MOVi32imm 1
83+
%3:gpr64all = SUBREG_TO_REG 0, killed %2, %subreg.sub_32
84+
85+
bb.1:
86+
successors: %bb.2(0x04000000), %bb.1(0x7c000000)
87+
88+
%4:gpr64sp = PHI %3, %bb.0, %5, %bb.1
89+
%6:fpr64 = PHI %1, %bb.0, %7, %bb.1
90+
%8:fpr64 = PHI %1, %bb.0, %6, %bb.1
91+
%9:fpr64 = nofpexcept FADDDrr %8, %1, implicit $fpcr
92+
%10:fpr64 = nofpexcept FADDDrr killed %9, %6, implicit $fpcr
93+
STRDui killed %10, %0, 0
94+
%11:gpr64 = nsw SUBSXri %4, 1, 0, implicit-def $nzcv
95+
%5:gpr64all = COPY %11
96+
%7:fpr64 = FMOVDi 112
97+
Bcc 1, %bb.1, implicit $nzcv
98+
B %bb.2
99+
100+
bb.2:
101+
RET_ReallyLR
102+
103+
...

0 commit comments

Comments
 (0)