Skip to content

Commit 528e408

Browse files
authored
[LoopInterchange] Add an option to control the cost heuristics applied (#133664)
LoopInterchange has several heuristic functions to determine whether exchanging two loops is profitable. Which heuristics are used and the order in which they are applied used to be fixed, but #125830 made it possible to change them internally at will. This patch adds a new compiler option to control them. The previous patch also added an option to prioritize the vectorization heuristic; this patch removes that option to avoid conflicts between it and the newly introduced one, e.g., when both `-loop-interchange-prioritize-vectorization=1` and `-loop-interchange-profitabilities='cache,vectorize'` are specified.
1 parent c47023d commit 528e408

File tree

2 files changed

+34
-24
lines changed

2 files changed

+34
-24
lines changed

llvm/lib/Transforms/Scalar/LoopInterchange.cpp

+33-23
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
#include "llvm/Transforms/Scalar/LoopInterchange.h"
1616
#include "llvm/ADT/STLExtras.h"
17+
#include "llvm/ADT/SmallSet.h"
1718
#include "llvm/ADT/SmallVector.h"
1819
#include "llvm/ADT/Statistic.h"
1920
#include "llvm/ADT/StringRef.h"
@@ -72,6 +73,13 @@ using LoopVector = SmallVector<Loop *, 8>;
7273
// TODO: Check if we can use a sparse matrix here.
7374
using CharMatrix = std::vector<std::vector<char>>;
7475

76+
/// Types of rules used in profitability check.
77+
enum class RuleTy {
78+
// Selected on the command line with the value "cache".
PerLoopCacheAnalysis,
79+
// Selected on the command line with the value "instorder".
PerInstrOrderCost,
80+
// Selected on the command line with the value "vectorize".
ForVectorization,
81+
};
82+
7583
} // end anonymous namespace
7684

7785
// Minimum loop depth supported.
@@ -84,12 +92,31 @@ static cl::opt<unsigned int> MaxLoopNestDepth(
8492
"loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden,
8593
cl::desc("Maximum depth of loop nest considered for the transform"));
8694

87-
static cl::opt<bool> PrioritizeVectorization(
88-
"loop-interchange-prioritize-vectorization", cl::init(false), cl::Hidden,
89-
cl::desc("Prioritize increasing vectorization opportunity over cache cost "
90-
"when determining profitability"));
95+
// We prefer cache cost to vectorization by default.
96+
// Hidden command-line option holding the list of profitability rules. The
// rules are given as a comma-separated list and are applied in the order in
// which they are listed.
static cl::list<RuleTy> Profitabilities(
97+
"loop-interchange-profitabilities", cl::ZeroOrMore,
98+
cl::MiscFlags::CommaSeparated, cl::Hidden,
99+
cl::desc("List of profitability heuristics to be used. They are applied in "
100+
"the given order"),
101+
// Initial contents of the list: cache, then instorder, then vectorize.
cl::list_init<RuleTy>({RuleTy::PerLoopCacheAnalysis,
102+
RuleTy::PerInstrOrderCost,
103+
RuleTy::ForVectorization}),
104+
// Accepted spellings for each rule on the command line.
cl::values(clEnumValN(RuleTy::PerLoopCacheAnalysis, "cache",
105+
"Prioritize loop cache cost"),
106+
clEnumValN(RuleTy::PerInstrOrderCost, "instorder",
107+
"Prioritize the IVs order of each instruction"),
108+
clEnumValN(RuleTy::ForVectorization, "vectorize",
109+
"Prioritize vectorization")));
91110

92111
#ifndef NDEBUG
112+
/// Returns true if every rule in \p Rules is distinct, and false as soon as a
/// rule that was already seen is encountered again.
static bool noDuplicateRules(ArrayRef<RuleTy> Rules) {
113+
// Rules seen so far while scanning the list.
SmallSet<RuleTy, 4> Set;
114+
for (RuleTy Rule : Rules)
115+
// The insertion reports failure when Rule is already in the set.
if (!Set.insert(Rule).second)
116+
return false;
117+
return true;
118+
}
119+
93120
static void printDepMatrix(CharMatrix &DepMatrix) {
94121
for (auto &Row : DepMatrix) {
95122
for (auto D : Row)
@@ -1204,26 +1231,9 @@ bool LoopInterchangeProfitability::isProfitable(
12041231
// second highest priority rule (isProfitablePerInstrOrderCost by default).
12051232
// Likewise, if that rule also fails to analyze the profitability, the last
12061233
// rule (isProfitableForVectorization by default) will decide.
1207-
enum class RuleTy {
1208-
PerLoopCacheAnalysis,
1209-
PerInstrOrderCost,
1210-
ForVectorization,
1211-
};
1212-
1213-
// We prefer cache cost to vectorization by default.
1214-
RuleTy RuleOrder[3] = {RuleTy::PerLoopCacheAnalysis,
1215-
RuleTy::PerInstrOrderCost, RuleTy::ForVectorization};
1216-
1217-
// If we prefer vectorization to cache cost, change the order of application
1218-
// of each rule.
1219-
if (PrioritizeVectorization) {
1220-
RuleOrder[0] = RuleTy::ForVectorization;
1221-
RuleOrder[1] = RuleTy::PerLoopCacheAnalysis;
1222-
RuleOrder[2] = RuleTy::PerInstrOrderCost;
1223-
}
1224-
1234+
assert(noDuplicateRules(Profitabilities) && "Detect duplicate rules");
12251235
std::optional<bool> shouldInterchange;
1226-
for (RuleTy RT : RuleOrder) {
1236+
for (RuleTy RT : Profitabilities) {
12271237
switch (RT) {
12281238
case RuleTy::PerLoopCacheAnalysis:
12291239
shouldInterchange = isProfitablePerLoopCacheAnalysis(CostMap, CC);

llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
; RUN: FileCheck -input-file %t --check-prefix=PROFIT-CACHE %s
44

55
; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \
6-
; RUN: -pass-remarks-output=%t -disable-output -loop-interchange-prioritize-vectorization=1
6+
; RUN: -pass-remarks-output=%t -disable-output -loop-interchange-profitabilities=vectorize,cache,instorder
77
; RUN: FileCheck -input-file %t --check-prefix=PROFIT-VEC %s
88

99
@A = dso_local global [256 x [256 x float]] zeroinitializer

0 commit comments

Comments
 (0)