diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 4366418b2379d..e777f950a7c5a 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -14,6 +14,7 @@
 
 #include "llvm/Transforms/Scalar/LoopInterchange.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
@@ -72,6 +73,13 @@ using LoopVector = SmallVector<Loop *, 8>;
 // TODO: Check if we can use a sparse matrix here.
 using CharMatrix = std::vector<std::vector<char>>;
 
+/// Types of rules used in profitability check.
+enum class RuleTy {
+  PerLoopCacheAnalysis,
+  PerInstrOrderCost,
+  ForVectorization,
+};
+
 } // end anonymous namespace
 
 // Minimum loop depth supported.
@@ -84,12 +92,33 @@ static cl::opt<unsigned int> MaxLoopNestDepth(
     "loop-interchange-max-loop-nest-depth", cl::init(10), cl::Hidden,
     cl::desc("Maximum depth of loop nest considered for the transform"));
 
-static cl::opt<bool> PrioritizeVectorization(
-    "loop-interchange-prioritize-vectorization", cl::init(false), cl::Hidden,
-    cl::desc("Prioritize increasing vectorization opportunity over cache cost "
-             "when determining profitability"));
+// We prefer cache cost to vectorization by default.
+static cl::list<RuleTy> Profitabilities(
+    "loop-interchange-profitabilities", cl::ZeroOrMore,
+    cl::MiscFlags::CommaSeparated, cl::Hidden,
+    cl::desc("List of profitability heuristics to be used. They are applied in "
+             "the given order"),
+    cl::list_init<RuleTy>({RuleTy::PerLoopCacheAnalysis,
+                           RuleTy::PerInstrOrderCost,
+                           RuleTy::ForVectorization}),
+    cl::values(clEnumValN(RuleTy::PerLoopCacheAnalysis, "cache",
+                          "Prioritize loop cache cost"),
+               clEnumValN(RuleTy::PerInstrOrderCost, "instorder",
+                          "Prioritize the IVs order of each instruction"),
+               clEnumValN(RuleTy::ForVectorization, "vectorize",
+                          "Prioritize vectorization")));
 
 #ifndef NDEBUG
+/// Returns true if every rule in \p Rules appears at most once. Used only
+/// inside an assertion, so it is compiled only when NDEBUG is not defined.
+static bool noDuplicateRules(ArrayRef<RuleTy> Rules) {
+  SmallSet<RuleTy, 4> Set;
+  for (RuleTy Rule : Rules)
+    if (!Set.insert(Rule).second)
+      return false;
+  return true;
+}
+
 static void printDepMatrix(CharMatrix &DepMatrix) {
   for (auto &Row : DepMatrix) {
     for (auto D : Row)
@@ -1204,26 +1233,9 @@ bool LoopInterchangeProfitability::isProfitable(
   // second highest priority rule (isProfitablePerInstrOrderCost by default).
   // Likewise, if it failed to analysis the profitability then only, the last
   // rule (isProfitableForVectorization by default) will decide.
-  enum class RuleTy {
-    PerLoopCacheAnalysis,
-    PerInstrOrderCost,
-    ForVectorization,
-  };
-
-  // We prefer cache cost to vectorization by default.
-  RuleTy RuleOrder[3] = {RuleTy::PerLoopCacheAnalysis,
-                         RuleTy::PerInstrOrderCost, RuleTy::ForVectorization};
-
-  // If we prefer vectorization to cache cost, change the order of application
-  // of each rule.
-  if (PrioritizeVectorization) {
-    RuleOrder[0] = RuleTy::ForVectorization;
-    RuleOrder[1] = RuleTy::PerLoopCacheAnalysis;
-    RuleOrder[2] = RuleTy::PerInstrOrderCost;
-  }
-
+  assert(noDuplicateRules(Profitabilities) && "Detect duplicate rules");
   std::optional<bool> shouldInterchange;
-  for (RuleTy RT : RuleOrder) {
+  for (RuleTy RT : Profitabilities) {
     switch (RT) {
     case RuleTy::PerLoopCacheAnalysis:
       shouldInterchange = isProfitablePerLoopCacheAnalysis(CostMap, CC);
diff --git a/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll b/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll
index 0018aa0308f28..85be48cb9a710 100644
--- a/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll
+++ b/llvm/test/Transforms/LoopInterchange/profitability-vectorization.ll
@@ -3,7 +3,7 @@
 ; RUN: FileCheck -input-file %t --check-prefix=PROFIT-CACHE %s
 
 ; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \
-; RUN:     -pass-remarks-output=%t -disable-output -loop-interchange-prioritize-vectorization=1
+; RUN:     -pass-remarks-output=%t -disable-output -loop-interchange-profitabilities=vectorize,cache,instorder
 ; RUN: FileCheck -input-file %t --check-prefix=PROFIT-VEC %s
 
 @A = dso_local global [256 x [256 x float]] zeroinitializer