-
Notifications
You must be signed in to change notification settings - Fork 13.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[mlir][affine]make affine-loop-unroll a FunctionOpInterface pass. #126475
[mlir][affine]make affine-loop-unroll a FunctionOpInterface pass. #126475
Conversation
@llvm/pr-subscribers-mlir-scf @llvm/pr-subscribers-mlir-affine Author: lonely eagle (linuxlonelyeagle) Changes: Make `affine-loop-unroll` a `FunctionOpInterface` pass. Now unroll can be done on gpu.func. Full diff: https://github.com/llvm/llvm-project/pull/126475.diff 4 Files Affected:
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h
index bc29d04287ac462..37147b079e5d992 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.h
+++ b/mlir/include/mlir/Dialect/Affine/Passes.h
@@ -19,6 +19,7 @@
namespace mlir {
+class ModuleOp;
namespace func {
class FuncOp;
} // namespace func
@@ -93,7 +94,7 @@ std::unique_ptr<OperationPass<func::FuncOp>> createLoopTilingPass();
/// factors supplied through other means. If -1 is passed as the unrollFactor
/// and no callback is provided, anything passed from the command-line (if at
/// all) or the default unroll factor is used (LoopUnroll:kDefaultUnrollFactor).
-std::unique_ptr<OperationPass<func::FuncOp>> createLoopUnrollPass(
+std::unique_ptr<OperationPass<mlir::ModuleOp>> createLoopUnrollPass(
int unrollFactor = -1, bool unrollUpToFactor = false,
bool unrollFull = false,
const std::function<unsigned(AffineForOp)> &getUnrollFactor = nullptr);
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td
index d7c7897c6573016..d96b50c3e81043c 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.td
+++ b/mlir/include/mlir/Dialect/Affine/Passes.td
@@ -199,7 +199,7 @@ def AffineLoopTiling : Pass<"affine-loop-tile", "func::FuncOp"> {
];
}
-def AffineLoopUnroll : Pass<"affine-loop-unroll", "func::FuncOp"> {
+def AffineLoopUnroll : Pass<"affine-loop-unroll", "ModuleOp"> {
let summary = "Unroll affine loops";
let constructor = "mlir::affine::createLoopUnrollPass()";
let options = [
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
index 57df7ada91654c0..4dc9809574115eb 100644
--- a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
@@ -19,6 +19,7 @@
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinOps.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -82,7 +83,7 @@ static bool isInnermostAffineForOp(AffineForOp op) {
}
/// Gathers loops that have no affine.for's nested within.
-static void gatherInnermostLoops(func::FuncOp f,
+static void gatherInnermostLoops(FunctionOpInterface f,
SmallVectorImpl<AffineForOp> &loops) {
f.walk([&](AffineForOp forOp) {
if (isInnermostAffineForOp(forOp))
@@ -91,40 +92,44 @@ static void gatherInnermostLoops(func::FuncOp f,
}
void LoopUnroll::runOnOperation() {
- func::FuncOp func = getOperation();
- if (func.isExternal())
- return;
-
- if (unrollFull && unrollFullThreshold.hasValue()) {
- // Store short loops as we walk.
+ mlir::ModuleOp module = getOperation();
+ SmallVector<FunctionOpInterface> funcOps;
+ module.walk([&](FunctionOpInterface func) { funcOps.push_back(func); });
+ for (auto func : funcOps) {
+ if (func.isExternal())
+ return;
+
+ if (unrollFull && unrollFullThreshold.hasValue()) {
+ // Store short loops as we walk.
+ SmallVector<AffineForOp, 4> loops;
+
+ // Gathers all loops with trip count <= minTripCount. Do a post order walk
+ // so that loops are gathered from innermost to outermost (or else
+ // unrolling an outer one may delete gathered inner ones).
+ getOperation().walk([&](AffineForOp forOp) {
+ std::optional<uint64_t> tripCount = getConstantTripCount(forOp);
+ if (tripCount && *tripCount <= unrollFullThreshold)
+ loops.push_back(forOp);
+ });
+ for (auto forOp : loops)
+ (void)loopUnrollFull(forOp);
+ return;
+ }
+
+ // If the call back is provided, we will recurse until no loops are found.
SmallVector<AffineForOp, 4> loops;
-
- // Gathers all loops with trip count <= minTripCount. Do a post order walk
- // so that loops are gathered from innermost to outermost (or else unrolling
- // an outer one may delete gathered inner ones).
- getOperation().walk([&](AffineForOp forOp) {
- std::optional<uint64_t> tripCount = getConstantTripCount(forOp);
- if (tripCount && *tripCount <= unrollFullThreshold)
- loops.push_back(forOp);
- });
- for (auto forOp : loops)
- (void)loopUnrollFull(forOp);
- return;
- }
-
- // If the call back is provided, we will recurse until no loops are found.
- SmallVector<AffineForOp, 4> loops;
- for (unsigned i = 0; i < numRepetitions || getUnrollFactor; i++) {
- loops.clear();
- gatherInnermostLoops(func, loops);
- if (loops.empty())
- break;
- bool unrolled = false;
- for (auto forOp : loops)
- unrolled |= succeeded(runOnAffineForOp(forOp));
- if (!unrolled)
- // Break out if nothing was unrolled.
- break;
+ for (unsigned i = 0; i < numRepetitions || getUnrollFactor; i++) {
+ loops.clear();
+ gatherInnermostLoops(func, loops);
+ if (loops.empty())
+ break;
+ bool unrolled = false;
+ for (auto forOp : loops)
+ unrolled |= succeeded(runOnAffineForOp(forOp));
+ if (!unrolled)
+ // Break out if nothing was unrolled.
+ break;
+ }
}
}
@@ -145,7 +150,7 @@ LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) {
cleanUpUnroll);
}
-std::unique_ptr<OperationPass<func::FuncOp>> mlir::affine::createLoopUnrollPass(
+std::unique_ptr<OperationPass<ModuleOp>> mlir::affine::createLoopUnrollPass(
int unrollFactor, bool unrollUpToFactor, bool unrollFull,
const std::function<unsigned(AffineForOp)> &getUnrollFactor) {
return std::make_unique<LoopUnroll>(
diff --git a/mlir/test/Dialect/Affine/unroll.mlir b/mlir/test/Dialect/Affine/unroll.mlir
index e398c3fe2011dd8..43485ca56deeba5 100644
--- a/mlir/test/Dialect/Affine/unroll.mlir
+++ b/mlir/test/Dialect/Affine/unroll.mlir
@@ -240,6 +240,23 @@ func.func @loop_nest_unroll_full() {
return
} // UNROLL-FULL }
+gpu.module @unroll_full {
+ // UNROLL-FULL-LABEL: func @gpu_loop_nest_simplest() {
+ gpu.func @gpu_loop_nest_simplest() {
+ // UNROLL-FULL: affine.for %arg0 = 0 to 100 step 2 {
+ affine.for %i = 0 to 100 step 2 {
+ // UNROLL-FULL: %c1_i32 = arith.constant 1 : i32
+ // UNROLL-FULL-NEXT: %c1_i32_0 = arith.constant 1 : i32
+ // UNROLL-FULL-NEXT: %c1_i32_1 = arith.constant 1 : i32
+ // UNROLL-FULL-NEXT: %c1_i32_2 = arith.constant 1 : i32
+ affine.for %j = 0 to 4 {
+ %x = arith.constant 1 : i32
+ }
+ } // UNROLL-FULL: }
+ gpu.return // UNROLL-FULL: return
+ }
+}
+
// SHORT-LABEL: func @loop_nest_outer_unroll() {
func.func @loop_nest_outer_unroll() {
// SHORT: affine.for %arg0 = 0 to 4 {
@@ -260,6 +277,28 @@ func.func @loop_nest_outer_unroll() {
return // SHORT: return
} // SHORT }
+gpu.module @short {
+ // SHORT-LABEL: func @gpu_loop_nest_outer_unroll() {
+ gpu.func @gpu_loop_nest_outer_unroll() {
+ // SHORT: affine.for %arg0 = 0 to 4 {
+ // SHORT-NEXT: %0 = affine.apply [[$MAP0]](%arg0)
+ // SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
+ // SHORT-NEXT: }
+ // SHORT-NEXT: affine.for %arg0 = 0 to 4 {
+ // SHORT-NEXT: %0 = affine.apply [[$MAP0]](%arg0)
+ // SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
+ // SHORT-NEXT: }
+ affine.for %i = 0 to 2 {
+ affine.for %j = 0 to 4 {
+ %x = "affine.apply" (%j) { map = affine_map<(d0) -> (d0 + 1)> } :
+ (index) -> (index)
+ %y = "addi32"(%x, %x) : (index, index) -> index
+ }
+ }
+ gpu.return // SHORT: gpu.return
+ } // SHORT }
+}
+
// We are doing a minimal FileCheck here. We just need this test case to
// successfully run. Both %x and %y will get unrolled here as the min trip
// count threshold set to 2.
@@ -345,6 +384,37 @@ func.func @unroll_unit_stride_no_cleanup() {
return
}
+gpu.module @unroll_by_4{
+ // UNROLL-BY-4-LABEL: func @gpu_unroll_unit_stride_no_cleanup() {
+ gpu.func @gpu_unroll_unit_stride_no_cleanup() {
+ // UNROLL-BY-4: affine.for %arg0 = 0 to 100 {
+ affine.for %i = 0 to 100 {
+ // UNROLL-BY-4: for [[L1:%arg[0-9]+]] = 0 to 8 step 4 {
+ // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
+ // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
+ // UNROLL-BY-4-NEXT: %2 = affine.apply #map{{[0-9]*}}([[L1]])
+ // UNROLL-BY-4-NEXT: %3 = "addi32"(%2, %2) : (index, index) -> i32
+ // UNROLL-BY-4-NEXT: %4 = "addi32"(%3, %3) : (i32, i32) -> i32
+ // UNROLL-BY-4-NEXT: %5 = affine.apply #map{{[0-9]*}}([[L1]])
+ // UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
+ // UNROLL-BY-4-NEXT: %7 = "addi32"(%6, %6) : (i32, i32) -> i32
+ // UNROLL-BY-4-NEXT: %8 = affine.apply #map{{[0-9]*}}([[L1]])
+ // UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32
+ // UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32
+ // UNROLL-BY-4-NEXT: }
+ affine.for %j = 0 to 8 {
+ %x = "addi32"(%j, %j) : (index, index) -> i32
+ %y = "addi32"(%x, %x) : (i32, i32) -> i32
+ }
+ // empty loop
+ // UNROLL-BY-4: affine.for %arg1 = 0 to 8 {
+ affine.for %k = 0 to 8 {
+ }
+ }
+ gpu.return
+ }
+}
+
// UNROLL-BY-4-LABEL: func @unroll_unit_stride_cleanup() {
func.func @unroll_unit_stride_cleanup() {
// UNROLL-BY-4: affine.for %arg0 = 0 to 100 {
@@ -632,6 +702,19 @@ func.func @unroll_by_one_should_promote_single_iteration_loop() {
// UNROLL-BY-1-NEXT: return
}
+gpu.module @unroll_by_1 {
+ // UNROLL-BY-1-LABEL: func @gpu_unroll_by_one_should_promote_single_iteration_loop()
+ gpu.func @gpu_unroll_by_one_should_promote_single_iteration_loop() {
+ affine.for %i = 0 to 1 {
+ %x = "foo"(%i) : (index) -> i32
+ }
+ gpu.return
+ // UNROLL-BY-1-NEXT: %c0 = arith.constant 0 : index
+ // UNROLL-BY-1-NEXT: %0 = "foo"(%c0) : (index) -> i32
+ // UNROLL-BY-1-NEXT: gpu.return
+ }
+}
+
// Test unrolling with affine.for iter_args.
// UNROLL-BY-4-LABEL: loop_unroll_with_iter_args_and_cleanup
@@ -706,6 +789,23 @@ func.func @unroll_cleanup_loop_with_larger_unroll_factor() {
// UNROLL-CLEANUP-LOOP-NEXT: return
}
+gpu.module @unroll_cleanup_loop {
+ // UNROLL-CLEANUP-LOOP-LABEL: func @gpu_unroll_cleanup_loop_with_larger_unroll_factor()
+ gpu.func @gpu_unroll_cleanup_loop_with_larger_unroll_factor() {
+ affine.for %i = 0 to 3 {
+ %x = "foo"(%i) : (index) -> i32
+ }
+ gpu.return
+ // UNROLL-CLEANUP-LOOP-NEXT: %[[C0:.*]] = arith.constant 0 : index
+ // UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[C0]]) : (index) -> i32
+ // UNROLL-CLEANUP-LOOP-NEXT: %[[V1:.*]] = affine.apply {{.*}}
+ // UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V1]]) : (index) -> i32
+ // UNROLL-CLEANUP-LOOP-NEXT: %[[V2:.*]] = affine.apply {{.*}}
+ // UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V2]]) : (index) -> i32
+ // UNROLL-CLEANUP-LOOP-NEXT: gpu.return
+ }
+}
+
// UNROLL-CLEANUP-LOOP-LABEL: func @unroll_cleanup_loop_with_smaller_unroll_factor()
func.func @unroll_cleanup_loop_with_smaller_unroll_factor() {
affine.for %i = 0 to 7 {
|
@llvm/pr-subscribers-mlir Author: lonely eagle (linuxlonelyeagle) Changes: Make `affine-loop-unroll` a `FunctionOpInterface` pass. Now unroll can be done on gpu.func. Full diff: https://github.com/llvm/llvm-project/pull/126475.diff 4 Files Affected:
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h
index bc29d04287ac462..37147b079e5d992 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.h
+++ b/mlir/include/mlir/Dialect/Affine/Passes.h
@@ -19,6 +19,7 @@
namespace mlir {
+class ModuleOp;
namespace func {
class FuncOp;
} // namespace func
@@ -93,7 +94,7 @@ std::unique_ptr<OperationPass<func::FuncOp>> createLoopTilingPass();
/// factors supplied through other means. If -1 is passed as the unrollFactor
/// and no callback is provided, anything passed from the command-line (if at
/// all) or the default unroll factor is used (LoopUnroll:kDefaultUnrollFactor).
-std::unique_ptr<OperationPass<func::FuncOp>> createLoopUnrollPass(
+std::unique_ptr<OperationPass<mlir::ModuleOp>> createLoopUnrollPass(
int unrollFactor = -1, bool unrollUpToFactor = false,
bool unrollFull = false,
const std::function<unsigned(AffineForOp)> &getUnrollFactor = nullptr);
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td
index d7c7897c6573016..d96b50c3e81043c 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.td
+++ b/mlir/include/mlir/Dialect/Affine/Passes.td
@@ -199,7 +199,7 @@ def AffineLoopTiling : Pass<"affine-loop-tile", "func::FuncOp"> {
];
}
-def AffineLoopUnroll : Pass<"affine-loop-unroll", "func::FuncOp"> {
+def AffineLoopUnroll : Pass<"affine-loop-unroll", "ModuleOp"> {
let summary = "Unroll affine loops";
let constructor = "mlir::affine::createLoopUnrollPass()";
let options = [
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
index 57df7ada91654c0..4dc9809574115eb 100644
--- a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
@@ -19,6 +19,7 @@
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinOps.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -82,7 +83,7 @@ static bool isInnermostAffineForOp(AffineForOp op) {
}
/// Gathers loops that have no affine.for's nested within.
-static void gatherInnermostLoops(func::FuncOp f,
+static void gatherInnermostLoops(FunctionOpInterface f,
SmallVectorImpl<AffineForOp> &loops) {
f.walk([&](AffineForOp forOp) {
if (isInnermostAffineForOp(forOp))
@@ -91,40 +92,44 @@ static void gatherInnermostLoops(func::FuncOp f,
}
void LoopUnroll::runOnOperation() {
- func::FuncOp func = getOperation();
- if (func.isExternal())
- return;
-
- if (unrollFull && unrollFullThreshold.hasValue()) {
- // Store short loops as we walk.
+ mlir::ModuleOp module = getOperation();
+ SmallVector<FunctionOpInterface> funcOps;
+ module.walk([&](FunctionOpInterface func) { funcOps.push_back(func); });
+ for (auto func : funcOps) {
+ if (func.isExternal())
+ return;
+
+ if (unrollFull && unrollFullThreshold.hasValue()) {
+ // Store short loops as we walk.
+ SmallVector<AffineForOp, 4> loops;
+
+ // Gathers all loops with trip count <= minTripCount. Do a post order walk
+ // so that loops are gathered from innermost to outermost (or else
+ // unrolling an outer one may delete gathered inner ones).
+ getOperation().walk([&](AffineForOp forOp) {
+ std::optional<uint64_t> tripCount = getConstantTripCount(forOp);
+ if (tripCount && *tripCount <= unrollFullThreshold)
+ loops.push_back(forOp);
+ });
+ for (auto forOp : loops)
+ (void)loopUnrollFull(forOp);
+ return;
+ }
+
+ // If the call back is provided, we will recurse until no loops are found.
SmallVector<AffineForOp, 4> loops;
-
- // Gathers all loops with trip count <= minTripCount. Do a post order walk
- // so that loops are gathered from innermost to outermost (or else unrolling
- // an outer one may delete gathered inner ones).
- getOperation().walk([&](AffineForOp forOp) {
- std::optional<uint64_t> tripCount = getConstantTripCount(forOp);
- if (tripCount && *tripCount <= unrollFullThreshold)
- loops.push_back(forOp);
- });
- for (auto forOp : loops)
- (void)loopUnrollFull(forOp);
- return;
- }
-
- // If the call back is provided, we will recurse until no loops are found.
- SmallVector<AffineForOp, 4> loops;
- for (unsigned i = 0; i < numRepetitions || getUnrollFactor; i++) {
- loops.clear();
- gatherInnermostLoops(func, loops);
- if (loops.empty())
- break;
- bool unrolled = false;
- for (auto forOp : loops)
- unrolled |= succeeded(runOnAffineForOp(forOp));
- if (!unrolled)
- // Break out if nothing was unrolled.
- break;
+ for (unsigned i = 0; i < numRepetitions || getUnrollFactor; i++) {
+ loops.clear();
+ gatherInnermostLoops(func, loops);
+ if (loops.empty())
+ break;
+ bool unrolled = false;
+ for (auto forOp : loops)
+ unrolled |= succeeded(runOnAffineForOp(forOp));
+ if (!unrolled)
+ // Break out if nothing was unrolled.
+ break;
+ }
}
}
@@ -145,7 +150,7 @@ LogicalResult LoopUnroll::runOnAffineForOp(AffineForOp forOp) {
cleanUpUnroll);
}
-std::unique_ptr<OperationPass<func::FuncOp>> mlir::affine::createLoopUnrollPass(
+std::unique_ptr<OperationPass<ModuleOp>> mlir::affine::createLoopUnrollPass(
int unrollFactor, bool unrollUpToFactor, bool unrollFull,
const std::function<unsigned(AffineForOp)> &getUnrollFactor) {
return std::make_unique<LoopUnroll>(
diff --git a/mlir/test/Dialect/Affine/unroll.mlir b/mlir/test/Dialect/Affine/unroll.mlir
index e398c3fe2011dd8..43485ca56deeba5 100644
--- a/mlir/test/Dialect/Affine/unroll.mlir
+++ b/mlir/test/Dialect/Affine/unroll.mlir
@@ -240,6 +240,23 @@ func.func @loop_nest_unroll_full() {
return
} // UNROLL-FULL }
+gpu.module @unroll_full {
+ // UNROLL-FULL-LABEL: func @gpu_loop_nest_simplest() {
+ gpu.func @gpu_loop_nest_simplest() {
+ // UNROLL-FULL: affine.for %arg0 = 0 to 100 step 2 {
+ affine.for %i = 0 to 100 step 2 {
+ // UNROLL-FULL: %c1_i32 = arith.constant 1 : i32
+ // UNROLL-FULL-NEXT: %c1_i32_0 = arith.constant 1 : i32
+ // UNROLL-FULL-NEXT: %c1_i32_1 = arith.constant 1 : i32
+ // UNROLL-FULL-NEXT: %c1_i32_2 = arith.constant 1 : i32
+ affine.for %j = 0 to 4 {
+ %x = arith.constant 1 : i32
+ }
+ } // UNROLL-FULL: }
+ gpu.return // UNROLL-FULL: return
+ }
+}
+
// SHORT-LABEL: func @loop_nest_outer_unroll() {
func.func @loop_nest_outer_unroll() {
// SHORT: affine.for %arg0 = 0 to 4 {
@@ -260,6 +277,28 @@ func.func @loop_nest_outer_unroll() {
return // SHORT: return
} // SHORT }
+gpu.module @short {
+ // SHORT-LABEL: func @gpu_loop_nest_outer_unroll() {
+ gpu.func @gpu_loop_nest_outer_unroll() {
+ // SHORT: affine.for %arg0 = 0 to 4 {
+ // SHORT-NEXT: %0 = affine.apply [[$MAP0]](%arg0)
+ // SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
+ // SHORT-NEXT: }
+ // SHORT-NEXT: affine.for %arg0 = 0 to 4 {
+ // SHORT-NEXT: %0 = affine.apply [[$MAP0]](%arg0)
+ // SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
+ // SHORT-NEXT: }
+ affine.for %i = 0 to 2 {
+ affine.for %j = 0 to 4 {
+ %x = "affine.apply" (%j) { map = affine_map<(d0) -> (d0 + 1)> } :
+ (index) -> (index)
+ %y = "addi32"(%x, %x) : (index, index) -> index
+ }
+ }
+ gpu.return // SHORT: gpu.return
+ } // SHORT }
+}
+
// We are doing a minimal FileCheck here. We just need this test case to
// successfully run. Both %x and %y will get unrolled here as the min trip
// count threshold set to 2.
@@ -345,6 +384,37 @@ func.func @unroll_unit_stride_no_cleanup() {
return
}
+gpu.module @unroll_by_4{
+ // UNROLL-BY-4-LABEL: func @gpu_unroll_unit_stride_no_cleanup() {
+ gpu.func @gpu_unroll_unit_stride_no_cleanup() {
+ // UNROLL-BY-4: affine.for %arg0 = 0 to 100 {
+ affine.for %i = 0 to 100 {
+ // UNROLL-BY-4: for [[L1:%arg[0-9]+]] = 0 to 8 step 4 {
+ // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
+ // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
+ // UNROLL-BY-4-NEXT: %2 = affine.apply #map{{[0-9]*}}([[L1]])
+ // UNROLL-BY-4-NEXT: %3 = "addi32"(%2, %2) : (index, index) -> i32
+ // UNROLL-BY-4-NEXT: %4 = "addi32"(%3, %3) : (i32, i32) -> i32
+ // UNROLL-BY-4-NEXT: %5 = affine.apply #map{{[0-9]*}}([[L1]])
+ // UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
+ // UNROLL-BY-4-NEXT: %7 = "addi32"(%6, %6) : (i32, i32) -> i32
+ // UNROLL-BY-4-NEXT: %8 = affine.apply #map{{[0-9]*}}([[L1]])
+ // UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32
+ // UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32
+ // UNROLL-BY-4-NEXT: }
+ affine.for %j = 0 to 8 {
+ %x = "addi32"(%j, %j) : (index, index) -> i32
+ %y = "addi32"(%x, %x) : (i32, i32) -> i32
+ }
+ // empty loop
+ // UNROLL-BY-4: affine.for %arg1 = 0 to 8 {
+ affine.for %k = 0 to 8 {
+ }
+ }
+ gpu.return
+ }
+}
+
// UNROLL-BY-4-LABEL: func @unroll_unit_stride_cleanup() {
func.func @unroll_unit_stride_cleanup() {
// UNROLL-BY-4: affine.for %arg0 = 0 to 100 {
@@ -632,6 +702,19 @@ func.func @unroll_by_one_should_promote_single_iteration_loop() {
// UNROLL-BY-1-NEXT: return
}
+gpu.module @unroll_by_1 {
+ // UNROLL-BY-1-LABEL: func @gpu_unroll_by_one_should_promote_single_iteration_loop()
+ gpu.func @gpu_unroll_by_one_should_promote_single_iteration_loop() {
+ affine.for %i = 0 to 1 {
+ %x = "foo"(%i) : (index) -> i32
+ }
+ gpu.return
+ // UNROLL-BY-1-NEXT: %c0 = arith.constant 0 : index
+ // UNROLL-BY-1-NEXT: %0 = "foo"(%c0) : (index) -> i32
+ // UNROLL-BY-1-NEXT: gpu.return
+ }
+}
+
// Test unrolling with affine.for iter_args.
// UNROLL-BY-4-LABEL: loop_unroll_with_iter_args_and_cleanup
@@ -706,6 +789,23 @@ func.func @unroll_cleanup_loop_with_larger_unroll_factor() {
// UNROLL-CLEANUP-LOOP-NEXT: return
}
+gpu.module @unroll_cleanup_loop {
+ // UNROLL-CLEANUP-LOOP-LABEL: func @gpu_unroll_cleanup_loop_with_larger_unroll_factor()
+ gpu.func @gpu_unroll_cleanup_loop_with_larger_unroll_factor() {
+ affine.for %i = 0 to 3 {
+ %x = "foo"(%i) : (index) -> i32
+ }
+ gpu.return
+ // UNROLL-CLEANUP-LOOP-NEXT: %[[C0:.*]] = arith.constant 0 : index
+ // UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[C0]]) : (index) -> i32
+ // UNROLL-CLEANUP-LOOP-NEXT: %[[V1:.*]] = affine.apply {{.*}}
+ // UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V1]]) : (index) -> i32
+ // UNROLL-CLEANUP-LOOP-NEXT: %[[V2:.*]] = affine.apply {{.*}}
+ // UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V2]]) : (index) -> i32
+ // UNROLL-CLEANUP-LOOP-NEXT: gpu.return
+ }
+}
+
// UNROLL-CLEANUP-LOOP-LABEL: func @unroll_cleanup_loop_with_smaller_unroll_factor()
func.func @unroll_cleanup_loop_with_smaller_unroll_factor() {
affine.for %i = 0 to 7 {
|
Far fewer tests were added for gpu.func than already exist for func.func, and I don’t know whether that is enough. |
7074eaa
to
0861464
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems fine to me, approved
…vm#126475) [mlir][affine]make affine-loop-unroll a FunctionOpInterface pass Make `affine-loop-unroll` a `FunctionOpInterface` pass. Now unroll can be done on gpu.func.
…vm#126475) [mlir][affine]make affine-loop-unroll a FunctionOpInterface pass Make `affine-loop-unroll` a `FunctionOpInterface` pass. Now unroll can be done on gpu.func.
…vm#126475) [mlir][affine]make affine-loop-unroll a FunctionOpInterface pass Make `affine-loop-unroll` a `FunctionOpInterface` pass. Now unroll can be done on gpu.func.
Make `affine-loop-unroll` a `FunctionOpInterface` pass. Now unroll can be done on gpu.func.