@@ -240,6 +240,23 @@ func.func @loop_nest_unroll_full() {
240
240
return
241
241
} // UNROLL-FULL }
242
242
243
// GPU-function variant of the simplest loop-nest case. According to the
// UNROLL-FULL patterns below, the strided outer loop is expected to remain
// while the 4-iteration inner loop is replaced by four copies of its body.
gpu.module @unroll_full {
  // UNROLL-FULL-LABEL: func @gpu_loop_nest_simplest() {
  gpu.func @gpu_loop_nest_simplest() {
    // The outer loop is expected to be printed unchanged.
    // UNROLL-FULL: affine.for %arg0 = 0 to 100 step 2 {
    affine.for %outer = 0 to 100 step 2 {
      // One constant per inner-loop iteration, printed back to back.
      // UNROLL-FULL: %c1_i32 = arith.constant 1 : i32
      // UNROLL-FULL-NEXT: %c1_i32_0 = arith.constant 1 : i32
      // UNROLL-FULL-NEXT: %c1_i32_1 = arith.constant 1 : i32
      // UNROLL-FULL-NEXT: %c1_i32_2 = arith.constant 1 : i32
      affine.for %inner = 0 to 4 {
        %one = arith.constant 1 : i32
      }
    // UNROLL-FULL: }
    }
    // UNROLL-FULL: return
    gpu.return
  }
}
259
+
243
260
// SHORT-LABEL: func @loop_nest_outer_unroll() {
244
261
func.func @loop_nest_outer_unroll () {
245
262
// SHORT: affine.for %arg0 = 0 to 4 {
@@ -260,6 +277,28 @@ func.func @loop_nest_outer_unroll() {
260
277
return // SHORT: return
261
278
} // SHORT }
262
279
280
// GPU-function variant of @loop_nest_outer_unroll. According to the SHORT
// patterns below, the 2-iteration outer loop is expected to disappear, leaving
// two consecutive copies of the 4-iteration inner loop.
gpu.module @short {
  // SHORT-LABEL: func @gpu_loop_nest_outer_unroll() {
  gpu.func @gpu_loop_nest_outer_unroll() {
    // SHORT:      affine.for %arg0 = 0 to 4 {
    // SHORT-NEXT:   %0 = affine.apply [[$MAP0]](%arg0)
    // SHORT-NEXT:   %1 = "addi32"(%0, %0) : (index, index) -> index
    // SHORT-NEXT: }
    // SHORT-NEXT: affine.for %arg0 = 0 to 4 {
    // SHORT-NEXT:   %0 = affine.apply [[$MAP0]](%arg0)
    // SHORT-NEXT:   %1 = "addi32"(%0, %0) : (index, index) -> index
    // SHORT-NEXT: }
    affine.for %i = 0 to 2 {
      affine.for %j = 0 to 4 {
        // Generic-form affine.apply computing %j + 1; it is expected to be
        // printed in custom form with the map hoisted into an alias.
        %x = "affine.apply"(%j) {map = affine_map<(d0) -> (d0 + 1)>} :
          (index) -> (index)
        %y = "addi32"(%x, %x) : (index, index) -> index
      }
    }
    gpu.return  // SHORT: gpu.return
  }  // SHORT: }
}
301
+
263
302
// We are doing a minimal FileCheck here. We just need this test case to
264
303
// successfully run. Both %x and %y will get unrolled here as the min trip
265
304
// count threshold set to 2.
@@ -345,6 +384,37 @@ func.func @unroll_unit_stride_no_cleanup() {
345
384
return
346
385
}
347
386
387
// GPU-function variant of @unroll_unit_stride_no_cleanup. According to the
// UNROLL-BY-4 patterns below, the 8-iteration inner loop is expected to become
// a step-4 loop holding four copies of its body (so no cleanup loop follows),
// while the outer loop and the empty loop are printed with their original
// bounds.
gpu.module @unroll_by_4 {
  // UNROLL-BY-4-LABEL: func @gpu_unroll_unit_stride_no_cleanup() {
  gpu.func @gpu_unroll_unit_stride_no_cleanup() {
    // UNROLL-BY-4: affine.for %arg0 = 0 to 100 {
    affine.for %i = 0 to 100 {
      // UNROLL-BY-4: for [[L1:%arg[0-9]+]] = 0 to 8 step 4 {
      // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
      // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
      // UNROLL-BY-4-NEXT: %2 = affine.apply #map{{[0-9]*}}([[L1]])
      // UNROLL-BY-4-NEXT: %3 = "addi32"(%2, %2) : (index, index) -> i32
      // UNROLL-BY-4-NEXT: %4 = "addi32"(%3, %3) : (i32, i32) -> i32
      // UNROLL-BY-4-NEXT: %5 = affine.apply #map{{[0-9]*}}([[L1]])
      // UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
      // UNROLL-BY-4-NEXT: %7 = "addi32"(%6, %6) : (i32, i32) -> i32
      // UNROLL-BY-4-NEXT: %8 = affine.apply #map{{[0-9]*}}([[L1]])
      // UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32
      // UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32
      // UNROLL-BY-4-NEXT: }
      affine.for %j = 0 to 8 {
        // Two chained ops so each unrolled copy contributes a def-use pair.
        %x = "addi32"(%j, %j) : (index, index) -> i32
        %y = "addi32"(%x, %x) : (i32, i32) -> i32
      }
      // empty loop
      // UNROLL-BY-4: affine.for %arg1 = 0 to 8 {
      affine.for %k = 0 to 8 {
      }
    }
    gpu.return
  }
}
417
+
348
418
// UNROLL-BY-4-LABEL: func @unroll_unit_stride_cleanup() {
349
419
func.func @unroll_unit_stride_cleanup () {
350
420
// UNROLL-BY-4: affine.for %arg0 = 0 to 100 {
@@ -632,6 +702,19 @@ func.func @unroll_by_one_should_promote_single_iteration_loop() {
632
702
// UNROLL-BY-1-NEXT: return
633
703
}
634
704
705
// GPU-function variant of
// @unroll_by_one_should_promote_single_iteration_loop. According to the
// UNROLL-BY-1 patterns below, the single-iteration loop is expected to be
// removed, with its body left directly in the function and the induction
// variable replaced by a constant 0 index.
gpu.module @unroll_by_1 {
  // UNROLL-BY-1-LABEL: func @gpu_unroll_by_one_should_promote_single_iteration_loop()
  gpu.func @gpu_unroll_by_one_should_promote_single_iteration_loop() {
    affine.for %i = 0 to 1 {
      %x = "foo"(%i) : (index) -> i32
    }
    gpu.return
    // The patterns are chained from the label, so nothing may be printed
    // between the function header and the constant.
    // UNROLL-BY-1-NEXT: %c0 = arith.constant 0 : index
    // UNROLL-BY-1-NEXT: %0 = "foo"(%c0) : (index) -> i32
    // UNROLL-BY-1-NEXT: gpu.return
  }
}
717
+
635
718
// Test unrolling with affine.for iter_args.
636
719
637
720
// UNROLL-BY-4-LABEL: loop_unroll_with_iter_args_and_cleanup
@@ -706,6 +789,23 @@ func.func @unroll_cleanup_loop_with_larger_unroll_factor() {
706
789
// UNROLL-CLEANUP-LOOP-NEXT: return
707
790
}
708
791
792
// GPU-function variant of @unroll_cleanup_loop_with_larger_unroll_factor.
// According to the UNROLL-CLEANUP-LOOP patterns below, the 3-iteration loop is
// expected to be replaced by three straight-line "foo" calls with no loop left
// over. NOTE(review): the name implies the unroll factor exceeds the trip
// count; the RUN line that sets the factor is outside this excerpt.
gpu.module @unroll_cleanup_loop {
  // UNROLL-CLEANUP-LOOP-LABEL: func @gpu_unroll_cleanup_loop_with_larger_unroll_factor()
  gpu.func @gpu_unroll_cleanup_loop_with_larger_unroll_factor() {
    affine.for %i = 0 to 3 {
      %x = "foo"(%i) : (index) -> i32
    }
    gpu.return
    // First copy uses a constant index; the later copies take their index
    // from an affine.apply result.
    // UNROLL-CLEANUP-LOOP-NEXT: %[[C0:.*]] = arith.constant 0 : index
    // UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[C0]]) : (index) -> i32
    // UNROLL-CLEANUP-LOOP-NEXT: %[[V1:.*]] = affine.apply {{.*}}
    // UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V1]]) : (index) -> i32
    // UNROLL-CLEANUP-LOOP-NEXT: %[[V2:.*]] = affine.apply {{.*}}
    // UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V2]]) : (index) -> i32
    // UNROLL-CLEANUP-LOOP-NEXT: gpu.return
  }
}
808
+
709
809
// UNROLL-CLEANUP-LOOP-LABEL: func @unroll_cleanup_loop_with_smaller_unroll_factor()
710
810
func.func @unroll_cleanup_loop_with_smaller_unroll_factor () {
711
811
affine.for %i = 0 to 7 {
0 commit comments