@@ -727,43 +727,46 @@ static const std::string gpu_pipeline =
727
727
" linalg-fuse-elementwise-ops,"
728
728
" arith-expand,"
729
729
" memref-expand,"
730
- " arith-bufferize,"
731
- " func-bufferize,"
732
730
" func.func(empty-tensor-to-alloc-tensor),"
733
- " func.func(scf-bufferize),"
734
- " func.func(tensor-bufferize),"
735
- " func.func(bufferization-bufferize),"
736
- " func.func(linalg-bufferize),"
737
- " func.func(linalg-detensorize),"
738
- " func.func(tensor-bufferize),"
731
+ " func.func(tile-for-gpu{tile-sizes=32 in-regions}),"
732
+ " func.func(tile-for-gpu{tile-sizes=1 in-regions}),"
739
733
" region-bufferize,"
740
734
" canonicalize,"
741
- " func.func(finalizing-bufferize),"
735
+ " one-shot-bufferize,"
736
+ " cse,"
737
+ " canonicalize,"
738
+ " scf-forall-to-parallel,"
739
+ " cse,"
740
+ " canonicalize,"
742
741
" imex-remove-temporaries,"
743
- " func.func(convert-linalg-to-parallel-loops),"
744
- " func.func(scf-parallel-loop-fusion),"
745
- // is add-outer-parallel-loop needed?
746
- " func.func(imex-add-outer-parallel-loop),"
742
+ " buffer-deallocation-pipeline,"
743
+ " func.func(convert-linalg-to-loops),"
747
744
" func.func(gpu-map-parallel-loops),"
748
- " func.func(convert-parallel-loops-to-gpu),"
745
+ " convert-parallel-loops-to-gpu,"
746
+ " canonicalize,"
747
+ " cse,"
749
748
" func.func(insert-gpu-allocs{in-regions=1}),"
750
749
" func.func(insert-gpu-copy),"
751
750
" drop-regions,"
752
751
" canonicalize,"
753
- " func.func(lower-affine),"
754
752
" gpu-kernel-outlining,"
753
+ " convert-scf-to-cf,"
754
+ " convert-cf-to-llvm,"
755
755
" canonicalize,"
756
756
" cse,"
757
- " gpu.module(strip-debuginfo,convert-gpu-to-nvvm),"
758
- " nvvm-attach-target,"
757
+ " gpu.module(strip-debuginfo,"
758
+ " convert-gpu-to-nvvm),"
759
+ " nvvm-attach-target{chip=sm_80 O=3},"
759
760
" func.func(gpu-async-region),"
760
761
" expand-strided-metadata,"
761
762
" lower-affine,"
762
763
" gpu-to-llvm,"
763
- " gpu-module-to-binary{format=fatbin},"
764
764
" convert-func-to-llvm,"
765
765
" convert-math-to-llvm,"
766
766
" finalize-memref-to-llvm,"
767
+ " canonicalize,"
768
+ " cse,"
769
+ " gpu-module-to-binary{format=fatbin},"
767
770
" reconcile-unrealized-casts" ;
768
771
769
772
const std::string _passes (get_text_env (" SHARPY_PASSES" ));
0 commit comments