Skip to content

Commit c986b28

Browse files
committed
update gpu pipeline
1 parent 3f71a7b commit c986b28

File tree

1 file changed

+21
-18
lines changed

1 file changed

+21
-18
lines changed

Diff for: src/jit/mlir.cpp

+21 -18
Original file line number | Diff line number | Diff line change
@@ -727,43 +727,46 @@ static const std::string gpu_pipeline =
727727
"linalg-fuse-elementwise-ops,"
728728
"arith-expand,"
729729
"memref-expand,"
730-
"arith-bufferize,"
731-
"func-bufferize,"
732730
"func.func(empty-tensor-to-alloc-tensor),"
733-
"func.func(scf-bufferize),"
734-
"func.func(tensor-bufferize),"
735-
"func.func(bufferization-bufferize),"
736-
"func.func(linalg-bufferize),"
737-
"func.func(linalg-detensorize),"
738-
"func.func(tensor-bufferize),"
731+
"func.func(tile-for-gpu{tile-sizes=32 in-regions}),"
732+
"func.func(tile-for-gpu{tile-sizes=1 in-regions}),"
739733
"region-bufferize,"
740734
"canonicalize,"
741-
"func.func(finalizing-bufferize),"
735+
"one-shot-bufferize,"
736+
"cse,"
737+
"canonicalize,"
738+
"scf-forall-to-parallel,"
739+
"cse,"
740+
"canonicalize,"
742741
"imex-remove-temporaries,"
743-
"func.func(convert-linalg-to-parallel-loops),"
744-
"func.func(scf-parallel-loop-fusion),"
745-
// is add-outer-parallel-loop needed?
746-
"func.func(imex-add-outer-parallel-loop),"
742+
"buffer-deallocation-pipeline,"
743+
"func.func(convert-linalg-to-loops),"
747744
"func.func(gpu-map-parallel-loops),"
748-
"func.func(convert-parallel-loops-to-gpu),"
745+
"convert-parallel-loops-to-gpu,"
746+
"canonicalize,"
747+
"cse,"
749748
"func.func(insert-gpu-allocs{in-regions=1}),"
750749
"func.func(insert-gpu-copy),"
751750
"drop-regions,"
752751
"canonicalize,"
753-
"func.func(lower-affine),"
754752
"gpu-kernel-outlining,"
753+
"convert-scf-to-cf,"
754+
"convert-cf-to-llvm,"
755755
"canonicalize,"
756756
"cse,"
757-
"gpu.module(strip-debuginfo,convert-gpu-to-nvvm),"
758-
"nvvm-attach-target,"
757+
"gpu.module(strip-debuginfo,"
758+
"convert-gpu-to-nvvm),"
759+
"nvvm-attach-target{chip=sm_80 O=3},"
759760
"func.func(gpu-async-region),"
760761
"expand-strided-metadata,"
761762
"lower-affine,"
762763
"gpu-to-llvm,"
763-
"gpu-module-to-binary{format=fatbin},"
764764
"convert-func-to-llvm,"
765765
"convert-math-to-llvm,"
766766
"finalize-memref-to-llvm,"
767+
"canonicalize,"
768+
"cse,"
769+
"gpu-module-to-binary{format=fatbin},"
767770
"reconcile-unrealized-casts";
768771

769772
const std::string _passes(get_text_env("SHARPY_PASSES"));

0 commit comments

Comments
 (0)