@@ -727,43 +727,46 @@ static const std::string gpu_pipeline =
727727 " linalg-fuse-elementwise-ops,"
728728 " arith-expand,"
729729 " memref-expand,"
730- " arith-bufferize,"
731- " func-bufferize,"
732730 " func.func(empty-tensor-to-alloc-tensor),"
733- " func.func(scf-bufferize),"
734- " func.func(tensor-bufferize),"
735- " func.func(bufferization-bufferize),"
736- " func.func(linalg-bufferize),"
737- " func.func(linalg-detensorize),"
738- " func.func(tensor-bufferize),"
731+ " func.func(tile-for-gpu{tile-sizes=32 in-regions}),"
732+ " func.func(tile-for-gpu{tile-sizes=1 in-regions}),"
739733 " region-bufferize,"
740734 " canonicalize,"
741- " func.func(finalizing-bufferize),"
735+ " one-shot-bufferize,"
736+ " cse,"
737+ " canonicalize,"
738+ " scf-forall-to-parallel,"
739+ " cse,"
740+ " canonicalize,"
742741 " imex-remove-temporaries,"
743- " func.func(convert-linalg-to-parallel-loops),"
744- " func.func(scf-parallel-loop-fusion),"
745- // is add-outer-parallel-loop needed?
746- " func.func(imex-add-outer-parallel-loop),"
742+ " buffer-deallocation-pipeline,"
743+ " func.func(convert-linalg-to-loops),"
747744 " func.func(gpu-map-parallel-loops),"
748- " func.func(convert-parallel-loops-to-gpu),"
745+ " convert-parallel-loops-to-gpu,"
746+ " canonicalize,"
747+ " cse,"
749748 " func.func(insert-gpu-allocs{in-regions=1}),"
750749 " func.func(insert-gpu-copy),"
751750 " drop-regions,"
752751 " canonicalize,"
753- " func.func(lower-affine),"
754752 " gpu-kernel-outlining,"
753+ " convert-scf-to-cf,"
754+ " convert-cf-to-llvm,"
755755 " canonicalize,"
756756 " cse,"
757- " gpu.module(strip-debuginfo,convert-gpu-to-nvvm),"
758- " nvvm-attach-target,"
757+ " gpu.module(strip-debuginfo,"
758+ " convert-gpu-to-nvvm),"
759+ " nvvm-attach-target{chip=sm_80 O=3},"
759760 " func.func(gpu-async-region),"
760761 " expand-strided-metadata,"
761762 " lower-affine,"
762763 " gpu-to-llvm,"
763- " gpu-module-to-binary{format=fatbin},"
764764 " convert-func-to-llvm,"
765765 " convert-math-to-llvm,"
766766 " finalize-memref-to-llvm,"
767+ " canonicalize,"
768+ " cse,"
769+ " gpu-module-to-binary{format=fatbin},"
767770 " reconcile-unrealized-casts" ;
768771
// Pass-pipeline text obtained once from get_text_env() using the SHARPY_PASSES key.
// NOTE(review): presumably an environment-variable override for the built-in
// pipelines — confirm what get_text_env returns when the variable is unset.
769772const std::string _passes (get_text_env (" SHARPY_PASSES" ));
0 commit comments