From f73b919d57a0e633a28e77c12d81a127f87f9068 Mon Sep 17 00:00:00 2001 From: Uttarayan Mondal Date: Mon, 23 Dec 2024 22:35:35 +0530 Subject: [PATCH 01/12] feat: Initial cuda support --- mnn-sys/Cargo.toml | 1 + mnn-sys/build.rs | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/mnn-sys/Cargo.toml b/mnn-sys/Cargo.toml index 08075d1..5314318 100644 --- a/mnn-sys/Cargo.toml +++ b/mnn-sys/Cargo.toml @@ -30,6 +30,7 @@ thiserror = "2.0.3" [features] opencl = [] +cuda = [] metal = [] coreml = ["metal"] vulkan = [] diff --git a/mnn-sys/build.rs b/mnn-sys/build.rs index 5d06bea..d11a843 100644 --- a/mnn-sys/build.rs +++ b/mnn-sys/build.rs @@ -463,6 +463,7 @@ impl CxxOption { cxx_option_from_features! { VULKAN => "vulkan", "MNN_VULKAN", METAL => "metal", "MNN_METAL", + CUDA => "cuda", "MNN_CUDA", COREML => "coreml", "MNN_COREML", OPENCL => "opencl", "MNN_OPENCL", CRT_STATIC => "crt_static", "MNN_WIN_RUNTIME_MT", @@ -697,6 +698,8 @@ pub fn mnn_cpp_build(vendor: impl AsRef) -> Result<()> { let build = opencl(build, vendor).change_context(Error)?; #[cfg(feature = "metal")] let build = metal(build, vendor).change_context(Error)?; + #[cfg(feature = "cuda")] + let build = cuda(build, vendor).change_context(Error)?; build .try_compile("mnn") @@ -1037,3 +1040,29 @@ pub fn cc_builder() -> cc::Build { .std("c++11") .to_owned() } + +pub fn cuda(mut build: cc::Build, vendor: impl AsRef) -> Result { + let cuda_dir = vendor.as_ref().join("source/backend/cuda"); + let cuda_files = ignore::WalkBuilder::new(cuda_dir.join("core")) + .add(cuda_dir.join("execution")) + .build() + .flatten() + .filter(|p| p.path().has_extension(["cpp", "cu"])) + .map(|e| e.into_path()); + cc::Build::new() + .cuda(true) + .cudart("static") + .includes(mnn_includes(vendor)) + .files(cuda_files) + .try_compile("MNNCuda") + .change_context(Error) + .attach_printable("Failed to compile MNNCuda")?; + build.define("MNN_CUDA_ENABLED", "1"); + Ok(build) +} + +pub fn find_cuda() -> Result { 
+ std::env::var("CUDA_PATH") + .change_context(Error) + .map(PathBuf::from) +} From 967fc48b0a9623c015b5b5cae935092c17ad1e46 Mon Sep 17 00:00:00 2001 From: uttarayan21 Date: Mon, 23 Dec 2024 23:28:29 +0530 Subject: [PATCH 02/12] feat: Added proper cuda support --- Cargo.toml | 1 + benches/mnn-bench.rs | 15 +++++++++++++++ flake.nix | 3 +++ mnn-sys/build.rs | 35 ++++++++++++++++++++++++++++++----- src/schedule.rs | 13 +++++++++++++ 5 files changed, 62 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 1ffdae5..c1ff8b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ opencl = ["mnn-sys/opencl"] metal = ["mnn-sys/metal"] coreml = ["mnn-sys/coreml"] +cuda = ["mnn-sys/cuda"] vulkan = [] # This is currently unimplemented diff --git a/benches/mnn-bench.rs b/benches/mnn-bench.rs index f05e438..69737c0 100644 --- a/benches/mnn-bench.rs +++ b/benches/mnn-bench.rs @@ -45,4 +45,19 @@ mod mnn_realesr_bench_with_ones { net.wait(&session); }); } + + #[cfg(feature = "cuda")] + #[divan::bench] + pub fn mnn_realesr_benchmark_metal(bencher: Bencher) { + let net = Interpreter::from_file("tests/assets/realesr.mnn").unwrap(); + let mut config = ScheduleConfig::new(); + config.set_type(ForwardType::Cuda); + let session = net.create_session(config).unwrap(); + bencher.bench_local(|| { + let mut input = net.input(&session, "data").unwrap(); + input.fill(1f32); + net.run_session(&session).unwrap(); + net.wait(&session); + }); + } } diff --git a/flake.nix b/flake.nix index d007e57..98b6a6f 100644 --- a/flake.nix +++ b/flake.nix @@ -42,6 +42,7 @@ flake-utils.lib.eachDefaultSystem ( system: let pkgs = import nixpkgs { + config.allowUnfree = true; inherit system; overlays = [ rust-overlay.overlays.default @@ -204,8 +205,10 @@ // { MNN_SRC = null; LLDB_DEBUGSERVER_PATH = "/Applications/Xcode.app/Contents/SharedFrameworks/LLDB.framework/Versions/A/Resources/debugserver"; + CUDA_PATH = "${pkgs.cudatoolkit}"; packages = with pkgs; [ + cudatoolkit cargo-audit 
cargo-deny cargo-hakari diff --git a/mnn-sys/build.rs b/mnn-sys/build.rs index d11a843..6205a73 100644 --- a/mnn-sys/build.rs +++ b/mnn-sys/build.rs @@ -1052,8 +1052,28 @@ pub fn cuda(mut build: cc::Build, vendor: impl AsRef) -> Result cc::Build::new() .cuda(true) .cudart("static") - .includes(mnn_includes(vendor)) + .std("c++17") + .flag("-O3") + .includes(mnn_includes(vendor.as_ref())) + .include(vendor.as_ref().join("3rd_party/cutlass/v3_4_0/include")) + .include(&cuda_dir) + .pipe(|b| { + if *TARGET_OS == "windows" { + b.flag("-Xcompiler").flag("/FS"); + } + b + }) + .pipe(cuda_compute(60, false)) + .pipe(cuda_compute(61, false)) + .pipe(cuda_compute(62, false)) + .pipe(cuda_compute(70, false)) + .pipe(cuda_compute(72, false)) + .pipe(cuda_compute(75, true)) + .pipe(cuda_compute(80, true)) + .pipe(cuda_compute(86, true)) + .pipe(cuda_compute(89, true)) .files(cuda_files) + .file(cuda_dir.join("Register.cpp")) .try_compile("MNNCuda") .change_context(Error) .attach_printable("Failed to compile MNNCuda")?; @@ -1061,8 +1081,13 @@ pub fn cuda(mut build: cc::Build, vendor: impl AsRef) -> Result Ok(build) } -pub fn find_cuda() -> Result { - std::env::var("CUDA_PATH") - .change_context(Error) - .map(PathBuf::from) +pub fn cuda_compute(version: u8, enable: bool) -> impl FnOnce(&mut cc::Build) -> &mut cc::Build { + move |build: &mut cc::Build| { + if enable { + build.define(&format!("MNN_CUDA_ENABLE_SM{version}"), None); + } + build.flag(&format!( + "-gencode=arch=compute_{version},code=sm_{version}", + )) + } } diff --git a/src/schedule.rs b/src/schedule.rs index f32666c..8e73d1a 100644 --- a/src/schedule.rs +++ b/src/schedule.rs @@ -45,6 +45,9 @@ pub enum ForwardType { #[cfg(feature = "metal")] /// Use the Metal backend for computation. Metal, + #[cfg(feature = "cuda")] + /// Use the CUDA backend for computation. + Cuda, #[cfg(feature = "opencl")] /// Use the OpenCL backend for computation. 
OpenCL, @@ -65,6 +68,8 @@ impl ForwardType { ForwardType::CPU => MNNForwardType::MNN_FORWARD_CPU, #[cfg(feature = "metal")] ForwardType::Metal => MNNForwardType::MNN_FORWARD_METAL, + #[cfg(feature = "cuda")] + ForwardType::Cuda => MNNForwardType::MNN_FORWARD_CUDA, #[cfg(feature = "opencl")] ForwardType::OpenCL => MNNForwardType::MNN_FORWARD_OPENCL, #[cfg(feature = "vulkan")] @@ -81,6 +86,8 @@ impl ForwardType { MNNForwardType::MNN_FORWARD_CPU => ForwardType::CPU, #[cfg(feature = "metal")] MNNForwardType::MNN_FORWARD_METAL => ForwardType::Metal, + #[cfg(feature = "cuda")] + MNNForwardType::MNN_FORWARD_CUDA => ForwardType::Cuda, #[cfg(feature = "opencl")] MNNForwardType::MNN_FORWARD_OPENCL => ForwardType::OpenCL, #[cfg(feature = "vulkan")] @@ -99,6 +106,8 @@ impl ForwardType { "cpu", #[cfg(feature = "metal")] "metal", + #[cfg(feature = "cuda")] + "cuda", #[cfg(feature = "opencl")] "opencl", #[cfg(feature = "vulkan")] @@ -116,6 +125,8 @@ impl ForwardType { ForwardType::CPU => "cpu", #[cfg(feature = "metal")] ForwardType::Metal => "metal", + #[cfg(feature = "cuda")] + ForwardType::Cuda => "cuda", #[cfg(feature = "opencl")] ForwardType::OpenCL => "opencl", #[cfg(feature = "vulkan")] @@ -136,6 +147,8 @@ impl core::str::FromStr for ForwardType { "cpu" => Ok(ForwardType::CPU), #[cfg(feature = "metal")] "metal" => Ok(ForwardType::Metal), + #[cfg(feature = "cuda")] + "cuda" => Ok(ForwardType::Cuda), #[cfg(feature = "opencl")] "opencl" => Ok(ForwardType::OpenCL), #[cfg(feature = "vulkan")] From 91bc91ba12883c659088e8938426bff59f26be37 Mon Sep 17 00:00:00 2001 From: uttarayan21 Date: Tue, 24 Dec 2024 18:55:05 +0530 Subject: [PATCH 03/12] feat: Only add cudatoolkit only on linux --- flake.nix | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/flake.nix b/flake.nix index 98b6a6f..e4e0023 100644 --- a/flake.nix +++ b/flake.nix @@ -205,10 +205,8 @@ // { MNN_SRC = null; LLDB_DEBUGSERVER_PATH = 
"/Applications/Xcode.app/Contents/SharedFrameworks/LLDB.framework/Versions/A/Resources/debugserver"; - CUDA_PATH = "${pkgs.cudatoolkit}"; packages = with pkgs; [ - cudatoolkit cargo-audit cargo-deny cargo-hakari @@ -226,10 +224,14 @@ ] ++ ( lib.optionals pkgs.stdenv.isLinux [ + cudatoolkit cargo-llvm-cov ] ); # ++ (with packages; [bencher inspect]); + } + // lib.optionalAttrs pkgs.stdenv.isLinux { + CUDA_PATH = "${pkgs.cudatoolkit}"; }); }; } From 3b8a74600c965e6516b7cd4d7e2815ea02b632aa Mon Sep 17 00:00:00 2001 From: uttarayan21 Date: Tue, 24 Dec 2024 19:01:23 +0530 Subject: [PATCH 04/12] fix: Rename cuda test --- benches/mnn-bench.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benches/mnn-bench.rs b/benches/mnn-bench.rs index 69737c0..7163d29 100644 --- a/benches/mnn-bench.rs +++ b/benches/mnn-bench.rs @@ -48,7 +48,7 @@ mod mnn_realesr_bench_with_ones { #[cfg(feature = "cuda")] #[divan::bench] - pub fn mnn_realesr_benchmark_metal(bencher: Bencher) { + pub fn mnn_realesr_benchmark_cuda(bencher: Bencher) { let net = Interpreter::from_file("tests/assets/realesr.mnn").unwrap(); let mut config = ScheduleConfig::new(); config.set_type(ForwardType::Cuda); From eee36d87a07f859662b13a4afabf724d1d3d918f Mon Sep 17 00:00:00 2001 From: Uttarayan Mondal Date: Wed, 25 Dec 2024 03:04:06 +0530 Subject: [PATCH 05/12] fix: Fixed cuda on windows --- mnn-sys/build.rs | 8 +++++--- tools/bencher/Cargo.toml | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/mnn-sys/build.rs b/mnn-sys/build.rs index 6205a73..e16748a 100644 --- a/mnn-sys/build.rs +++ b/mnn-sys/build.rs @@ -1048,14 +1048,16 @@ pub fn cuda(mut build: cc::Build, vendor: impl AsRef) -> Result .build() .flatten() .filter(|p| p.path().has_extension(["cpp", "cu"])) - .map(|e| e.into_path()); + .map(|e| e.into_path()) + .filter(|p| !p.components().any(|component| component.as_os_str().eq("plugin"))) + .filter(|p| !p.components().any(|component| 
component.as_os_str().eq("weight_only_quant"))); cc::Build::new() .cuda(true) .cudart("static") - .std("c++17") + .flag("--std=c++17") .flag("-O3") .includes(mnn_includes(vendor.as_ref())) - .include(vendor.as_ref().join("3rd_party/cutlass/v3_4_0/include")) + .include(vendor.as_ref().join("3rd_party/cutlass/v2_9_0/include")) .include(&cuda_dir) .pipe(|b| { if *TARGET_OS == "windows" { diff --git a/tools/bencher/Cargo.toml b/tools/bencher/Cargo.toml index b38025f..17f0993 100644 --- a/tools/bencher/Cargo.toml +++ b/tools/bencher/Cargo.toml @@ -11,7 +11,7 @@ mnn = { workspace = true, features = ["opencl", "serde", "metal"] } mnn = { workspace = true, features = ["opencl", "serde"] } [target."cfg(windows)".dependencies] -mnn = { workspace = true, features = ["opencl", "serde"] } +mnn = { workspace = true, features = ["opencl", "serde", "cuda"] } [dependencies] bytemuck = { version = "1.20.0", features = ["extern_crate_alloc"] } From f8392f2bd117b3a779dee137ca3ce04c6e60600b Mon Sep 17 00:00:00 2001 From: uttarayan21 Date: Thu, 2 Jan 2025 12:24:19 +0530 Subject: [PATCH 06/12] feat: Added MNN_CUDA define in all files --- flake.nix | 21 ++++++++++++++++----- mnn-sys/build.rs | 42 ++++++++++++++++++++++-------------------- 2 files changed, 38 insertions(+), 25 deletions(-) diff --git a/flake.nix b/flake.nix index e4e0023..d060ae1 100644 --- a/flake.nix +++ b/flake.nix @@ -53,6 +53,7 @@ enableMetal = true; enableOpencl = true; }; + # stdenv = final.clangStdenv; }) ]; }; @@ -68,7 +69,7 @@ extensions = ["rust-docs" "rust-src" "rust-analyzer"]; } // (lib.optionalAttrs pkgs.stdenv.isDarwin { - targets = ["aarch64-apple-darwin" "x86_64-apple-darwin"]; + targets = ["aarch64-apple-darwin" "x86_64-apple-darwin" "wasm32-unknown-unknown"]; })); craneLib = (crane.mkLib pkgs).overrideToolchain rustToolchain; craneLibLLvmTools = (crane.mkLib pkgs).overrideToolchain rustToolchainWithLLvmTools; @@ -201,10 +202,13 @@ }; devShells = { - default = pkgs.mkShell (commonArgs - // { + default = 
pkgs.mkShell ( + { MNN_SRC = null; LLDB_DEBUGSERVER_PATH = "/Applications/Xcode.app/Contents/SharedFrameworks/LLDB.framework/Versions/A/Resources/debugserver"; + LIBCLANG_PATH = commonArgs.LIBCLANG_PATH; + nativeBuildInputs = commonArgs.nativeBuildInputs; + buildINputs = commonArgs.buildInputs; packages = with pkgs; [ cargo-audit @@ -228,11 +232,18 @@ cargo-llvm-cov ] ); - # ++ (with packages; [bencher inspect]); } // lib.optionalAttrs pkgs.stdenv.isLinux { CUDA_PATH = "${pkgs.cudatoolkit}"; - }); + } + ); + wasm32 = pkgs.mkShell.override {stdenv = pkgs.clangStdenv;} { + MNN_SRC = null; + packages = with pkgs; [ + llvmPackages.lldb + rustToolchainWithRustAnalyzer + ]; + }; }; } ) diff --git a/mnn-sys/build.rs b/mnn-sys/build.rs index e16748a..5ab2de2 100644 --- a/mnn-sys/build.rs +++ b/mnn-sys/build.rs @@ -257,6 +257,7 @@ pub fn mnn_c_bindgen(vendor: impl AsRef, out: impl AsRef) -> Result< .clang_arg(CxxOption::METAL.cxx()) .clang_arg(CxxOption::COREML.cxx()) .clang_arg(CxxOption::OPENCL.cxx()) + .clang_arg(CxxOption::CUDA.cxx()) .pipe(|builder| { if is_emscripten() { println!("cargo:rustc-cdylib-link-arg=-fvisibility=default"); @@ -314,6 +315,7 @@ pub fn mnn_cpp_bindgen(vendor: impl AsRef, out: impl AsRef) -> Resul .clang_arg(CxxOption::METAL.cxx()) .clang_arg(CxxOption::COREML.cxx()) .clang_arg(CxxOption::OPENCL.cxx()) + .clang_arg(CxxOption::CUDA.cxx()) .clang_arg(format!("-I{}", vendor.join("include").to_string_lossy())) .generate_cstr(true) .generate_inline_functions(true) @@ -351,19 +353,17 @@ pub fn mnn_c_build(path: impl AsRef, vendor: impl AsRef) -> Result<( let vendor = vendor.as_ref(); cc::Build::new() .include(vendor.join("include")) - // .includes(vulkan_includes(vendor)) .pipe(|config| { - #[cfg(feature = "vulkan")] - config.define("MNN_VULKAN", "1"); - #[cfg(feature = "metal")] - config.define("MNN_METAL", "1"); - #[cfg(feature = "coreml")] - config.define("MNN_COREML", "1"); - #[cfg(feature = "opencl")] - config.define("MNN_OPENCL", "ON"); + 
CxxOption::COREML.define(config); + CxxOption::CUDA.define(config); + CxxOption::METAL.define(config); + CxxOption::OPENCL.define(config); + CxxOption::VULKAN.define(config); if is_emscripten() { config.compiler("emcc"); // We can't compile wasm32-unknown-unknown with emscripten + // emscripten works with cpu backend only so we are not sure if it would work with + // others at all config.target("wasm32-unknown-emscripten"); config.cpp_link_stdlib("c++-noexcept"); } @@ -1051,6 +1051,18 @@ pub fn cuda(mut build: cc::Build, vendor: impl AsRef) -> Result .map(|e| e.into_path()) .filter(|p| !p.components().any(|component| component.as_os_str().eq("plugin"))) .filter(|p| !p.components().any(|component| component.as_os_str().eq("weight_only_quant"))); + + fn cuda_compute(version: u8, enable: bool) -> impl FnOnce(&mut cc::Build) -> &mut cc::Build { + move |build: &mut cc::Build| { + if enable { + build.define(&format!("MNN_CUDA_ENABLE_SM{version}"), None); + } + build.flag(&format!( + "-gencode=arch=compute_{version},code=sm_{version}", + )) + } + } + cc::Build::new() .cuda(true) .cudart("static") @@ -1080,16 +1092,6 @@ pub fn cuda(mut build: cc::Build, vendor: impl AsRef) -> Result .change_context(Error) .attach_printable("Failed to compile MNNCuda")?; build.define("MNN_CUDA_ENABLED", "1"); + CxxOption::CUDA.define(&mut build); Ok(build) } - -pub fn cuda_compute(version: u8, enable: bool) -> impl FnOnce(&mut cc::Build) -> &mut cc::Build { - move |build: &mut cc::Build| { - if enable { - build.define(&format!("MNN_CUDA_ENABLE_SM{version}"), None); - } - build.flag(&format!( - "-gencode=arch=compute_{version},code=sm_{version}", - )) - } -} From 31d769a605241798583e5ab605835e1aa3b2183a Mon Sep 17 00:00:00 2001 From: uttarayan21 Date: Thu, 2 Jan 2025 12:25:27 +0530 Subject: [PATCH 07/12] feat: Removed cuda from default --- Cargo.toml | 2 +- tools/bencher/Cargo.toml | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 
c1ff8b3..93355f1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,7 +45,7 @@ serde = ["dep:serde"] simd = ["mnn-sys/simd"] -default = ["simd"] +# default = ["simd"] [dev-dependencies] diff --git a/tools/bencher/Cargo.toml b/tools/bencher/Cargo.toml index 17f0993..0a9b1d1 100644 --- a/tools/bencher/Cargo.toml +++ b/tools/bencher/Cargo.toml @@ -11,7 +11,10 @@ mnn = { workspace = true, features = ["opencl", "serde", "metal"] } mnn = { workspace = true, features = ["opencl", "serde"] } [target."cfg(windows)".dependencies] -mnn = { workspace = true, features = ["opencl", "serde", "cuda"] } +mnn = { workspace = true, features = [ + "cuda", + "serde", +], default-features = false } [dependencies] bytemuck = { version = "1.20.0", features = ["extern_crate_alloc"] } From 9876f8aa51272a2ced3f659902d177baa165a779 Mon Sep 17 00:00:00 2001 From: uttarayan21 Date: Thu, 2 Jan 2025 14:48:14 +0530 Subject: [PATCH 08/12] feat: Added clang stdenv to not need LIBCLANG_PATH --- flake.nix | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/flake.nix b/flake.nix index d060ae1..5587b67 100644 --- a/flake.nix +++ b/flake.nix @@ -53,7 +53,6 @@ enableMetal = true; enableOpencl = true; }; - # stdenv = final.clangStdenv; }) ]; }; @@ -71,10 +70,11 @@ // (lib.optionalAttrs pkgs.stdenv.isDarwin { targets = ["aarch64-apple-darwin" "x86_64-apple-darwin" "wasm32-unknown-unknown"]; })); + nightlyToolchain = pkgs.rust-bin.nightly.latest.default; craneLib = (crane.mkLib pkgs).overrideToolchain rustToolchain; craneLibLLvmTools = (crane.mkLib pkgs).overrideToolchain rustToolchainWithLLvmTools; - src = lib.sources.sourceFilesBySuffices ./. [".rs" ".toml" ".patch" ".mnn" ".h" ".cpp" ".svg" "lock"]; + src = lib.sources.sourceFilesBySuffices ./. 
[".rs" ".toml" ".patch" ".mnn" ".h" ".cpp" ".svg" ".lock"]; MNN_SRC = pkgs.applyPatches { name = "mnn-src"; src = mnn-src; @@ -82,18 +82,14 @@ }; commonArgs = { inherit src MNN_SRC; + stdenv = pkgs.clangStdenv; pname = "mnn"; doCheck = false; - LIBCLANG_PATH = "${pkgs.llvmPackages.libclang.lib}/lib"; nativeBuildInputs = with pkgs; [ - cmake - llvmPackages.libclang.lib - clang pkg-config ]; buildInputs = with pkgs; - [] - ++ (lib.optionals pkgs.stdenv.isLinux [ + (lib.optionals pkgs.stdenv.isLinux [ ocl-icd opencl-headers ]) @@ -159,13 +155,10 @@ # name = "mnn-leaks"; # cargoLock = { # lockFile = ./Cargo.lock; - # outputHashes = { - # "cmake-0.1.50" = "sha256-GM2D7dpb2i2S6qYVM4HYk5B40TwKCmGQnUPfXksyf0M="; - # }; # }; # # buildPhase = '' - # cargo test --target aarch64-apple-darwin + # cargo test --profile rwd --target aarch64-apple-darwin # ''; # RUSTFLAGS = "-Zsanitizer=address"; # ASAN_OPTIONS = "detect_leaks=1"; @@ -202,11 +195,10 @@ }; devShells = { - default = pkgs.mkShell ( + default = pkgs.mkShell.override {stdenv = pkgs.clangStdenv;} ( { MNN_SRC = null; LLDB_DEBUGSERVER_PATH = "/Applications/Xcode.app/Contents/SharedFrameworks/LLDB.framework/Versions/A/Resources/debugserver"; - LIBCLANG_PATH = commonArgs.LIBCLANG_PATH; nativeBuildInputs = commonArgs.nativeBuildInputs; buildINputs = commonArgs.buildInputs; packages = with pkgs; @@ -237,13 +229,6 @@ CUDA_PATH = "${pkgs.cudatoolkit}"; } ); - wasm32 = pkgs.mkShell.override {stdenv = pkgs.clangStdenv;} { - MNN_SRC = null; - packages = with pkgs; [ - llvmPackages.lldb - rustToolchainWithRustAnalyzer - ]; - }; }; } ) From 0164b678b2353436da9efa3e0e22184752924eaa Mon Sep 17 00:00:00 2001 From: uttarayan21 Date: Thu, 2 Jan 2025 15:38:28 +0530 Subject: [PATCH 09/12] chore: Remove unused comments and flags --- flake.nix | 2 -- mnn-sys/build.rs | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/flake.nix b/flake.nix index 5587b67..895241d 100644 --- a/flake.nix +++ b/flake.nix @@ -162,8 +162,6 @@ # ''; 
# RUSTFLAGS = "-Zsanitizer=address"; # ASAN_OPTIONS = "detect_leaks=1"; - # # MNN_COMPILE = "NO"; - # # MNN_LIB_DIR = "${pkgs.mnn}/lib"; # } # ); } diff --git a/mnn-sys/build.rs b/mnn-sys/build.rs index 5ab2de2..e7ae613 100644 --- a/mnn-sys/build.rs +++ b/mnn-sys/build.rs @@ -1067,7 +1067,7 @@ pub fn cuda(mut build: cc::Build, vendor: impl AsRef) -> Result .cuda(true) .cudart("static") .flag("--std=c++17") - .flag("-O3") + // .flag("-O3") .includes(mnn_includes(vendor.as_ref())) .include(vendor.as_ref().join("3rd_party/cutlass/v2_9_0/include")) .include(&cuda_dir) From 51979fc874d82c8cf2905ed7b8dd828fbd369529 Mon Sep 17 00:00:00 2001 From: uttarayan21 Date: Thu, 2 Jan 2025 20:45:34 +0530 Subject: [PATCH 10/12] fix: LIBCLANG_PATH is needed for linux --- flake.nix | 4 ++++ tools/bencher/Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/flake.nix b/flake.nix index 895241d..71d47b8 100644 --- a/flake.nix +++ b/flake.nix @@ -87,11 +87,14 @@ doCheck = false; nativeBuildInputs = with pkgs; [ pkg-config + libclang.lib ]; + LIBCLANG_PATH = "${pkgs.libclang.lib}/lib"; buildInputs = with pkgs; (lib.optionals pkgs.stdenv.isLinux [ ocl-icd opencl-headers + cudatoolkit ]) ++ (lib.optionals pkgs.stdenv.isDarwin [ apple-sdk_13 @@ -199,6 +202,7 @@ LLDB_DEBUGSERVER_PATH = "/Applications/Xcode.app/Contents/SharedFrameworks/LLDB.framework/Versions/A/Resources/debugserver"; nativeBuildInputs = commonArgs.nativeBuildInputs; buildINputs = commonArgs.buildInputs; + LIBCLANG_PATH = commonArgs.LIBCLANG_PATH; packages = with pkgs; [ cargo-audit diff --git a/tools/bencher/Cargo.toml b/tools/bencher/Cargo.toml index 0a9b1d1..17b67b6 100644 --- a/tools/bencher/Cargo.toml +++ b/tools/bencher/Cargo.toml @@ -10,7 +10,7 @@ mnn = { workspace = true, features = ["opencl", "serde", "metal"] } [target."x86_64-apple-darwin".dependencies] mnn = { workspace = true, features = ["opencl", "serde"] } -[target."cfg(windows)".dependencies] +[target."cfg(not(target_os = 
\"macos\"))".dependencies] mnn = { workspace = true, features = [ "cuda", "serde", From 61edc46498d5d8bcad4c40525e0ce492ba3f1b68 Mon Sep 17 00:00:00 2001 From: uttarayan21 Date: Fri, 3 Jan 2025 00:19:59 +0530 Subject: [PATCH 11/12] feat: Add cuda more compile flags --- flake.nix | 19 ++++++++---- mnn-sys/build.rs | 65 ++++++++++++++++++++++++++++----------- mnn-sys/vendor | 2 +- src/schedule.rs | 19 ++++++++++++ tools/bencher/src/main.rs | 15 +++++++-- 5 files changed, 93 insertions(+), 27 deletions(-) diff --git a/flake.nix b/flake.nix index 71d47b8..d8c0b82 100644 --- a/flake.nix +++ b/flake.nix @@ -85,16 +85,22 @@ stdenv = pkgs.clangStdenv; pname = "mnn"; doCheck = false; - nativeBuildInputs = with pkgs; [ - pkg-config - libclang.lib - ]; + nativeBuildInputs = with pkgs; + [ + pkg-config + libclang.lib + ] + ++ (lib.optionals pkgs.stdenv.isLinux [ + cudatoolkit + ]); LIBCLANG_PATH = "${pkgs.libclang.lib}/lib"; buildInputs = with pkgs; (lib.optionals pkgs.stdenv.isLinux [ ocl-icd opencl-headers - cudatoolkit + (lib.getDev cudaPackages.cuda_cudart) + (lib.getLib cudaPackages.cuda_cudart) + (lib.getStatic cudaPackages.cuda_cudart) ]) ++ (lib.optionals pkgs.stdenv.isDarwin [ apple-sdk_13 @@ -201,7 +207,7 @@ MNN_SRC = null; LLDB_DEBUGSERVER_PATH = "/Applications/Xcode.app/Contents/SharedFrameworks/LLDB.framework/Versions/A/Resources/debugserver"; nativeBuildInputs = commonArgs.nativeBuildInputs; - buildINputs = commonArgs.buildInputs; + buildInputs = commonArgs.buildInputs; LIBCLANG_PATH = commonArgs.LIBCLANG_PATH; packages = with pkgs; [ @@ -219,6 +225,7 @@ rust-bindgen google-cloud-sdk rustToolchainWithRustAnalyzer + cppcheck ] ++ ( lib.optionals pkgs.stdenv.isLinux [ diff --git a/mnn-sys/build.rs b/mnn-sys/build.rs index e7ae613..aa5c136 100644 --- a/mnn-sys/build.rs +++ b/mnn-sys/build.rs @@ -329,9 +329,12 @@ pub fn mnn_cpp_bindgen(vendor: impl AsRef, out: impl AsRef) -> Resul .join("Interpreter.hpp") .to_string_lossy(), ) + // .header( + // vendor + // 
.join("include/MNN/MNNSharedContext.h") + // .to_string_lossy(), + // ) .allowlist_item(".*SessionInfoCode.*"); - // let cmd = bindings.command_line_flags().join(" "); - // println!("cargo:warn=bindgen: {}", cmd); let bindings = bindings.generate().change_context(Error)?; bindings .write_to_file(out.as_ref().join("mnn_cpp.rs")) @@ -622,6 +625,7 @@ pub fn mnn_cpp_build(vendor: impl AsRef) -> Result<()> { // CxxOption::VULKAN.define(&mut build); // CxxOption::COREML.define(&mut build); + CxxOption::CUDA.define(&mut build); CxxOption::METAL.define(&mut build); CxxOption::OPENCL.define(&mut build); CxxOption::CRT_STATIC.define(&mut build); @@ -1043,30 +1047,44 @@ pub fn cc_builder() -> cc::Build { pub fn cuda(mut build: cc::Build, vendor: impl AsRef) -> Result { let cuda_dir = vendor.as_ref().join("source/backend/cuda"); - let cuda_files = ignore::WalkBuilder::new(cuda_dir.join("core")) - .add(cuda_dir.join("execution")) - .build() - .flatten() - .filter(|p| p.path().has_extension(["cpp", "cu"])) - .map(|e| e.into_path()) - .filter(|p| !p.components().any(|component| component.as_os_str().eq("plugin"))) - .filter(|p| !p.components().any(|component| component.as_os_str().eq("weight_only_quant"))); + let (cuda_files_cu, cuda_files_cpp): (Vec<_>, Vec<_>) = + ignore::WalkBuilder::new(cuda_dir.join("core")) + .add(cuda_dir.join("execution")) + .build() + .flatten() + .filter(|p| p.path().has_extension(["cpp", "cu"])) + .map(|e| e.into_path()) + .filter(|p| { + !p.components() + .any(|component| component.as_os_str().eq("plugin")) + }) + .filter(|p| { + !p.components() + .any(|component| component.as_os_str().eq("weight_only_quant")) + }) + .partition(|p| p.has_extension(["cu"])); fn cuda_compute(version: u8, enable: bool) -> impl FnOnce(&mut cc::Build) -> &mut cc::Build { move |build: &mut cc::Build| { if enable { build.define(&format!("MNN_CUDA_ENABLE_SM{version}"), None); } - build.flag(&format!( - "-gencode=arch=compute_{version},code=sm_{version}", - )) + 
build.flag("-gencode"); + build.flag(&format!("arch=compute_{version},code=sm_{version}",)) } } - cc::Build::new() + let cuda_objects = cc::Build::new() .cuda(true) .cudart("static") - .flag("--std=c++17") + .flag("-m64") + .flag("--std") + .flag("c++11") + .flag("-w") + .flag("-O3") + .flag("-g") + .define("MNN_Cuda_Main_EXPORTS", None) + // .flag("--std=c++17") // .flag("-O3") .includes(mnn_includes(vendor.as_ref())) .include(vendor.as_ref().join("3rd_party/cutlass/v2_9_0/include")) @@ -1086,12 +1104,23 @@ pub fn cuda(mut build: cc::Build, vendor: impl AsRef) -> Result .pipe(cuda_compute(80, true)) .pipe(cuda_compute(86, true)) .pipe(cuda_compute(89, true)) - .files(cuda_files) + .files(cuda_files_cu) + .try_compile_intermediates() + .change_context(Error) + .attach_printable("Failed to compile MNNCuda")?; + + cc_builder() + .includes(mnn_includes(vendor.as_ref())) + .include(vendor.as_ref().join("3rd_party/cutlass/v2_9_0/include")) + .include(&cuda_dir) .file(cuda_dir.join("Register.cpp")) + .files(cuda_files_cpp) + .objects(cuda_objects) + .cargo_debug(true) .try_compile("MNNCuda") .change_context(Error) - .attach_printable("Failed to compile MNNCuda")?; - build.define("MNN_CUDA_ENABLED", "1"); + .attach_printable("Failed to compile cuda/Register.cpp")?; + CxxOption::CUDA.define(&mut build); Ok(build) } diff --git a/mnn-sys/vendor b/mnn-sys/vendor index 707b8a4..dd43b5a 160000 --- a/mnn-sys/vendor +++ b/mnn-sys/vendor @@ -1 +1 @@ -Subproject commit 707b8a41b25e3d0b7c4a39cd81109d7074ca3c28 +Subproject commit dd43b5aa4b157d892b2ef8c78a5c921024709539 diff --git a/src/schedule.rs b/src/schedule.rs index 8e73d1a..b6f397b 100644 --- a/src/schedule.rs +++ b/src/schedule.rs @@ -477,3 +477,22 @@ impl FromIterator for ScheduleConfigs { } unsafe impl Send for ScheduleConfigs {} +// +// #[derive(Debug, Clone)] +// pub enum UserDeviceContext { +// OpenCL(OpenCLContext), +// Cuda(CudaContext), +// } +// +// #[derive(Debug, Clone)] +// pub struct OpenCLContext { +// pub 
device_id: u32, +// pub platform_id: u32, +// pub context_ptr: *mut core::ffi::c_void, +// pub gl_shared: *mut core::ffi::c_void, +// } +// +// #[derive(Debug, Clone)] +// pub struct CudaContext { +// pub device_id: u32, +// } diff --git a/tools/bencher/src/main.rs b/tools/bencher/src/main.rs index 87f9f49..dfa0854 100644 --- a/tools/bencher/src/main.rs +++ b/tools/bencher/src/main.rs @@ -211,7 +211,9 @@ impl ScheduleConfigItem { bc.set_power_mode(self.power); bc.set_precision_mode(self.precision); bc.set_memory_mode(self.memory); - sc.set_type(self.forward).set_backend_config(bc); + sc.set_type(self.forward) + .set_backup_type(self.forward) + .set_backend_config(bc); sc } } @@ -306,7 +308,16 @@ pub fn main() -> Result<()> { // let indicatif_layer = IndicatifLayer::new(); tracing_subscriber::registry() .with(cli.verbose.tracing_level_filter()) - .with(tracing_subscriber::fmt::layer().with_writer(Term::stderr)) + .with( + tracing_subscriber::fmt::layer() + .event_format( + tracing_subscriber::fmt::format() + .with_line_number(true) + .with_ansi(true) + .with_file(true), + ) + .with_writer(Term::stderr), + ) .init(); match cli.subcommand { From ab4d50e017ea2c27370d821440b2c98eb6cb7281 Mon Sep 17 00:00:00 2001 From: uttarayan21 Date: Mon, 6 Jan 2025 18:43:59 +0530 Subject: [PATCH 12/12] feat: remove cppcheck and added mnn --- flake.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flake.nix b/flake.nix index d8c0b82..86e00b6 100644 --- a/flake.nix +++ b/flake.nix @@ -225,7 +225,7 @@ rust-bindgen google-cloud-sdk rustToolchainWithRustAnalyzer - cppcheck + mnn ] ++ ( lib.optionals pkgs.stdenv.isLinux [