Merge pull request #626 from SciML/cudss

ChrisRackauckas · web-flow · commit bc0f4ee40063 · 2025-06-20T13:12:03.000Z
Fix CUDSS dispatches
diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
@@ -12,6 +12,6 @@ steps:
       GROUP: 'LinearSolveCUDA'
       JULIA_PKG_SERVER: "" # it often struggles with our large artifacts
       # SECRET_CODECOV_TOKEN: "..."
-    timeout_in_minutes: 30
+    timeout_in_minutes: 180
     # Don't run Buildkite if the commit message includes the text [skip tests]
     if: build.message !~ /\[skip tests\]/
diff --git a/ext/LinearSolveCUDAExt.jl b/ext/LinearSolveCUDAExt.jl
@@ -5,6 +5,10 @@ using LinearSolve
 using LinearSolve.LinearAlgebra, LinearSolve.SciMLBase, LinearSolve.ArrayInterface
 using SciMLBase: AbstractSciMLOperator
 
+function LinearSolve.is_cusparse(A::Union{CUDA.CUSPARSE.CuSparseMatrixCSR, CUDA.CUSPARSE.CuSparseMatrixCSC})
+    true  # overrides the generic `is_cusparse(A) = false` fallback from src/LinearSolve.jl
+end
+
 function LinearSolve.defaultalg(A::CUDA.CUSPARSE.CuSparseMatrixCSR{Tv, Ti}, b,
         assump::OperatorAssumptions{Bool}) where {Tv, Ti}
     if LinearSolve.cudss_loaded(A)
diff --git a/ext/LinearSolveSparseArraysExt.jl b/ext/LinearSolveSparseArraysExt.jl
@@ -3,7 +3,7 @@ module LinearSolveSparseArraysExt
 using LinearSolve, LinearAlgebra
 using SparseArrays
 using SparseArrays: AbstractSparseMatrixCSC, nonzeros, rowvals, getcolptr
-using LinearSolve: BLASELTYPES, pattern_changed
+using LinearSolve: BLASELTYPES, pattern_changed, ArrayInterface
 
 # Can't `using KLU` because cannot have a dependency in there without
 # requiring the user does `using KLU`
@@ -100,15 +100,31 @@ function LinearSolve.init_cacheval(
         Pl, Pr,
         maxiters::Int, abstol, reltol,
         verbose::Bool, assumptions::OperatorAssumptions) where {T<:BLASELTYPES}
-    SparseArrays.UMFPACK.UmfpackLU(SparseMatrixCSC{T, Int64}(zero(Int64), zero(Int64), [Int64(1)], Int64[], T[]))
+    if LinearSolve.is_cusparse(A)  # qualified: `is_cusparse` is not among the names this extension imports
+        ArrayInterface.lu_instance(A)
+    else
+        SparseArrays.UMFPACK.UmfpackLU(SparseMatrixCSC{T, Int64}(zero(Int64), zero(Int64), [Int64(1)], Int64[], T[]))
+    end
 end
 
 function LinearSolve.init_cacheval(
         alg::LUFactorization, A::AbstractSparseArray{T, Int32}, b, u,
         Pl, Pr,
         maxiters::Int, abstol, reltol,
         verbose::Bool, assumptions::OperatorAssumptions) where {T<:BLASELTYPES}
-    SparseArrays.UMFPACK.UmfpackLU(SparseMatrixCSC{T, Int32}(zero(Int32), zero(Int32), [Int32(1)], Int32[], T[]))
+    if LinearSolve.is_cusparse(A)  # CuSparse inputs take `lu_instance(A)` as their cache value
+        ArrayInterface.lu_instance(A)
+    else  # otherwise: UmfpackLU of a 0x0 Int32-indexed SparseMatrixCSC with no stored entries
+        SparseArrays.UMFPACK.UmfpackLU(SparseMatrixCSC{T, Int32}(zero(Int32), zero(Int32), [Int32(1)], Int32[], T[]))
+    end
+end
+
+function LinearSolve.init_cacheval(
+        alg::LUFactorization, A::LinearSolve.GPUArraysCore.AnyGPUArray, b, u,
+        Pl, Pr,
+        maxiters::Int, abstol, reltol,
+        verbose::Bool, assumptions::OperatorAssumptions)
+    ArrayInterface.lu_instance(A)  # GPU arrays: cache value comes from lu_instance, not a UMFPACK object
+end
 
 function LinearSolve.init_cacheval(
@@ -120,6 +136,14 @@ function LinearSolve.init_cacheval(
     PREALLOCATED_UMFPACK
 end
 
+function LinearSolve.init_cacheval(
+        alg::UMFPACKFactorization, A::LinearSolve.GPUArraysCore.AnyGPUArray, b, u,
+        Pl, Pr,
+        maxiters::Int, abstol, reltol,
+        verbose::Bool, assumptions::OperatorAssumptions)
+    nothing  # no cache value for GPU arrays; presumably because UMFPACK is CPU-only — TODO confirm
+end
+
 function LinearSolve.init_cacheval(
         alg::UMFPACKFactorization, A::AbstractSparseArray{T, Int64}, b, u,
         Pl, Pr,
@@ -191,6 +215,14 @@ function LinearSolve.init_cacheval(
     PREALLOCATED_KLU
 end
 
+function LinearSolve.init_cacheval(
+        alg::KLUFactorization, A::LinearSolve.GPUArraysCore.AnyGPUArray, b, u,
+        Pl, Pr,
+        maxiters::Int, abstol, reltol,
+        verbose::Bool, assumptions::OperatorAssumptions)
+    nothing  # no cache value for GPU arrays; presumably because KLU is CPU-only — TODO confirm
+end
+
 function LinearSolve.init_cacheval(
         alg::KLUFactorization, A::AbstractSparseArray{Float64, Int32}, b, u, Pl, Pr,
         maxiters::Int, abstol,
diff --git a/src/LinearSolve.jl b/src/LinearSolve.jl
@@ -217,6 +217,7 @@ end
 ALREADY_WARNED_CUDSS = Ref{Bool}(false)
 error_no_cudss_lu(A) = nothing
 cudss_loaded(A) = false
+is_cusparse(A) = false  # generic fallback; ext/LinearSolveCUDAExt.jl returns true for CuSparse CSR/CSC
 
 export LUFactorization, SVDFactorization, QRFactorization, GenericFactorization,
        GenericLUFactorization, SimpleLUFactorization, RFLUFactorization,
diff --git a/test/gpu/Project.toml b/test/gpu/Project.toml
@@ -1,6 +1,7 @@
 [deps]
 BlockDiagonals = "0a1fb500-61f7-11e9-3c65-f5ef3456f9f0"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+CUDSS = "45b445bb-4962-46a0-9369-b4df9d0f772e"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 LinearSolve = "7ed4a6bd-45f5-4d41-b270-4a48e9bafcae"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
diff --git a/test/gpu/cuda.jl b/test/gpu/cuda.jl
@@ -1,4 +1,5 @@
 using LinearSolve, CUDA, LinearAlgebra, SparseArrays, StableRNGs
+using CUDA.CUSPARSE, CUDSS
 using Test
 
 CUDA.allowscalar(false)
@@ -91,3 +92,17 @@ prob2 = LinearProblem(transpose(A), b)
     sol = solve(prob2, alg; alias = LinearAliasSpecifier(alias_A = false))
     @test norm(transpose(A) * sol.u .- b) < 1e-5
 end
+
+@testset "CUDSS" begin
+    T = Float32
+    n = 100
+    A_cpu = sprand(T, n, n, 0.05) + I
+    b_cpu = rand(T, n)
+
+    A_gpu_csr = CuSparseMatrixCSR(A_cpu)
+    b_gpu = CuVector(b_cpu)
+    # Solve with the default algorithm, then bound the normwise backward error (loose for Float32).
+    prob = LinearProblem(A_gpu_csr, b_gpu)
+    sol = solve(prob)
+    @test norm(A_gpu_csr * sol.u .- b_gpu) <= 1.0f-3 * (norm(sol.u) + norm(b_gpu))
+end