Skip to content

Commit fac276d

Browse files
committed
[SPARSE] Add support for cuSPARSE backend
1 parent c9d0b47 commit fac276d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+3635
-263
lines changed

CMakeLists.txt

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,9 @@ option(ENABLE_CUFFT_BACKEND "Enable the cuFFT backend for the DFT interface" OFF
6060
option(ENABLE_ROCFFT_BACKEND "Enable the rocFFT backend for the DFT interface" OFF)
6161
option(ENABLE_PORTFFT_BACKEND "Enable the portFFT DFT backend for the DFT interface. Cannot be used with other DFT backends." OFF)
6262

63+
# sparse
64+
option(ENABLE_CUSPARSE_BACKEND "Enable the cuSPARSE backend for the SPARSE_BLAS interface" OFF)
65+
6366
set(ONEMKL_SYCL_IMPLEMENTATION "dpc++" CACHE STRING "Name of the SYCL compiler")
6467
set(HIP_TARGETS "" CACHE STRING "Target HIP architectures")
6568

@@ -102,7 +105,8 @@ if(ENABLE_MKLGPU_BACKEND
102105
list(APPEND DOMAINS_LIST "dft")
103106
endif()
104107
if(ENABLE_MKLCPU_BACKEND
105-
OR ENABLE_MKLGPU_BACKEND)
108+
OR ENABLE_MKLGPU_BACKEND
109+
OR ENABLE_CUSPARSE_BACKEND)
106110
list(APPEND DOMAINS_LIST "sparse_blas")
107111
endif()
108112

@@ -129,7 +133,7 @@ if(CMAKE_CXX_COMPILER OR NOT ONEMKL_SYCL_IMPLEMENTATION STREQUAL "dpc++")
129133
string(REPLACE "\\" "/" CMAKE_CXX_COMPILER ${CMAKE_CXX_COMPILER})
130134
endif()
131135
else()
132-
if(ENABLE_CUBLAS_BACKEND OR ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUFFT_BACKEND
136+
if(ENABLE_CUBLAS_BACKEND OR ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUFFT_BACKEND OR ENABLE_CUSPARSE_BACKEND
133137
OR ENABLE_ROCBLAS_BACKEND OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND OR ENABLE_ROCFFT_BACKEND)
134138
set(CMAKE_CXX_COMPILER "clang++")
135139
elseif(ENABLE_MKLGPU_BACKEND)

cmake/FindCompiler.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ if(is_dpcpp)
3737
# Check if the Nvidia target is supported. PortFFT uses this for choosing default configuration.
3838
check_cxx_compiler_flag("-fsycl -fsycl-targets=nvptx64-nvidia-cuda" dpcpp_supports_nvptx64)
3939

40-
if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND)
40+
if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUSPARSE_BACKEND)
4141
list(APPEND UNIX_INTERFACE_COMPILE_OPTIONS
4242
-fsycl-targets=nvptx64-nvidia-cuda -fsycl-unnamed-lambda)
4343
list(APPEND UNIX_INTERFACE_LINK_OPTIONS
@@ -51,7 +51,7 @@ if(is_dpcpp)
5151
-fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend
5252
--offload-arch=${HIP_TARGETS})
5353
endif()
54-
if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_ROCBLAS_BACKEND
54+
if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUSPARSE_BACKEND OR ENABLE_ROCBLAS_BACKEND
5555
OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND)
5656
set_target_properties(ONEMKL::SYCL::SYCL PROPERTIES
5757
INTERFACE_COMPILE_OPTIONS "${UNIX_INTERFACE_COMPILE_OPTIONS}"

docs/building_the_project_with_dpcpp.rst

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,9 @@ The most important supported build options are:
104104
* - ENABLE_CURAND_BACKEND
105105
- True, False
106106
- False
107+
* - ENABLE_CUSPARSE_BACKEND
108+
- True, False
109+
- False
107110
* - ENABLE_NETLIB_BACKEND
108111
- True, False
109112
- False
@@ -183,8 +186,8 @@ Building for CUDA
183186
^^^^^^^^^^^^^^^^^
184187

185188
The CUDA backends can be enabled with ``ENABLE_CUBLAS_BACKEND``,
186-
``ENABLE_CUFFT_BACKEND``, ``ENABLE_CURAND_BACKEND``, and
187-
``ENABLE_CUSOLVER_BACKEND``.
189+
``ENABLE_CUFFT_BACKEND``, ``ENABLE_CURAND_BACKEND``,
190+
``ENABLE_CUSOLVER_BACKEND``, and ``ENABLE_CUSPARSE_BACKEND``.
188191

189192
No additional parameters are required for using CUDA libraries. In most cases,
190193
the CUDA libraries should be found automatically by CMake.
@@ -356,6 +359,7 @@ disabled using the Ninja build system:
356359
-DENABLE_CUBLAS_BACKEND=True \
357360
-DENABLE_CUSOLVER_BACKEND=True \
358361
-DENABLE_CURAND_BACKEND=True \
362+
-DENABLE_CUSPARSE_BACKEND=True \
359363
-DBUILD_FUNCTIONAL_TESTS=False
360364
361365
``$ONEMKL_DIR`` points at the oneMKL source directory. The x86 CPU (``MKLCPU``)

docs/domains/sparse_linear_algebra.rst

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,139 @@ Currently known limitations:
3838
``oneapi::mkl::unimplemented`` exception.
3939
- Scalar parameters ``alpha`` and ``beta`` should be host pointers to prevent
4040
synchronizations and copies to the host.
41+
42+
43+
cuSPARSE backend
44+
----------------
45+
46+
Currently known limitations:
47+
48+
- Using ``spmv`` with a ``type_view`` other than ``matrix_descr::general`` will
49+
throw an ``oneapi::mkl::unimplemented`` exception.
50+
- The COO format requires the indices to be sorted by row. See the `cuSPARSE
51+
documentation
52+
<https://docs.nvidia.com/cuda/cusparse/index.html#coordinate-coo>`_.
53+
54+
55+
Operation algorithms mapping
56+
----------------------------
57+
58+
The following tables describe how a oneMKL SYCL Interface algorithm maps to the
59+
backend's algorithms. Refer to the backend's documentation for a more detailed
60+
explanation of the algorithms.
61+
62+
Backends with no equivalent algorithms will fall back to the backend's default
63+
behavior.
64+
65+
66+
spmm
67+
^^^^
68+
69+
.. list-table::
70+
:header-rows: 1
71+
:widths: 10 30 45
72+
73+
* - Value
74+
- Description
75+
- Backend equivalent
76+
* - ``default_optimize_alg``
77+
- Default algorithm.
78+
- | MKL: none
79+
| cuSPARSE: ``CUSPARSE_SPMM_ALG_DEFAULT``
80+
* - ``no_optimize_alg``
81+
- Default algorithm but may skip some optimizations. Useful only if an
82+
operation with the same configuration is run once.
83+
- | MKL: none
84+
| cuSPARSE: ``CUSPARSE_SPMM_ALG_DEFAULT``
85+
* - ``coo_alg1``
86+
- Should provide best performance for COO format, small ``nnz`` and
87+
column-major layout.
88+
- | MKL: none
89+
| cuSPARSE: ``CUSPARSE_SPMM_COO_ALG1``
90+
* - ``coo_alg2``
91+
- Should provide best performance for COO format and column-major layout.
92+
Produces deterministic results.
93+
- | MKL: none
94+
| cuSPARSE: ``CUSPARSE_SPMM_COO_ALG2``
95+
* - ``coo_alg3``
96+
- Should provide best performance for COO format and large ``nnz``.
97+
- | MKL: none
98+
| cuSPARSE: ``CUSPARSE_SPMM_COO_ALG3``
99+
* - ``coo_alg4``
100+
- Should provide best performance for COO format and row-major layout.
101+
- | MKL: none
102+
| cuSPARSE: ``CUSPARSE_SPMM_COO_ALG4``
103+
* - ``csr_alg1``
104+
- Should provide best performance for CSR format and column-major layout.
105+
- | MKL: none
106+
| cuSPARSE: ``CUSPARSE_SPMM_CSR_ALG1``
107+
* - ``csr_alg2``
108+
- Should provide best performance for CSR format and row-major layout.
109+
- | MKL: none
110+
| cuSPARSE: ``CUSPARSE_SPMM_CSR_ALG2``
111+
* - ``csr_alg3``
112+
- Deterministic algorithm for CSR format.
113+
- | MKL: none
114+
| cuSPARSE: ``CUSPARSE_SPMM_CSR_ALG3``
115+
116+
117+
spmv
118+
^^^^
119+
120+
.. list-table::
121+
:header-rows: 1
122+
:widths: 10 30 45
123+
124+
* - Value
125+
- Description
126+
- Backend equivalent
127+
* - ``default_alg``
128+
- Default algorithm.
129+
- | MKL: none
130+
| cuSPARSE: ``CUSPARSE_SPMV_ALG_DEFAULT``
131+
* - ``no_optimize_alg``
132+
- Default algorithm but may skip some optimizations. Useful only if an
133+
operation with the same configuration is run once.
134+
- | MKL: none
135+
| cuSPARSE: ``CUSPARSE_SPMV_ALG_DEFAULT``
136+
* - ``coo_alg1``
137+
- Default algorithm for COO format.
138+
- | MKL: none
139+
| cuSPARSE: ``CUSPARSE_SPMV_COO_ALG1``
140+
* - ``coo_alg2``
141+
- Deterministic algorithm for COO format.
142+
- | MKL: none
143+
| cuSPARSE: ``CUSPARSE_SPMV_COO_ALG2``
144+
* - ``csr_alg1``
145+
- Default algorithm for CSR format.
146+
- | MKL: none
147+
| cuSPARSE: ``CUSPARSE_SPMV_CSR_ALG1``
148+
* - ``csr_alg2``
149+
- Deterministic algorithm for CSR format.
150+
- | MKL: none
151+
| cuSPARSE: ``CUSPARSE_SPMV_CSR_ALG2``
152+
* - ``csr_alg3``
153+
- LRB variant of the algorithm for CSR format.
154+
- | MKL: none
155+
| cuSPARSE: none
156+
157+
158+
spsv
159+
^^^^
160+
161+
.. list-table::
162+
:header-rows: 1
163+
:widths: 10 30 45
164+
165+
* - Value
166+
- Description
167+
- Backend equivalent
168+
* - ``default_optimize_alg``
169+
- Default algorithm.
170+
- | MKL: none
171+
| cuSPARSE: ``CUSPARSE_SPSV_ALG_DEFAULT``
172+
* - ``no_optimize_alg``
173+
- Default algorithm but may skip some optimizations. Useful only if an
174+
operation with the same configuration is run once.
175+
- | MKL: none
176+
| cuSPARSE: ``CUSPARSE_SPSV_ALG_DEFAULT``

examples/sparse_blas/compile_time_dispatching/CMakeLists.txt

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,27 +18,24 @@
1818
#===============================================================================
1919

2020
#Build object from all sources
21-
set(SPARSE_BLAS_BACKENDS "")
22-
23-
if(ENABLE_MKLCPU_BACKEND)
24-
list(APPEND SPARSE_BLAS_BACKENDS "mklcpu")
21+
set(SPARSE_CT_SOURCES "")
22+
if(ENABLE_MKLCPU_BACKEND AND ENABLE_CUSPARSE_BACKEND)
23+
list(APPEND SPARSE_CT_SOURCES "sparse_blas_spmv_usm_mklcpu_cusparse")
2524
endif()
2625

2726
include(WarningsUtils)
2827

29-
foreach(backend ${SPARSE_BLAS_BACKENDS})
30-
set(EXAMPLE_NAME example_sparse_blas_spmv_usm_${backend})
31-
add_executable(${EXAMPLE_NAME} sparse_blas_spmv_usm_${backend}.cpp)
32-
target_include_directories(${EXAMPLE_NAME}
28+
foreach(sparse_ct_source ${SPARSE_CT_SOURCES})
29+
add_executable(${sparse_ct_source} ${sparse_ct_source}.cpp)
30+
target_include_directories(${sparse_ct_source}
3331
PUBLIC ${PROJECT_SOURCE_DIR}/examples/include
3432
PUBLIC ${PROJECT_SOURCE_DIR}/include
3533
PUBLIC ${CMAKE_BINARY_DIR}/bin
3634
)
3735

38-
add_dependencies(${EXAMPLE_NAME} onemkl_sparse_blas_${backend})
39-
target_link_libraries(${EXAMPLE_NAME} PRIVATE ONEMKL::SYCL::SYCL onemkl_sparse_blas_${backend})
36+
target_link_libraries(${sparse_ct_source} PRIVATE ONEMKL::SYCL::SYCL onemkl_sparse_blas_mklcpu onemkl_sparse_blas_cusparse)
4037

4138
# Register example as ctest
42-
add_test(NAME sparse_blas/EXAMPLE/CT/sparse_blas_spmv_usm_${backend} COMMAND ${EXAMPLE_NAME})
43-
endforeach(backend)
39+
add_test(NAME sparse_blas/EXAMPLE/CT/${sparse_ct_source} COMMAND ${sparse_ct_source})
40+
endforeach(sparse_ct_source)
4441

0 commit comments

Comments
 (0)