Skip to content

Commit 044479d

Browse files
authored
[SPARSE] Add support for rocSPARSE backend (#544)
1 parent def5402 commit 044479d

38 files changed

+2998
-422
lines changed

CMakeLists.txt

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ option(ENABLE_PORTFFT_BACKEND "Enable the portFFT DFT backend for the DFT interf
6262

6363
# sparse
6464
option(ENABLE_CUSPARSE_BACKEND "Enable the cuSPARSE backend for the SPARSE_BLAS interface" OFF)
65+
option(ENABLE_ROCSPARSE_BACKEND "Enable the rocSPARSE backend for the SPARSE_BLAS interface" OFF)
6566

6667
set(ONEMATH_SYCL_IMPLEMENTATION "dpc++" CACHE STRING "Name of the SYCL compiler")
6768
set(HIP_TARGETS "" CACHE STRING "Target HIP architectures")
@@ -106,7 +107,8 @@ if(ENABLE_MKLGPU_BACKEND
106107
endif()
107108
if(ENABLE_MKLCPU_BACKEND
108109
OR ENABLE_MKLGPU_BACKEND
109-
OR ENABLE_CUSPARSE_BACKEND)
110+
OR ENABLE_CUSPARSE_BACKEND
111+
OR ENABLE_ROCSPARSE_BACKEND)
110112
list(APPEND DOMAINS_LIST "sparse_blas")
111113
endif()
112114

@@ -134,7 +136,7 @@ if(CMAKE_CXX_COMPILER OR NOT ONEMATH_SYCL_IMPLEMENTATION STREQUAL "dpc++")
134136
endif()
135137
else()
136138
if(ENABLE_CUBLAS_BACKEND OR ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUFFT_BACKEND OR ENABLE_CUSPARSE_BACKEND
137-
OR ENABLE_ROCBLAS_BACKEND OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND OR ENABLE_ROCFFT_BACKEND)
139+
OR ENABLE_ROCBLAS_BACKEND OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND OR ENABLE_ROCFFT_BACKEND OR ENABLE_ROCSPARSE_BACKEND)
138140
set(CMAKE_CXX_COMPILER "clang++")
139141
elseif(ENABLE_MKLGPU_BACKEND)
140142
if(UNIX)

README.md

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ oneMath is part of the [UXL Foundation](http://www.uxlfoundation.org).
1818
</thead>
1919
<tbody>
2020
<tr>
21-
<td rowspan=13 align="center">oneMath</td>
22-
<td rowspan=13 align="center">oneMath selector</td>
21+
<td rowspan=14 align="center">oneMath</td>
22+
<td rowspan=14 align="center">oneMath selector</td>
2323
<td align="center"><a href="https://software.intel.com/en-us/oneapi/onemkl">Intel(R) oneAPI Math Kernel Library (oneMKL)</a></td>
2424
<td align="center">x86 CPU, Intel GPU</td>
2525
</tr>
@@ -61,7 +61,11 @@ oneMath is part of the [UXL Foundation](http://www.uxlfoundation.org).
6161
<td align="center">AMD GPU</td>
6262
</tr>
6363
<tr>
64-
<td align="center"><a href="https://github.com/ROCmSoftwarePlatform/rocFFT">AMD rocFFT</a></td>
64+
<td align="center"><a href="https://github.com/ROCmSoftwarePlatform/rocFFT">AMD rocFFT</a></td>
65+
<td align="center">AMD GPU</td>
66+
</tr>
67+
<tr>
68+
<td align="center"><a href="https://github.com/ROCmSoftwarePlatform/rocSPARSE">AMD rocSPARSE</a></td>
6569
<td align="center">AMD GPU</td>
6670
</tr>
6771
<tr>
@@ -333,7 +337,7 @@ Supported compilers include:
333337
<td align="center">Dynamic, Static</td>
334338
</tr>
335339
<tr>
336-
<td rowspan=3 align="center">SPARSE_BLAS</td>
340+
<td rowspan=4 align="center">SPARSE_BLAS</td>
337341
<td align="center">x86 CPU</td>
338342
<td align="center">Intel(R) oneMKL</td>
339343
<td align="center">Intel DPC++</td>
@@ -351,6 +355,12 @@ Supported compilers include:
351355
<td align="center">Open DPC++</td>
352356
<td align="center">Dynamic, Static</td>
353357
</tr>
358+
<tr>
359+
<td align="center">AMD GPU</td>
360+
<td align="center">AMD rocSPARSE</td>
361+
<td align="center">Open DPC++</td>
362+
<td align="center">Dynamic, Static</td>
363+
</tr>
354364
</tbody>
355365
</table>
356366

@@ -537,6 +547,7 @@ Product | Supported Version | License
537547
[AMD rocRAND](https://github.com/ROCm/rocRAND) | 5.1.0 | [AMD License](https://github.com/ROCm/rocRAND/blob/develop/LICENSE.txt)
538548
[AMD rocSOLVER](https://github.com/ROCm/rocSOLVER) | 5.0.0 | [AMD License](https://github.com/ROCm/rocSOLVER/blob/develop/LICENSE.md)
539549
[AMD rocFFT](https://github.com/ROCm/rocFFT) | rocm-5.4.3 | [AMD License](https://github.com/ROCm/rocFFT/blob/rocm-5.4.3/LICENSE.md)
550+
[AMD rocSPARSE](https://github.com/ROCm/rocSPARSE) | 3.1.2 | [AMD License](https://github.com/ROCm/rocSPARSE/blob/develop/LICENSE.md)
540551
[NETLIB LAPACK](https://www.netlib.org/) | [5d4180c](https://github.com/Reference-LAPACK/lapack/commit/5d4180cf8288ae6ad9a771d18793d15bd0c5643c) | [BSD like license](http://www.netlib.org/lapack/LICENSE.txt)
541552
[portBLAS](https://github.com/codeplaysoftware/portBLAS) | 0.1 | [Apache License v2.0](https://github.com/codeplaysoftware/portBLAS/blob/main/LICENSE)
542553
[portFFT](https://github.com/codeplaysoftware/portFFT) | 0.1 | [Apache License v2.0](https://github.com/codeplaysoftware/portFFT/blob/main/LICENSE)

cmake/FindCompiler.cmake

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ if(is_dpcpp)
4343
list(APPEND UNIX_INTERFACE_LINK_OPTIONS
4444
-fsycl-targets=nvptx64-nvidia-cuda)
4545
elseif(ENABLE_ROCBLAS_BACKEND OR ENABLE_ROCRAND_BACKEND
46-
OR ENABLE_ROCSOLVER_BACKEND)
46+
OR ENABLE_ROCSOLVER_BACKEND OR ENABLE_ROCSPARSE_BACKEND)
4747
list(APPEND UNIX_INTERFACE_COMPILE_OPTIONS
4848
-fsycl-targets=amdgcn-amd-amdhsa -fsycl-unnamed-lambda
4949
-Xsycl-target-backend --offload-arch=${HIP_TARGETS})
@@ -52,7 +52,7 @@ if(is_dpcpp)
5252
--offload-arch=${HIP_TARGETS})
5353
endif()
5454
if(ENABLE_CURAND_BACKEND OR ENABLE_CUSOLVER_BACKEND OR ENABLE_CUSPARSE_BACKEND OR ENABLE_ROCBLAS_BACKEND
55-
OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND)
55+
OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND OR ENABLE_ROCSPARSE_BACKEND)
5656
set_target_properties(ONEMATH::SYCL::SYCL PROPERTIES
5757
INTERFACE_COMPILE_OPTIONS "${UNIX_INTERFACE_COMPILE_OPTIONS}"
5858
INTERFACE_LINK_OPTIONS "${UNIX_INTERFACE_LINK_OPTIONS}"
@@ -69,7 +69,7 @@ if(is_dpcpp)
6969
INTERFACE_LINK_LIBRARIES ${SYCL_LIBRARY})
7070
endif()
7171

72-
if(ENABLE_ROCBLAS_BACKEND OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND)
72+
if(ENABLE_ROCBLAS_BACKEND OR ENABLE_ROCRAND_BACKEND OR ENABLE_ROCSOLVER_BACKEND OR ENABLE_ROCSPARSE_BACKEND)
7373
# Allow find_package(HIP) to find the correct path to libclang_rt.builtins.a
7474
# HIP's CMake uses the command `${HIP_CXX_COMPILER} -print-libgcc-file-name --rtlib=compiler-rt` to find this path.
7575
# This can print a non-existing file if the compiler used is icpx.

docs/building_the_project_with_dpcpp.rst

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,9 @@ The most important supported build options are:
121121
* - ENABLE_ROCRAND_BACKEND
122122
- True, False
123123
- False
124+
* - ENABLE_ROCSPARSE_BACKEND
125+
- True, False
126+
- False
124127
* - ENABLE_MKLCPU_THREAD_TBB
125128
- True, False
126129
- True
@@ -197,14 +200,14 @@ Building for ROCm
197200
^^^^^^^^^^^^^^^^^
198201

199202
The ROCm backends can be enabled with ``ENABLE_ROCBLAS_BACKEND``,
200-
``ENABLE_ROCFFT_BACKEND``, ``ENABLE_ROCSOLVER_BACKEND`` and
201-
``ENABLE_ROCRAND_BACKEND``.
203+
``ENABLE_ROCFFT_BACKEND``, ``ENABLE_ROCSOLVER_BACKEND``,
204+
``ENABLE_ROCRAND_BACKEND``, and ``ENABLE_ROCSPARSE_BACKEND``.
202205

203-
For *RocBLAS*, *RocSOLVER* and *RocRAND*, the target device architecture must be
204-
set. This can be set with using the ``HIP_TARGETS`` parameter. For example, to
205-
enable a build for MI200 series GPUs, ``-DHIP_TARGETS=gfx90a`` should be set.
206-
Currently, DPC++ can only build for a single HIP target at a time. This may
207-
change in future versions.
206+
For *RocBLAS*, *RocSOLVER*, *RocRAND*, and *RocSPARSE*, the target device
207+
architecture must be set. This can be set using the ``HIP_TARGETS``
208+
parameter. For example, to enable a build for MI200 series GPUs,
209+
``-DHIP_TARGETS=gfx90a`` should be set. Currently, DPC++ can only build for a
210+
single HIP target at a time. This may change in future versions.
208211

209212
A few often-used architectures are listed below:
210213

@@ -393,7 +396,8 @@ disabled:
393396
-DENABLE_MKLGPU_BACKEND=False \
394397
-DENABLE_ROCFFT_BACKEND=True \
395398
-DENABLE_ROCBLAS_BACKEND=True \
396-
-DENABLE_ROCSOLVER_BACKEND=True \
399+
-DENABLE_ROCSOLVER_BACKEND=True \
400+
-DENABLE_ROCSPARSE_BACKEND=True \
397401
-DHIP_TARGETS=gfx90a \
398402
-DBUILD_FUNCTIONAL_TESTS=False
399403

docs/domains/sparse_linear_algebra.rst

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,31 @@ Currently known limitations:
6868
``cusparseSpMV_preprocess``. Feel free to create an issue if this is needed.
6969

7070

71+
rocSPARSE backend
72+
-----------------
73+
74+
Currently known limitations:
75+
76+
- Using ``spmv`` with a ``type_view`` other than ``matrix_descr::general`` will
77+
throw a ``oneapi::math::unimplemented`` exception.
78+
- The COO format requires the indices to be sorted by row then by column. See
79+
the `rocSPARSE COO documentation
80+
<https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#coo-storage-format>`_.
81+
Sparse operations using matrices with the COO format without the property
82+
``matrix_property::sorted`` will throw a ``oneapi::math::unimplemented``
83+
exception.
84+
- The CSR format requires the column indices to be sorted within each row. See
85+
the `rocSPARSE CSR documentation
86+
<https://rocm.docs.amd.com/projects/rocSPARSE/en/latest/how-to/basics.html#csr-storage-format>`_.
87+
Sparse operations using matrices with the CSR format without the property
88+
``matrix_property::sorted`` will throw a ``oneapi::math::unimplemented``
89+
exception.
90+
- The same sparse matrix handle cannot be reused for multiple operations
91+
``spmm``, ``spmv``, or ``spsv``. Doing so will throw a
92+
``oneapi::math::unimplemented`` exception. See `#332
93+
<https://github.com/ROCm/rocSPARSE/issues/332>`_.
94+
95+
7196
Operation algorithms mapping
7297
----------------------------
7398

@@ -89,33 +114,43 @@ spmm
89114
* - ``spmm_alg`` value
90115
- MKLCPU/MKLGPU
91116
- cuSPARSE
117+
- rocSPARSE
92118
* - ``default_alg``
93119
- none
94120
- ``CUSPARSE_SPMM_ALG_DEFAULT``
121+
- ``rocsparse_spmm_alg_default``
95122
* - ``no_optimize_alg``
96123
- none
97124
- ``CUSPARSE_SPMM_ALG_DEFAULT``
125+
- ``rocsparse_spmm_alg_default``
98126
* - ``coo_alg1``
99127
- none
100128
- ``CUSPARSE_SPMM_COO_ALG1``
129+
- ``rocsparse_spmm_alg_coo_segmented``
101130
* - ``coo_alg2``
102131
- none
103132
- ``CUSPARSE_SPMM_COO_ALG2``
133+
- ``rocsparse_spmm_alg_coo_atomic``
104134
* - ``coo_alg3``
105135
- none
106136
- ``CUSPARSE_SPMM_COO_ALG3``
137+
- ``rocsparse_spmm_alg_coo_segmented_atomic``
107138
* - ``coo_alg4``
108139
- none
109140
- ``CUSPARSE_SPMM_COO_ALG4``
141+
- ``rocsparse_spmm_alg_default``
110142
* - ``csr_alg1``
111143
- none
112144
- ``CUSPARSE_SPMM_CSR_ALG1``
145+
- ``rocsparse_spmm_alg_csr``
113146
* - ``csr_alg2``
114147
- none
115148
- ``CUSPARSE_SPMM_CSR_ALG2``
149+
- ``rocsparse_spmm_alg_csr_row_split``
116150
* - ``csr_alg3``
117151
- none
118152
- ``CUSPARSE_SPMM_CSR_ALG3``
153+
- ``rocsparse_spmm_alg_csr_merge``
119154

120155

121156
spmv
@@ -128,27 +163,35 @@ spmv
128163
* - ``spmv_alg`` value
129164
- MKLCPU/MKLGPU
130165
- cuSPARSE
166+
- rocSPARSE
131167
* - ``default_alg``
132168
- none
133169
- ``CUSPARSE_SPMV_ALG_DEFAULT``
170+
- ``rocsparse_spmv_alg_default``
134171
* - ``no_optimize_alg``
135172
- none
136173
- ``CUSPARSE_SPMV_ALG_DEFAULT``
174+
- ``rocsparse_spmv_alg_default``
137175
* - ``coo_alg1``
138176
- none
139177
- ``CUSPARSE_SPMV_COO_ALG1``
178+
- ``rocsparse_spmv_alg_coo``
140179
* - ``coo_alg2``
141180
- none
142181
- ``CUSPARSE_SPMV_COO_ALG2``
182+
- ``rocsparse_spmv_alg_coo_atomic``
143183
* - ``csr_alg1``
144184
- none
145185
- ``CUSPARSE_SPMV_CSR_ALG1``
186+
- ``rocsparse_spmv_alg_csr_adaptive``
146187
* - ``csr_alg2``
147188
- none
148189
- ``CUSPARSE_SPMV_CSR_ALG2``
190+
- ``rocsparse_spmv_alg_csr_stream``
149191
* - ``csr_alg3``
150192
- none
151193
- ``CUSPARSE_SPMV_ALG_DEFAULT``
194+
- ``rocsparse_spmv_alg_csr_lrb``
152195

153196

154197
spsv
@@ -161,9 +204,12 @@ spsv
161204
* - ``spsv_alg`` value
162205
- MKLCPU/MKLGPU
163206
- cuSPARSE
207+
- rocSPARSE
164208
* - ``default_alg``
165209
- none
166210
- ``CUSPARSE_SPSV_ALG_DEFAULT``
211+
- ``rocsparse_spsv_alg_default``
167212
* - ``no_optimize_alg``
168213
- none
169214
- ``CUSPARSE_SPSV_ALG_DEFAULT``
215+
- ``rocsparse_spsv_alg_default``

examples/sparse_blas/run_time_dispatching/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ endif()
3636
if(ENABLE_CUSPARSE_BACKEND)
3737
list(APPEND DEVICE_FILTERS "cuda:gpu")
3838
endif()
39+
if(ENABLE_ROCSPARSE_BACKEND)
40+
list(APPEND DEVICE_FILTERS "hip:gpu")
41+
endif()
3942

4043
message(STATUS "ONEAPI_DEVICE_SELECTOR will be set to the following value(s): [${DEVICE_FILTERS}] for run-time dispatching examples")
4144

examples/sparse_blas/run_time_dispatching/sparse_blas_spmv_usm.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,11 @@ int run_sparse_matrix_vector_multiply_example(const sycl::device& dev) {
146146
oneapi::math::sparse::init_csr_matrix(main_queue, &A_handle, nrows, nrows, nnz,
147147
oneapi::math::index_base::zero, ia, ja, a);
148148

149+
// The rocSPARSE backend requires the `sorted` property to be set on matrices in CSR format.
150+
// Setting this property is also the best practice for getting the best performance.
151+
oneapi::math::sparse::set_matrix_property(main_queue, A_handle,
152+
oneapi::math::sparse::matrix_property::sorted);
153+
149154
// Create and initialize dense vector handles
150155
oneapi::math::sparse::dense_vector_handle_t x_handle = nullptr;
151156
oneapi::math::sparse::dense_vector_handle_t y_handle = nullptr;

include/oneapi/math/detail/backends.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ enum class backend {
4141
rocfft,
4242
portfft,
4343
cusparse,
44+
rocsparse,
4445
unsupported
4546
};
4647

@@ -63,6 +64,7 @@ static backendmap backend_map = { { backend::mklcpu, "mklcpu" },
6364
{ backend::rocfft, "rocfft" },
6465
{ backend::portfft, "portfft" },
6566
{ backend::cusparse, "cusparse" },
67+
{ backend::rocsparse, "rocsparse" },
6668
{ backend::unsupported, "unsupported" } };
6769
// clang-format on
6870

include/oneapi/math/detail/backends_table.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,12 @@ static std::map<domain, std::map<device, std::vector<const char*>>> libraries =
204204
{
205205
#ifdef ONEMATH_ENABLE_CUSPARSE_BACKEND
206206
LIB_NAME("sparse_blas_cusparse")
207+
#endif
208+
} },
209+
{ device::amdgpu,
210+
{
211+
#ifdef ONEMATH_ENABLE_ROCSPARSE_BACKEND
212+
LIB_NAME("sparse_blas_rocsparse")
207213
#endif
208214
} } } },
209215
};

include/oneapi/math/sparse_blas.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@
3737
#ifdef ONEMATH_ENABLE_CUSPARSE_BACKEND
3838
#include "sparse_blas/detail/cusparse/sparse_blas_ct.hpp"
3939
#endif
40+
#ifdef ONEMATH_ENABLE_ROCSPARSE_BACKEND
41+
#include "sparse_blas/detail/rocsparse/sparse_blas_ct.hpp"
42+
#endif
4043

4144
#include "sparse_blas/detail/sparse_blas_rt.hpp"
4245

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/***************************************************************************
2+
* Copyright (C) Codeplay Software Limited
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* For your convenience, a copy of the License has been included in this
10+
* repository.
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*
18+
**************************************************************************/
19+
20+
#ifndef _ONEMATH_SPARSE_BLAS_DETAIL_ROCSPARSE_ONEMATH_SPARSE_BLAS_ROCSPARSE_HPP_
21+
#define _ONEMATH_SPARSE_BLAS_DETAIL_ROCSPARSE_ONEMATH_SPARSE_BLAS_ROCSPARSE_HPP_
22+
23+
#include "oneapi/math/detail/export.hpp"
24+
#include "oneapi/math/sparse_blas/detail/helper_types.hpp"
25+
#include "oneapi/math/sparse_blas/types.hpp"
26+
27+
namespace oneapi::math::sparse::rocsparse {
28+
29+
#include "oneapi/math/sparse_blas/detail/onemath_sparse_blas_backends.hxx"
30+
31+
} // namespace oneapi::math::sparse::rocsparse
32+
33+
#endif // _ONEMATH_SPARSE_BLAS_DETAIL_ROCSPARSE_ONEMATH_SPARSE_BLAS_ROCSPARSE_HPP_

0 commit comments

Comments
 (0)