Skip to content

Commit c1ac9f4

Browse files
authored
Merge: add CUDA 10.1 generic SpMV to the benchmark
This adds the CUDA 10.1 generic SpMV (COO, and CSR with Algorithm 1/2) to the benchmark. Related PR: #468
2 parents e8de527 + 922f1fe commit c1ac9f4

File tree

4 files changed

+283
-2
lines changed

4 files changed

+283
-2
lines changed

benchmark/utils/cuda_linops.hpp

Lines changed: 218 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3737
#include <ginkgo/ginkgo.hpp>
3838

3939

40+
#include <cuda.h>
4041
#include <cuda_runtime.h>
4142
#include <cusparse.h>
4243
#include <memory>
@@ -45,6 +46,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4546
#include "cuda/base/cusparse_bindings.hpp"
4647
#include "cuda/base/device_guard.hpp"
4748
#include "cuda/base/pointer_mode_guard.hpp"
49+
#include "cuda/base/types.hpp"
4850

4951

5052
namespace detail {
@@ -54,7 +56,12 @@ class CuspBase : public gko::LinOp {
5456
public:
5557
cusparseMatDescr_t get_descr() const { return this->descr_.get(); }
5658

57-
const gko::CudaExecutor *get_gpu_exec() const { return gpu_exec_.get(); }
59+
// Returns the CUDA executor as a shared pointer instead of a plain pointer,
60+
// so that cusp_generic_spmv can hand it to gko::Array to allocate its buffer.
61+
std::shared_ptr<const gko::CudaExecutor> get_gpu_exec() const
62+
{
63+
return gpu_exec_;
64+
}
5865

5966
protected:
6067
void apply_impl(const gko::LinOp *, const gko::LinOp *, const gko::LinOp *,
@@ -475,6 +482,204 @@ class CuspHybrid
475482
};
476483

477484

485+
#if defined(CUDA_VERSION) && (CUDA_VERSION >= 10010)
486+
487+
488+
template <typename ValueType>
489+
void cusp_generic_spmv(std::shared_ptr<const gko::CudaExecutor> gpu_exec,
490+
const cusparseSpMatDescr_t mat,
491+
const gko::Array<ValueType> &scalars,
492+
const gko::LinOp *b, gko::LinOp *x,
493+
cusparseOperation_t trans, cusparseSpMVAlg_t alg)
494+
{
495+
cudaDataType_t cu_value = gko::kernels::cuda::cuda_data_type<ValueType>();
496+
using gko::kernels::cuda::as_culibs_type;
497+
auto dense_b = gko::as<gko::matrix::Dense<ValueType>>(b);
498+
auto dense_x = gko::as<gko::matrix::Dense<ValueType>>(x);
499+
auto db = dense_b->get_const_values();
500+
auto dx = dense_x->get_values();
501+
const auto id = gpu_exec->get_device_id();
502+
gko::cuda::device_guard g{id};
503+
cusparseDnVecDescr_t vecb, vecx;
504+
GKO_ASSERT_NO_CUSPARSE_ERRORS(
505+
cusparseCreateDnVec(&vecx, dense_x->get_num_stored_elements(),
506+
as_culibs_type(dx), cu_value));
507+
// cusparseCreateDnVec only allows non-const pointer
508+
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseCreateDnVec(
509+
&vecb, dense_b->get_num_stored_elements(),
510+
as_culibs_type(const_cast<ValueType *>(db)), cu_value));
511+
512+
size_t buffer_size = 0;
513+
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseSpMV_bufferSize(
514+
gpu_exec->get_cusparse_handle(), trans, &scalars.get_const_data()[0],
515+
mat, vecb, &scalars.get_const_data()[1], vecx, cu_value, alg,
516+
&buffer_size));
517+
gko::Array<char> buffer_array(gpu_exec, buffer_size);
518+
auto dbuffer = buffer_array.get_data();
519+
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseSpMV(
520+
gpu_exec->get_cusparse_handle(), trans, &scalars.get_const_data()[0],
521+
mat, vecb, &scalars.get_const_data()[1], vecx, cu_value, alg, dbuffer));
522+
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDestroyDnVec(vecx));
523+
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDestroyDnVec(vecb));
524+
}
525+
526+
527+
template <typename ValueType = gko::default_precision,
528+
typename IndexType = gko::int32,
529+
cusparseSpMVAlg_t Alg = CUSPARSE_MV_ALG_DEFAULT>
530+
class CuspGenericCsr
531+
: public gko::EnableLinOp<CuspGenericCsr<ValueType, IndexType, Alg>,
532+
CuspBase>,
533+
public gko::EnableCreateMethod<CuspGenericCsr<ValueType, IndexType, Alg>>,
534+
public gko::ReadableFromMatrixData<ValueType, IndexType> {
535+
friend class gko::EnableCreateMethod<CuspGenericCsr>;
536+
friend class gko::EnablePolymorphicObject<CuspGenericCsr, CuspBase>;
537+
538+
public:
539+
using csr = gko::matrix::Csr<ValueType, IndexType>;
540+
using mat_data = gko::matrix_data<ValueType, IndexType>;
541+
cusparseIndexType_t cu_index =
542+
gko::kernels::cuda::cusparse_index_type<IndexType>();
543+
cudaDataType_t cu_value = gko::kernels::cuda::cuda_data_type<ValueType>();
544+
545+
void read(const mat_data &data) override
546+
{
547+
using gko::kernels::cuda::as_culibs_type;
548+
csr_->read(data);
549+
this->set_size(gko::dim<2>{csr_->get_size()});
550+
GKO_ASSERT_NO_CUSPARSE_ERRORS(
551+
cusparseCreateCsr(&mat_, csr_->get_size()[0], csr_->get_size()[1],
552+
csr_->get_num_stored_elements(),
553+
as_culibs_type(csr_->get_row_ptrs()),
554+
as_culibs_type(csr_->get_col_idxs()),
555+
as_culibs_type(csr_->get_values()), cu_index,
556+
cu_index, CUSPARSE_INDEX_BASE_ZERO, cu_value));
557+
}
558+
559+
gko::size_type get_num_stored_elements() const noexcept
560+
{
561+
return csr_->get_num_stored_elements();
562+
}
563+
564+
~CuspGenericCsr() override
565+
{
566+
const auto id = this->get_gpu_exec()->get_device_id();
567+
try {
568+
gko::cuda::device_guard g{id};
569+
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDestroySpMat(mat_));
570+
} catch (const std::exception &e) {
571+
std::cerr << "Error when unallocating CuspGenericCsr mat_ matrix: "
572+
<< e.what() << std::endl;
573+
}
574+
}
575+
576+
CuspGenericCsr(const CuspGenericCsr &other) = delete;
577+
578+
CuspGenericCsr &operator=(const CuspGenericCsr &other) = default;
579+
580+
protected:
581+
void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override
582+
{
583+
cusp_generic_spmv(this->get_gpu_exec(), mat_, scalars, b, x, trans_,
584+
Alg);
585+
}
586+
587+
CuspGenericCsr(std::shared_ptr<const gko::Executor> exec,
588+
const gko::dim<2> &size = gko::dim<2>{})
589+
: gko::EnableLinOp<CuspGenericCsr, CuspBase>(exec, size),
590+
csr_(std::move(
591+
csr::create(exec, std::make_shared<typename csr::classical>()))),
592+
trans_(CUSPARSE_OPERATION_NON_TRANSPOSE)
593+
{}
594+
595+
private:
596+
// Contains {alpha, beta}
597+
gko::Array<ValueType> scalars{
598+
this->get_executor(), {gko::one<ValueType>(), gko::zero<ValueType>()}};
599+
std::shared_ptr<csr> csr_;
600+
cusparseOperation_t trans_;
601+
cusparseSpMatDescr_t mat_;
602+
};
603+
604+
605+
template <typename ValueType = gko::default_precision,
606+
typename IndexType = gko::int32>
607+
class CuspGenericCoo
608+
: public gko::EnableLinOp<CuspGenericCoo<ValueType, IndexType>, CuspBase>,
609+
public gko::EnableCreateMethod<CuspGenericCoo<ValueType, IndexType>>,
610+
public gko::ReadableFromMatrixData<ValueType, IndexType> {
611+
friend class gko::EnableCreateMethod<CuspGenericCoo>;
612+
friend class gko::EnablePolymorphicObject<CuspGenericCoo, CuspBase>;
613+
614+
public:
615+
using coo = gko::matrix::Coo<ValueType, IndexType>;
616+
using mat_data = gko::matrix_data<ValueType, IndexType>;
617+
cusparseIndexType_t cu_index =
618+
gko::kernels::cuda::cusparse_index_type<IndexType>();
619+
cudaDataType_t cu_value = gko::kernels::cuda::cuda_data_type<ValueType>();
620+
621+
void read(const mat_data &data) override
622+
{
623+
using gko::kernels::cuda::as_culibs_type;
624+
coo_->read(data);
625+
this->set_size(gko::dim<2>{coo_->get_size()});
626+
GKO_ASSERT_NO_CUSPARSE_ERRORS(
627+
cusparseCreateCoo(&mat_, coo_->get_size()[0], coo_->get_size()[1],
628+
coo_->get_num_stored_elements(),
629+
as_culibs_type(coo_->get_row_idxs()),
630+
as_culibs_type(coo_->get_col_idxs()),
631+
as_culibs_type(coo_->get_values()), cu_index,
632+
CUSPARSE_INDEX_BASE_ZERO, cu_value));
633+
}
634+
635+
gko::size_type get_num_stored_elements() const noexcept
636+
{
637+
return coo_->get_num_stored_elements();
638+
}
639+
640+
~CuspGenericCoo() override
641+
{
642+
const auto id = this->get_gpu_exec()->get_device_id();
643+
try {
644+
gko::cuda::device_guard g{id};
645+
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDestroySpMat(mat_));
646+
} catch (const std::exception &e) {
647+
std::cerr << "Error when unallocating CuspGenericCoo mat_ matrix: "
648+
<< e.what() << std::endl;
649+
}
650+
}
651+
652+
CuspGenericCoo(const CuspGenericCoo &other) = delete;
653+
654+
CuspGenericCoo &operator=(const CuspGenericCoo &other) = default;
655+
656+
protected:
657+
void apply_impl(const gko::LinOp *b, gko::LinOp *x) const override
658+
{
659+
cusp_generic_spmv(this->get_gpu_exec(), mat_, scalars, b, x, trans_,
660+
CUSPARSE_MV_ALG_DEFAULT);
661+
}
662+
663+
CuspGenericCoo(std::shared_ptr<const gko::Executor> exec,
664+
const gko::dim<2> &size = gko::dim<2>{})
665+
: gko::EnableLinOp<CuspGenericCoo, CuspBase>(exec, size),
666+
coo_(std::move(coo::create(exec))),
667+
trans_(CUSPARSE_OPERATION_NON_TRANSPOSE)
668+
{}
669+
670+
private:
671+
// Contains {alpha, beta}
672+
gko::Array<ValueType> scalars{
673+
this->get_executor(), {gko::one<ValueType>(), gko::zero<ValueType>()}};
674+
std::shared_ptr<coo> coo_;
675+
cusparseOperation_t trans_;
676+
cusparseSpMatDescr_t mat_;
677+
};
678+
679+
680+
#endif // defined(CUDA_VERSION) && (CUDA_VERSION >= 10010)
681+
682+
478683
} // namespace detail
479684

480685

@@ -485,6 +690,18 @@ using cusp_csrmp = detail::CuspCsrmp<>;
485690
using cusp_csrmm = detail::CuspCsrmm<>;
486691

487692

693+
#if defined(CUDA_VERSION) && (CUDA_VERSION >= 10010)
694+
695+
696+
// Generic cuSPARSE CSR SpMV with the default template arguments
// (Alg = CUSPARSE_MV_ALG_DEFAULT).
using cusp_gcsr = detail::CuspGenericCsr<>;
697+
// Generic cuSPARSE CSR SpMV using CUSPARSE_CSRMV_ALG2 with double values and
// 32-bit indices.
using cusp_gcsr2 =
698+
detail::CuspGenericCsr<double, gko::int32, CUSPARSE_CSRMV_ALG2>;
699+
// Generic cuSPARSE COO SpMV with the default template arguments.
using cusp_gcoo = detail::CuspGenericCoo<>;
700+
701+
702+
#endif // defined(CUDA_VERSION) && (CUDA_VERSION >= 10010)
703+
704+
488705
using cusp_coo =
489706
detail::CuspHybrid<double, gko::int32, CUSPARSE_HYB_PARTITION_USER, 0>;
490707
using cusp_ell =

benchmark/utils/formats.hpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,18 @@ std::string format_description =
9797
"cusp_csrex: benchmark CuSPARSE with the cusparseXcsrmvEx function.\n"
9898
"cusp_csrmp: benchmark CuSPARSE with the cusparseXcsrmv_mp function.\n"
9999
"cusp_csrmm: benchmark CuSPARSE with the cusparseXcsrmv_mm function."
100+
#if defined(CUDA_VERSION) && (CUDA_VERSION >= 10010)
101+
"\n"
102+
"cusp_gcsr: benchmark CuSPARSE with the generic csr with default "
103+
"algorithm.\n"
104+
"cusp_gcsr2: benchmark CuSPARSE with the generic csr with "
105+
"CUSPARSE_CSRMV_ALG2.\n"
106+
"cusp_gcoo: benchmark CuSPARSE with the generic coo with default "
107+
"algorithm.\n"
108+
#endif // defined(CUDA_VERSION) && (CUDA_VERSION >= 10010)
100109
#endif // HAS_CUDA
101110
#ifdef HAS_HIP
111+
"\n"
102112
"hipsp_csr: benchmark HipSPARSE with the hipsparseXcsrmv function.\n"
103113
"hipsp_csrmm: benchmark HipSPARSE with the hipsparseXcsrmv_mm function.\n"
104114
"hipsp_hybrid: benchmark HipSPARSE spmv with hipsparseXhybmv and an "
@@ -163,6 +173,7 @@ std::unique_ptr<MatrixType> read_matrix_from_data(
163173
}
164174

165175

176+
// clang-format off
166177
const std::map<std::string, std::function<std::unique_ptr<gko::LinOp>(
167178
std::shared_ptr<const gko::Executor>,
168179
const gko::matrix_data<> &)>>
@@ -181,6 +192,11 @@ const std::map<std::string, std::function<std::unique_ptr<gko::LinOp>(
181192
{"cusp_hybrid", read_matrix_from_data<cusp_hybrid>},
182193
{"cusp_coo", read_matrix_from_data<cusp_coo>},
183194
{"cusp_ell", read_matrix_from_data<cusp_ell>},
195+
#if defined(CUDA_VERSION) && (CUDA_VERSION >= 10010)
196+
{"cusp_gcsr", read_matrix_from_data<cusp_gcsr>},
197+
{"cusp_gcsr2", read_matrix_from_data<cusp_gcsr2>},
198+
{"cusp_gcoo", read_matrix_from_data<cusp_gcoo>},
199+
#endif // defined(CUDA_VERSION) && (CUDA_VERSION >= 10010)
184200
#endif // HAS_CUDA
185201
#ifdef HAS_HIP
186202
{"hipsp_csr", read_matrix_from_data<hipsp_csr>},
@@ -216,6 +232,7 @@ const std::map<std::string, std::function<std::unique_ptr<gko::LinOp>(
216232
READ_MATRIX(hybrid,
217233
std::make_shared<hybrid::minimal_storage_limit>())},
218234
{"sellp", read_matrix_from_data<gko::matrix::Sellp<>>}};
235+
// clang-format on
219236

220237

221238
} // namespace formats

common/components/atomic.hpp.inc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ GKO_BIND_ATOMIC_HELPER_STRUCTURE(unsigned long long int);
8585
GKO_BIND_ATOMIC_HELPER_STRUCTURE(unsigned int);
8686

8787

88-
#if !(defined(CUDA_VERSION) && (CUDA_VERSION < 10100))
88+
#if !(defined(CUDA_VERSION) && (CUDA_VERSION < 10010))
8989
// CUDA 10.1 starts supporting 16-bit unsigned short int atomicCAS
9090
GKO_BIND_ATOMIC_HELPER_STRUCTURE(unsigned short int);
9191
#endif

cuda/base/types.hpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3535

3636

3737
#include <cublas_v2.h>
38+
#include <cusparse.h>
3839
#include <thrust/complex.h>
3940

4041

@@ -190,6 +191,31 @@ constexpr cudaDataType_t cuda_data_type_impl<uint8>()
190191
}
191192

192193

194+
#if defined(CUDA_VERSION) && (CUDA_VERSION >= 10010)
195+
196+
197+
// Maps an index type T to the corresponding cusparseIndexType_t enumerator.
// The primary template is the fallback for every type without an explicit
// specialization and returns CUSPARSE_INDEX_16U.
template <typename T>
198+
constexpr cusparseIndexType_t cusparse_index_type_impl()
199+
{
200+
return CUSPARSE_INDEX_16U;
201+
}
202+
203+
// Specialization for int32: CUSPARSE_INDEX_32I.
template <>
204+
constexpr cusparseIndexType_t cusparse_index_type_impl<int32>()
205+
{
206+
return CUSPARSE_INDEX_32I;
207+
}
208+
209+
// Specialization for int64: CUSPARSE_INDEX_64I.
template <>
210+
constexpr cusparseIndexType_t cusparse_index_type_impl<int64>()
211+
{
212+
return CUSPARSE_INDEX_64I;
213+
}
214+
215+
216+
#endif // defined(CUDA_VERSION) && (CUDA_VERSION >= 10010)
217+
218+
193219
} // namespace detail
194220

195221

@@ -208,6 +234,27 @@ constexpr cudaDataType_t cuda_data_type()
208234
}
209235

210236

237+
#if defined(CUDA_VERSION) && (CUDA_VERSION >= 10010)
238+
239+
240+
/**
241+
* This is an alias for the `cusparseIndexType_t` equivalent of `T`. By default,
242+
* CUSPARSE_INDEX_16U is returned.
243+
*
244+
* @tparam T  the index type to translate
245+
*
246+
* @returns the `cusparseIndexType_t` enumerator corresponding to `T`
247+
*/
248+
template <typename T>
249+
constexpr cusparseIndexType_t cusparse_index_type()
250+
{
251+
return detail::cusparse_index_type_impl<T>();
252+
}
253+
254+
255+
#endif // defined(CUDA_VERSION) && (CUDA_VERSION >= 10010)
256+
257+
211258
/**
212259
* This is an alias for CUDA's equivalent of `T`.
213260
*

0 commit comments

Comments
 (0)