@@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37
37
#include < ginkgo/ginkgo.hpp>
38
38
39
39
40
+ #include < cuda.h>
40
41
#include < cuda_runtime.h>
41
42
#include < cusparse.h>
42
43
#include < memory>
@@ -45,6 +46,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45
46
#include " cuda/base/cusparse_bindings.hpp"
46
47
#include " cuda/base/device_guard.hpp"
47
48
#include " cuda/base/pointer_mode_guard.hpp"
49
+ #include " cuda/base/types.hpp"
48
50
49
51
50
52
namespace detail {
@@ -54,7 +56,12 @@ class CuspBase : public gko::LinOp {
54
56
public:
55
57
/** Returns the raw cuSPARSE matrix descriptor handle held by descr_. */
cusparseMatDescr_t get_descr() const
{
    return this->descr_.get();
}
56
58
57
- const gko::CudaExecutor *get_gpu_exec () const { return gpu_exec_.get (); }
59
+ // Return shared pointer not plain pointer such that CuspGenericSpMV uses
60
+ // gko::Array to allocate buffer.
61
+ std::shared_ptr<const gko::CudaExecutor> get_gpu_exec () const
62
+ {
63
+ return gpu_exec_;
64
+ }
58
65
59
66
protected:
60
67
void apply_impl (const gko::LinOp *, const gko::LinOp *, const gko::LinOp *,
@@ -475,6 +482,204 @@ class CuspHybrid
475
482
};
476
483
477
484
485
+ #if defined(CUDA_VERSION) && (CUDA_VERSION >= 10010)
486
+
487
+
488
+ template <typename ValueType>
489
+ void cusp_generic_spmv (std::shared_ptr<const gko::CudaExecutor> gpu_exec,
490
+ const cusparseSpMatDescr_t mat,
491
+ const gko::Array<ValueType> &scalars,
492
+ const gko::LinOp *b, gko::LinOp *x,
493
+ cusparseOperation_t trans, cusparseSpMVAlg_t alg)
494
+ {
495
+ cudaDataType_t cu_value = gko::kernels::cuda::cuda_data_type<ValueType>();
496
+ using gko::kernels::cuda::as_culibs_type;
497
+ auto dense_b = gko::as<gko::matrix::Dense<ValueType>>(b);
498
+ auto dense_x = gko::as<gko::matrix::Dense<ValueType>>(x);
499
+ auto db = dense_b->get_const_values ();
500
+ auto dx = dense_x->get_values ();
501
+ const auto id = gpu_exec->get_device_id ();
502
+ gko::cuda::device_guard g{id};
503
+ cusparseDnVecDescr_t vecb, vecx;
504
+ GKO_ASSERT_NO_CUSPARSE_ERRORS (
505
+ cusparseCreateDnVec (&vecx, dense_x->get_num_stored_elements (),
506
+ as_culibs_type (dx), cu_value));
507
+ // cusparseCreateDnVec only allows non-const pointer
508
+ GKO_ASSERT_NO_CUSPARSE_ERRORS (cusparseCreateDnVec (
509
+ &vecb, dense_b->get_num_stored_elements (),
510
+ as_culibs_type (const_cast <ValueType *>(db)), cu_value));
511
+
512
+ size_t buffer_size = 0 ;
513
+ GKO_ASSERT_NO_CUSPARSE_ERRORS (cusparseSpMV_bufferSize (
514
+ gpu_exec->get_cusparse_handle (), trans, &scalars.get_const_data ()[0 ],
515
+ mat, vecb, &scalars.get_const_data ()[1 ], vecx, cu_value, alg,
516
+ &buffer_size));
517
+ gko::Array<char > buffer_array (gpu_exec, buffer_size);
518
+ auto dbuffer = buffer_array.get_data ();
519
+ GKO_ASSERT_NO_CUSPARSE_ERRORS (cusparseSpMV (
520
+ gpu_exec->get_cusparse_handle (), trans, &scalars.get_const_data ()[0 ],
521
+ mat, vecb, &scalars.get_const_data ()[1 ], vecx, cu_value, alg, dbuffer));
522
+ GKO_ASSERT_NO_CUSPARSE_ERRORS (cusparseDestroyDnVec (vecx));
523
+ GKO_ASSERT_NO_CUSPARSE_ERRORS (cusparseDestroyDnVec (vecb));
524
+ }
525
+
526
+
527
+ template <typename ValueType = gko::default_precision,
528
+ typename IndexType = gko::int32,
529
+ cusparseSpMVAlg_t Alg = CUSPARSE_MV_ALG_DEFAULT>
530
+ class CuspGenericCsr
531
+ : public gko::EnableLinOp<CuspGenericCsr<ValueType, IndexType, Alg>,
532
+ CuspBase>,
533
+ public gko::EnableCreateMethod<CuspGenericCsr<ValueType, IndexType, Alg>>,
534
+ public gko::ReadableFromMatrixData<ValueType, IndexType> {
535
+ friend class gko ::EnableCreateMethod<CuspGenericCsr>;
536
+ friend class gko ::EnablePolymorphicObject<CuspGenericCsr, CuspBase>;
537
+
538
+ public:
539
+ using csr = gko::matrix::Csr<ValueType, IndexType>;
540
+ using mat_data = gko::matrix_data<ValueType, IndexType>;
541
+ cusparseIndexType_t cu_index =
542
+ gko::kernels::cuda::cusparse_index_type<IndexType>();
543
+ cudaDataType_t cu_value = gko::kernels::cuda::cuda_data_type<ValueType>();
544
+
545
+ void read (const mat_data &data) override
546
+ {
547
+ using gko::kernels::cuda::as_culibs_type;
548
+ csr_->read (data);
549
+ this ->set_size (gko::dim<2 >{csr_->get_size ()});
550
+ GKO_ASSERT_NO_CUSPARSE_ERRORS (
551
+ cusparseCreateCsr (&mat_, csr_->get_size ()[0 ], csr_->get_size ()[1 ],
552
+ csr_->get_num_stored_elements (),
553
+ as_culibs_type (csr_->get_row_ptrs ()),
554
+ as_culibs_type (csr_->get_col_idxs ()),
555
+ as_culibs_type (csr_->get_values ()), cu_index,
556
+ cu_index, CUSPARSE_INDEX_BASE_ZERO, cu_value));
557
+ }
558
+
559
+ gko::size_type get_num_stored_elements () const noexcept
560
+ {
561
+ return csr_->get_num_stored_elements ();
562
+ }
563
+
564
+ ~CuspGenericCsr () override
565
+ {
566
+ const auto id = this ->get_gpu_exec ()->get_device_id ();
567
+ try {
568
+ gko::cuda::device_guard g{id};
569
+ GKO_ASSERT_NO_CUSPARSE_ERRORS (cusparseDestroySpMat (mat_));
570
+ } catch (const std::exception &e) {
571
+ std::cerr << " Error when unallocating CuspGenericCsr mat_ matrix: "
572
+ << e.what () << std::endl;
573
+ }
574
+ }
575
+
576
+ CuspGenericCsr (const CuspGenericCsr &other) = delete ;
577
+
578
+ CuspGenericCsr &operator =(const CuspGenericCsr &other) = default ;
579
+
580
+ protected:
581
+ void apply_impl (const gko::LinOp *b, gko::LinOp *x) const override
582
+ {
583
+ cusp_generic_spmv (this ->get_gpu_exec (), mat_, scalars, b, x, trans_,
584
+ Alg);
585
+ }
586
+
587
+ CuspGenericCsr (std::shared_ptr<const gko::Executor> exec,
588
+ const gko::dim<2 > &size = gko::dim<2 >{})
589
+ : gko::EnableLinOp<CuspGenericCsr, CuspBase>(exec, size),
590
+ csr_ (std::move(
591
+ csr::create (exec, std::make_shared<typename csr::classical>()))),
592
+ trans_(CUSPARSE_OPERATION_NON_TRANSPOSE)
593
+ {}
594
+
595
+ private:
596
+ // Contains {alpha, beta}
597
+ gko::Array<ValueType> scalars{
598
+ this ->get_executor (), {gko::one<ValueType>(), gko::zero<ValueType>()}};
599
+ std::shared_ptr<csr> csr_;
600
+ cusparseOperation_t trans_;
601
+ cusparseSpMatDescr_t mat_;
602
+ };
603
+
604
+
605
+ template <typename ValueType = gko::default_precision,
606
+ typename IndexType = gko::int32>
607
+ class CuspGenericCoo
608
+ : public gko::EnableLinOp<CuspGenericCoo<ValueType, IndexType>, CuspBase>,
609
+ public gko::EnableCreateMethod<CuspGenericCoo<ValueType, IndexType>>,
610
+ public gko::ReadableFromMatrixData<ValueType, IndexType> {
611
+ friend class gko ::EnableCreateMethod<CuspGenericCoo>;
612
+ friend class gko ::EnablePolymorphicObject<CuspGenericCoo, CuspBase>;
613
+
614
+ public:
615
+ using coo = gko::matrix::Coo<ValueType, IndexType>;
616
+ using mat_data = gko::matrix_data<ValueType, IndexType>;
617
+ cusparseIndexType_t cu_index =
618
+ gko::kernels::cuda::cusparse_index_type<IndexType>();
619
+ cudaDataType_t cu_value = gko::kernels::cuda::cuda_data_type<ValueType>();
620
+
621
+ void read (const mat_data &data) override
622
+ {
623
+ using gko::kernels::cuda::as_culibs_type;
624
+ coo_->read (data);
625
+ this ->set_size (gko::dim<2 >{coo_->get_size ()});
626
+ GKO_ASSERT_NO_CUSPARSE_ERRORS (
627
+ cusparseCreateCoo (&mat_, coo_->get_size ()[0 ], coo_->get_size ()[1 ],
628
+ coo_->get_num_stored_elements (),
629
+ as_culibs_type (coo_->get_row_idxs ()),
630
+ as_culibs_type (coo_->get_col_idxs ()),
631
+ as_culibs_type (coo_->get_values ()), cu_index,
632
+ CUSPARSE_INDEX_BASE_ZERO, cu_value));
633
+ }
634
+
635
+ gko::size_type get_num_stored_elements () const noexcept
636
+ {
637
+ return coo_->get_num_stored_elements ();
638
+ }
639
+
640
+ ~CuspGenericCoo () override
641
+ {
642
+ const auto id = this ->get_gpu_exec ()->get_device_id ();
643
+ try {
644
+ gko::cuda::device_guard g{id};
645
+ GKO_ASSERT_NO_CUSPARSE_ERRORS (cusparseDestroySpMat (mat_));
646
+ } catch (const std::exception &e) {
647
+ std::cerr << " Error when unallocating CuspGenericCoo mat_ matrix: "
648
+ << e.what () << std::endl;
649
+ }
650
+ }
651
+
652
+ CuspGenericCoo (const CuspGenericCoo &other) = delete ;
653
+
654
+ CuspGenericCoo &operator =(const CuspGenericCoo &other) = default ;
655
+
656
+ protected:
657
+ void apply_impl (const gko::LinOp *b, gko::LinOp *x) const override
658
+ {
659
+ cusp_generic_spmv (this ->get_gpu_exec (), mat_, scalars, b, x, trans_,
660
+ CUSPARSE_MV_ALG_DEFAULT);
661
+ }
662
+
663
+ CuspGenericCoo (std::shared_ptr<const gko::Executor> exec,
664
+ const gko::dim<2 > &size = gko::dim<2 >{})
665
+ : gko::EnableLinOp<CuspGenericCoo, CuspBase>(exec, size),
666
+ coo_ (std::move(coo::create(exec))),
667
+ trans_ (CUSPARSE_OPERATION_NON_TRANSPOSE)
668
+ {}
669
+
670
+ private:
671
+ // Contains {alpha, beta}
672
+ gko::Array<ValueType> scalars{
673
+ this ->get_executor (), {gko::one<ValueType>(), gko::zero<ValueType>()}};
674
+ std::shared_ptr<coo> coo_;
675
+ cusparseOperation_t trans_;
676
+ cusparseSpMatDescr_t mat_;
677
+ };
678
+
679
+
680
+ #endif // defined(CUDA_VERSION) && (CUDA_VERSION >= 10010)
681
+
682
+
478
683
} // namespace detail
479
684
480
685
@@ -485,6 +690,18 @@ using cusp_csrmp = detail::CuspCsrmp<>;
485
690
using cusp_csrmm = detail::CuspCsrmm<>;
486
691
487
692
693
+ #if defined(CUDA_VERSION) && (CUDA_VERSION >= 10010)
694
+
695
+
696
+ using cusp_gcsr = detail::CuspGenericCsr<>;
697
+ using cusp_gcsr2 =
698
+ detail::CuspGenericCsr<double , gko::int32, CUSPARSE_CSRMV_ALG2>;
699
+ using cusp_gcoo = detail::CuspGenericCoo<>;
700
+
701
+
702
+ #endif // defined(CUDA_VERSION) && (CUDA_VERSION >= 10010)
703
+
704
+
488
705
using cusp_coo =
489
706
detail::CuspHybrid<double , gko::int32, CUSPARSE_HYB_PARTITION_USER, 0 >;
490
707
using cusp_ell =
0 commit comments