From 93821de9d1709f70e4489bd4dbd9158d68d044c1 Mon Sep 17 00:00:00 2001 From: Pierre Marchand Date: Fri, 26 Jul 2024 18:52:54 +0200 Subject: [PATCH] rework on ddm solvers --- include/htool/hmatrix/hmatrix.hpp | 4 +- .../hmatrix/tree_builder/tree_builder.hpp | 2 +- include/htool/solvers/ddm.hpp | 464 +++++++----------- .../interfaces/virtual_local_solver.hpp | 50 ++ .../local_hmatrix_plus_overlap_solvers.hpp | 121 +++++ .../local_solvers/local_hmatrix_solvers.hpp | 82 ++++ include/htool/solvers/utility.hpp | 235 ++++++--- include/htool/wrappers/wrapper_hpddm.hpp | 44 +- tests/functional_tests/solvers/CMakeLists.txt | 2 +- .../functional_tests/solvers/test_solver.cpp | 11 +- .../solvers/test_solver_ddm.hpp | 272 ++++++++-- .../test_solver_ddm_adding_overlap.hpp | 245 ++++++++- .../solvers/test_solver_wo_overlap.hpp | 21 +- 13 files changed, 1140 insertions(+), 413 deletions(-) create mode 100644 include/htool/solvers/interfaces/virtual_local_solver.hpp create mode 100644 include/htool/solvers/local_solvers/local_hmatrix_plus_overlap_solvers.hpp create mode 100644 include/htool/solvers/local_solvers/local_hmatrix_solvers.hpp diff --git a/include/htool/hmatrix/hmatrix.hpp b/include/htool/hmatrix/hmatrix.hpp index 0c5a82c6..06a51b1f 100644 --- a/include/htool/hmatrix/hmatrix.hpp +++ b/include/htool/hmatrix/hmatrix.hpp @@ -83,7 +83,9 @@ class HMatrix : public TreeNode *target_cluster, const Cluster *source_cluster) : TreeNode>(parent), m_target_cluster(target_cluster), m_source_cluster(source_cluster) {} HMatrix(const HMatrix &rhs) : TreeNode, HMatrixTreeData>(rhs), m_target_cluster(rhs.m_target_cluster), m_source_cluster(rhs.m_source_cluster), m_symmetry(rhs.m_symmetry), m_UPLO(rhs.m_UPLO), m_leaves(), m_leaves_for_symmetry(), m_symmetry_type_for_leaves(), m_storage_type(rhs.m_storage_type) { - Logger::get_instance().log(LogLevel::INFO, "Deep copy of HMatrix"); + if (m_target_cluster->is_root() or is_cluster_on_partition(*m_target_cluster)) { + Logger::get_instance().log(LogLevel::INFO, "Deep copy of HMatrix"); + } this->m_depth = rhs.m_depth; this->m_is_root = rhs.m_is_root; this->m_tree_data = std::make_shared>(*rhs.m_tree_data); diff --git a/include/htool/hmatrix/tree_builder/tree_builder.hpp b/include/htool/hmatrix/tree_builder/tree_builder.hpp index 5afc3402..6c18eec9 100644 --- a/include/htool/hmatrix/tree_builder/tree_builder.hpp +++ b/include/htool/hmatrix/tree_builder/tree_builder.hpp @@ -9,7 +9,7 @@ namespace htool { -template +template > class HMatrixTreeBuilder { private: class ZeroGenerator : public VirtualGenerator { diff --git a/include/htool/solvers/ddm.hpp b/include/htool/solvers/ddm.hpp index ba80b45d..d4d4982f 100644 --- a/include/htool/solvers/ddm.hpp +++ b/include/htool/solvers/ddm.hpp @@ -11,10 +11,13 @@ #include "./geneo/coarse_operator_builder.hpp" #include "./interfaces/virtual_coarse_operator_builder.hpp" #include "./interfaces/virtual_coarse_space_builder.hpp" +#include "./interfaces/virtual_local_solver.hpp" +#include "./local_solvers/local_hmatrix_plus_overlap_solvers.hpp" +#include "./local_solvers/local_hmatrix_solvers.hpp" namespace htool { -template +template class LocalSolver> class DDM { private: std::function get_rankWorld = [](MPI_Comm comm) { @@ -22,11 +25,10 @@ class DDM { MPI_Comm_rank(comm, &rankWorld); return rankWorld; }; - int n; + int m_size_w_overlap; int n_inside; - std::unique_ptr> hpddm_op; - Matrix &mat_loc; - std::vector D; + std::unique_ptr> m_hpddm_op; + std::vector> D; int nevi; bool one_level; bool two_level; @@ -42,51 +44,23 @@ class DDM { DDM &operator=(DDM &&cluster) noexcept = default; virtual ~DDM() = default; void clean() { - hpddm_op.reset(); + m_hpddm_op.reset(); } - DDM(const DistributedOperator &distributed_operator, Matrix &local_dense_matrix, const std::vector &neighbors, const std::vector> &intersections) : n(local_dense_matrix.nb_rows()), n_inside(distributed_operator.get_target_partition().get_size_of_partition(get_rankWorld(distributed_operator.get_comm()))), hpddm_op(std::make_unique>(&distributed_operator)), mat_loc(local_dense_matrix), D(n), nevi(0), one_level(0), two_level(0) { - // Timing - double mytime, maxtime; - double time = MPI_Wtime(); - - // Symmetry and storage - bool sym = false; - if (distributed_operator.get_symmetry_type() == 'S' || (distributed_operator.get_symmetry_type() == 'H' && is_complex())) { - sym = true; - - if (distributed_operator.get_storage_type() == 'U') { - htool::Logger::get_instance().log(LogLevel::ERROR, "HPDDM takes lower symmetric/hermitian matrices or regular matrices"); // LCOV_EXCL_LINE - // throw std::invalid_argument("[Htool error] HPDDM takes lower symmetric/hermitian matrices or regular matrices"); // LCOV_EXCL_LINE - } - if (distributed_operator.get_symmetry_type() == 'S' && is_complex()) { - htool::Logger::get_instance().log(LogLevel::WARNING, "A symmetric matrix with UPLO='L' has been given to DDM solver. It will be considered hermitian by the solver"); // LCOV_EXCL_LINE - // std::cout << "[Htool warning] A symmetric matrix with UPLO='L' has been given to DDM solver. It will be considered hermitian by the solver." << std::endl; - } - } - - hpddm_op->initialize(n, sym, mat_loc.data(), neighbors, intersections); - - fill(D.begin(), D.begin() + n_inside, 1); - fill(D.begin() + n_inside, D.end(), 0); - - hpddm_op->HPDDMDense::super::super::initialize(D.data()); - mytime = MPI_Wtime() - time; - - // Timing - MPI_Reduce(&(mytime), &(maxtime), 1, MPI_DOUBLE, MPI_MAX, 0, hpddm_op->HA->get_comm()); - - infos["DDM_setup_one_level_max"] = NbrToStr(maxtime); + DDM(int size_w_overlap, const DistributedOperator &distributed_operator, std::unique_ptr> hpddm_op) : m_size_w_overlap(size_w_overlap), n_inside(distributed_operator.get_target_partition().get_size_of_partition(get_rankWorld(distributed_operator.get_comm()))), m_hpddm_op(std::move(hpddm_op)), D(size_w_overlap), nevi(0), one_level(0), two_level(0) { + fill(D.begin(), D.begin() + n_inside, htool::underlying_type(1)); + fill(D.begin() + n_inside, D.end(), htool::underlying_type(0)); + m_hpddm_op->HPDDMOperator::super::super::initialize(D.data()); } void facto_one_level() { double time = MPI_Wtime(); double mytime, maxtime; - hpddm_op->callNumfact(); + m_hpddm_op->callNumfact(); mytime = MPI_Wtime() - time; // Timing - MPI_Reduce(&(mytime), &(maxtime), 1, MPI_DOUBLE, MPI_MAX, 0, hpddm_op->HA->get_comm()); + MPI_Reduce(&(mytime), &(maxtime), 1, MPI_DOUBLE, MPI_MAX, 0, m_hpddm_op->HA->get_comm()); infos["DDM_facto_one_level_max"] = NbrToStr(maxtime); one_level = 1; @@ -113,26 +87,26 @@ class DDM { CoefficientPrecision **Z_ptr_ptr = new CoefficientPrecision *[nevi]; CoefficientPrecision *Z_ptr = Z.release(); for (int i = 0; i < nevi; i++) { - Z_ptr_ptr[i] = Z_ptr + i * n; + Z_ptr_ptr[i] = Z_ptr + i * m_size_w_overlap; } - hpddm_op->setVectors(Z_ptr_ptr); + m_hpddm_op->setVectors(Z_ptr_ptr); // // int rankWorld; // MPI_Comm_rank(hpddm_op->HA->get_comm(), &rankWorld); int sizeWorld; - MPI_Comm_size(hpddm_op->HA->get_comm(), &sizeWorld); + MPI_Comm_size(m_hpddm_op->HA->get_comm(), &sizeWorld); time = MPI_Wtime(); Matrix coarse_operator = coarse_operator_builder.build_coarse_operator(Z.nb_rows(), Z.nb_cols(), Z_ptr_ptr); mytime[1] = MPI_Wtime() - time; time = MPI_Wtime(); - hpddm_op->buildTwo(MPI_COMM_WORLD, coarse_operator.data()); + m_hpddm_op->buildTwo(MPI_COMM_WORLD, coarse_operator.data()); mytime[2] = MPI_Wtime() - time; // Timing - MPI_Reduce(&(mytime[0]), &(maxtime[0]), mytime.size(), MPI_DOUBLE, MPI_MAX, 0, hpddm_op->HA->get_comm()); + MPI_Reduce(&(mytime[0]), &(maxtime[0]), mytime.size(), MPI_DOUBLE, MPI_MAX, 0, m_hpddm_op->HA->get_comm()); infos["DDM_geev_max"] = NbrToStr(maxtime[0]); infos["DDM_setup_ZtAZ_max"] = NbrToStr(maxtime[1]); @@ -141,207 +115,6 @@ class DDM { two_level = 1; } - // void build_coarse_space(Matrix &Ki) { - // // Timing - // double mytime, maxtime; - // double time = MPI_Wtime(); - - // // - // int info; - - // // Partition of unity - // Matrix DAiD(n, n); - // for (int i = 0; i < n_inside; i++) { - // std::copy_n(mat_loc.data() + i * n, n_inside, &(DAiD(0, i))); - // } - - // // Build local eigenvalue problem - // int ldvl = n, ldvr = n, lwork = -1; - // int lda = n, ldb = n; - // std::vector work(n); - // std::vector rwork; - // std::vector index(n, 0); - // std::iota(index.begin(), index.end(), int(0)); - - // // int rankWorld; - // // MPI_Comm_rank(MPI_COMM_WORLD, &rankWorld); - // // DAiD.csv_save("DAiD_" + NbrToStr(rankWorld)); - // // Ki.csv_save("Ki_" + NbrToStr(rankWorld)); - - // if (hpddm_op->HA->get_symmetry_type() == 'S' || hpddm_op->HA->get_symmetry_type() == 'H') { - // char uplo = hpddm_op->HA->get_storage_type(); - // int itype = 1; - // std::vector> w(n); - // if (is_complex()) { - // rwork.resize(3 * n - 2); - // } - - // Lapack::gv(&itype, "V", &uplo, &n, DAiD.data(), &lda, Ki.data(), &ldb, w.data(), work.data(), &lwork, rwork.data(), &info); - // lwork = (int)std::real(work[0]); - // work.resize(lwork); - // Lapack::gv(&itype, "V", &uplo, &n, DAiD.data(), &lda, Ki.data(), &ldb, w.data(), work.data(), &lwork, rwork.data(), &info); - - // std::sort(index.begin(), index.end(), [&](const int &a, const int &b) { - // return (std::abs(w[a]) > std::abs(w[b])); - // }); - - // // if (rankWorld == 0) { - // // for (int i = 0; i < index.size(); i++) { - // // std::cout << std::abs(w[index[i]]) << " "; - // // } - // // std::cout << "\n"; - // // // std::cout << vr << "\n"; - // // } - // // MPI_Barrier(hpddm_op->HA->get_comm()); - // // if (rankWorld == 1) { - // // std::cout << "w : " << w << "\n"; - // // std::cout << "info: " << info << "\n"; - // // for (int i = 0; i < index.size(); i++) { - // // std::cout << std::abs(w[index[i]]) << " "; - // // } - // // std::cout << "\n"; - // // // std::cout << vr << "\n"; - // // } - - // HPDDM::Option &opt = *HPDDM::Option::get(); - // nevi = 0; - // double threshold = opt.val("geneo_threshold", -1.0); - // if (threshold > 0.0) { - // while (std::abs(w[index[nevi]]) > threshold && nevi < index.size()) { - // nevi++; - // } - - // } else { - // nevi = opt.val("geneo_nu", 2); - // } - - // opt["geneo_nu"] = nevi; - // m_Z = new CoefficientPrecision *[nevi]; - // *m_Z = new CoefficientPrecision[nevi * n]; - // for (int i = 0; i < nevi; i++) { - // m_Z[i] = *m_Z + i * n; - // std::copy_n(DAiD.data() + index[i] * n, n_inside, m_Z[i]); - // for (int j = n_inside; j < n; j++) { - - // m_Z[i][j] = 0; - // } - // } - // } else { - // if (is_complex()) { - // rwork.resize(8 * n); - // } - // std::vector alphar(n), alphai((is_complex() ? 0 : n)), beta(n); - // std::vector vl(n * n), vr(n * n); - - // Lapack::ggev("N", "V", &n, DAiD.data(), &lda, Ki.data(), &ldb, alphar.data(), alphai.data(), beta.data(), vl.data(), &ldvl, vr.data(), &ldvr, work.data(), &lwork, rwork.data(), &info); - // lwork = (int)std::real(work[0]); - // work.resize(lwork); - // Lapack::ggev("N", "V", &n, DAiD.data(), &lda, Ki.data(), &ldb, alphar.data(), alphai.data(), beta.data(), vl.data(), &ldvl, vr.data(), &ldvr, work.data(), &lwork, rwork.data(), &info); - - // std::sort(index.begin(), index.end(), [&](const int &a, const int &b) { - // return ((std::abs(beta[a]) < 1e-15 || (std::abs(alphar[a] / beta[a]) > std::abs(alphar[b] / beta[b]))) && !(std::abs(beta[b]) < 1e-15)); - // }); - - // // if (rankWorld == 0) { - // // // for (int i = 0; i < index.size(); i++) { - // // // std::cout << std::abs(alphar[index[i]] / beta[index[i]]) << " "; - // // // } - // // // std::cout << "\n"; - // // std::cout << vr << "\n"; - // // } - // // MPI_Barrier(hpddm_op->HA->get_comm()); - // // if (rankWorld == 1) { - // // // std::cout << "alphar : " << alphar << "\n"; - // // // std::cout << "alphai : " << alphai << "\n"; - // // // std::cout << "beta : " << beta << "\n"; - // // // std::cout << "info: " << info << "\n"; - // // // for (int i = 0; i < index.size(); i++) { - // // // std::cout << std::abs(alphar[index[i]] / beta[index[i]]) << " "; - // // // } - // // // std::cout << "\n"; - // // std::cout << vr << "\n"; - // // } - - // HPDDM::Option &opt = *HPDDM::Option::get(); - // nevi = 0; - // double threshold = opt.val("geneo_threshold", -1.0); - // if (threshold > 0.0) { - // while (std::abs(beta[index[nevi]]) < 1e-15 || (std::abs(alphar[index[nevi]] / beta[index[nevi]]) > threshold && nevi < index.size())) { - // nevi++; - // } - - // } else { - // nevi = opt.val("geneo_nu", 2); - // } - - // opt["geneo_nu"] = nevi; - // m_Z = new CoefficientPrecision *[nevi]; - // *m_Z = new CoefficientPrecision[nevi * n]; - // for (int i = 0; i < nevi; i++) { - // m_Z[i] = *m_Z + i * n; - // std::copy_n(vr.data() + index[i] * n, n_inside, m_Z[i]); - // for (int j = n_inside; j < n; j++) { - - // m_Z[i][j] = 0; - // } - // } - // } - - // // Matrix Z_test(n, nevi); - // // Z_test.assign(n, nevi, m_Z[0], false); - // // // int rankWorld; - // // MPI_Comm_rank(MPI_COMM_WORLD, &rankWorld); - // // Z_test.csv_save("test_2_" + NbrToStr(rankWorld)); - - // hpddm_op->setVectors(m_Z); - - // // timing - // mytime = MPI_Wtime() - time; - // MPI_Barrier(hpddm_op->HA->get_comm()); - // time = MPI_Wtime(); - // MPI_Reduce(&(mytime), &(maxtime), 1, MPI_DOUBLE, MPI_MAX, 0, hpddm_op->HA->get_comm()); - // infos["DDM_geev_max"] = NbrToStr(maxtime); - - // // - // build_E(m_Z); - // } - - // void build_E(CoefficientPrecision *const *Z) { - // // - // int sizeWorld; - // MPI_Comm_size(hpddm_op->HA->get_comm(), &sizeWorld); - - // // Timing - // std::vector mytime(2), maxtime(2); - // double time = MPI_Wtime(); - - // // Allgather - // std::vector recvcounts(sizeWorld); - // std::vector displs(sizeWorld); - // MPI_Allgather(&nevi, 1, MPI_INT, recvcounts.data(), 1, MPI_INT, hpddm_op->HA->get_comm()); - - // displs[0] = 0; - // for (int i = 1; i < sizeWorld; i++) { - // displs[i] = displs[i - 1] + recvcounts[i - 1]; - // } - - // Matrix E; - // htool::build_geneo_coarse_operator(*hpddm_op->HA, nevi, n, Z, E); - // mytime[0] = MPI_Wtime() - time; - - // time = MPI_Wtime(); - // hpddm_op->buildTwo(MPI_COMM_WORLD, E.release()); - // mytime[1] = MPI_Wtime() - time; - - // // Timing - // MPI_Reduce(&(mytime[0]), &(maxtime[0]), 2, MPI_DOUBLE, MPI_MAX, 0, hpddm_op->HA->get_comm()); - - // infos["DDM_setup_ZtAZ_max"] = NbrToStr(maxtime[0]); - // infos["DDM_facto_ZtAZ_max"] = NbrToStr(maxtime[1]); - // infos["GenEO_coarse_size"] = NbrToStr(E.nb_cols()); - // two_level = 1; - // } - void solve(const CoefficientPrecision *const rhs, CoefficientPrecision *const x, const int &mu = 1) { // Check facto if (!one_level && two_level) { @@ -353,13 +126,13 @@ class DDM { HPDDM::Option &opt = *HPDDM::Option::get(); switch (opt.val("schwarz_method", 0)) { case HPDDM_SCHWARZ_METHOD_NONE: - hpddm_op->setType(HPDDMDense::Prcndtnr::NO); + m_hpddm_op->setType(HPDDMOperator::Prcndtnr::NO); break; case HPDDM_SCHWARZ_METHOD_RAS: - hpddm_op->setType(HPDDMDense::Prcndtnr::GE); + m_hpddm_op->setType(HPDDMOperator::Prcndtnr::GE); break; case HPDDM_SCHWARZ_METHOD_ASM: - hpddm_op->setType(HPDDMDense::Prcndtnr::SY); + m_hpddm_op->setType(HPDDMOperator::Prcndtnr::SY); break; // case HPDDM_SCHWARZ_METHOD_OSM: // hpddm_op->setType(HPDDM::Schwarz::Prcndtnr::NO); @@ -373,46 +146,46 @@ class DDM { } // - MPI_Comm comm = hpddm_op->HA->get_comm(); + MPI_Comm comm = m_hpddm_op->HA->get_comm(); int rankWorld; int sizeWorld; MPI_Comm_rank(comm, &rankWorld); MPI_Comm_size(comm, &sizeWorld); - int offset = hpddm_op->HA->get_target_partition().get_offset_of_partition(rankWorld); + int offset = m_hpddm_op->HA->get_target_partition().get_offset_of_partition(rankWorld); // int size = hpddm_op->HA->get_local_size(); - int nb_rows = hpddm_op->HA->get_target_partition().get_global_size(); + int nb_rows = m_hpddm_op->HA->get_target_partition().get_global_size(); // int nb_vec_prod = StrToNbr(hpddm_op->HA->get_infos("nb_mat_vec_prod")); double time = MPI_Wtime(); // std::vector rhs_perm(nb_rows); - std::vector x_local(n * mu, 0); - std::vector local_rhs(n * mu, 0); - hpddm_op->in_global->resize(nb_rows * (mu == 1 ? 1 : 2 * mu)); - hpddm_op->buffer->resize(n_inside * (mu == 1 ? 1 : 2 * mu)); + std::vector x_local(m_size_w_overlap * mu, 0); + std::vector local_rhs(m_size_w_overlap * mu, 0); + m_hpddm_op->in_global->resize(nb_rows * (mu == 1 ? 1 : 2 * mu)); + m_hpddm_op->buffer->resize(n_inside * (mu == 1 ? 1 : 2 * mu)); // TODO: blocking ? for (int i = 0; i < mu; i++) { // Permutation - hpddm_op->HA->get_target_partition().global_to_partition_numbering(rhs + i * nb_rows, rhs_perm.data()); + m_hpddm_op->HA->get_target_partition().global_to_partition_numbering(rhs + i * nb_rows, rhs_perm.data()); // global_to_root_cluster(hpddm_op->HA->get_root_target_cluster(), rhs + i * nb_rows, rhs_perm.data()); // hpddm_op->HA->target_to_cluster_permutation(rhs + i * nb_rows, rhs_perm.data()); - std::copy_n(rhs_perm.begin() + offset, n_inside, local_rhs.begin() + i * n); + std::copy_n(rhs_perm.begin() + offset, n_inside, local_rhs.begin() + i * m_size_w_overlap); } // TODO: avoid com here // for (int i=0;iscaledexchange(local_rhs.data(), mu); + m_hpddm_op->scaledexchange(local_rhs.data(), mu); // Solve - int nb_it = HPDDM::IterativeMethod::solve(*hpddm_op, local_rhs.data(), x_local.data(), mu, comm); + int nb_it = HPDDM::IterativeMethod::solve(*m_hpddm_op, local_rhs.data(), x_local.data(), mu, comm); // Delete the overlap (useful only when mu>1 and n!=n_inside) for (int i = 0; i < mu; i++) { - std::copy_n(x_local.data() + i * n, n_inside, local_rhs.data() + i * n_inside); + std::copy_n(x_local.data() + i * m_size_w_overlap, n_inside, local_rhs.data() + i * n_inside); } // Local to global @@ -424,24 +197,24 @@ class DDM { for (int i = 0; i < sizeWorld; i++) { // recvcounts[i] = (hpddm_op->HA->get_root_target_cluster().get_clusters_on_partition()[i]->get_size()) * mu; - recvcounts[i] = (hpddm_op->HA->get_target_partition().get_size_of_partition(i)) * mu; + recvcounts[i] = (m_hpddm_op->HA->get_target_partition().get_size_of_partition(i)) * mu; if (i > 0) displs[i] = displs[i - 1] + recvcounts[i - 1]; } - MPI_Allgatherv(local_rhs.data(), recvcounts[rankWorld], wrapper_mpi::mpi_type(), hpddm_op->in_global->data() + (mu == 1 ? 0 : mu * nb_rows), &(recvcounts[0]), &(displs[0]), wrapper_mpi::mpi_type(), comm); + MPI_Allgatherv(local_rhs.data(), recvcounts[rankWorld], wrapper_mpi::mpi_type(), m_hpddm_op->in_global->data() + (mu == 1 ? 0 : mu * nb_rows), &(recvcounts[0]), &(displs[0]), wrapper_mpi::mpi_type(), comm); // for (int i = 0; i < mu; i++) { if (mu != 1) { for (int j = 0; j < sizeWorld; j++) { - std::copy_n(hpddm_op->in_global->data() + mu * nb_rows + displs[j] + i * recvcounts[j] / mu, recvcounts[j] / mu, hpddm_op->in_global->data() + i * nb_rows + displs[j] / mu); + std::copy_n(m_hpddm_op->in_global->data() + mu * nb_rows + displs[j] + i * recvcounts[j] / mu, recvcounts[j] / mu, m_hpddm_op->in_global->data() + i * nb_rows + displs[j] / mu); } } // Permutation - hpddm_op->HA->get_target_partition().partition_to_global_numbering(hpddm_op->in_global->data() + i * nb_rows, x + i * nb_rows); + m_hpddm_op->HA->get_target_partition().partition_to_global_numbering(m_hpddm_op->in_global->data() + i * nb_rows, x + i * nb_rows); // root_cluster_to_global(hpddm_op->HA->get_root_target_cluster(), hpddm_op->in_global->data() + i * nb_rows, x + i * nb_rows); // hpddm_op->HA->cluster_to_target_permutation(hpddm_op->in_global->data() + i * nb_rows, x + i * nb_rows); } @@ -512,13 +285,13 @@ class DDM { int nevi_min = nevi; if (rankWorld == 0) { - MPI_Reduce(MPI_IN_PLACE, &(nevi_mean), 1, MPI_INT, MPI_SUM, 0, hpddm_op->HA->get_comm()); - MPI_Reduce(MPI_IN_PLACE, &(nevi_max), 1, MPI_INT, MPI_MAX, 0, hpddm_op->HA->get_comm()); - MPI_Reduce(MPI_IN_PLACE, &(nevi_min), 1, MPI_INT, MPI_MIN, 0, hpddm_op->HA->get_comm()); + MPI_Reduce(MPI_IN_PLACE, &(nevi_mean), 1, MPI_INT, MPI_SUM, 0, m_hpddm_op->HA->get_comm()); + MPI_Reduce(MPI_IN_PLACE, &(nevi_max), 1, MPI_INT, MPI_MAX, 0, m_hpddm_op->HA->get_comm()); + MPI_Reduce(MPI_IN_PLACE, &(nevi_min), 1, MPI_INT, MPI_MIN, 0, m_hpddm_op->HA->get_comm()); } else { - MPI_Reduce(&(nevi_mean), &(nevi_mean), 1, MPI_INT, MPI_SUM, 0, hpddm_op->HA->get_comm()); - MPI_Reduce(&(nevi_max), &(nevi_max), 1, MPI_INT, MPI_MAX, 0, hpddm_op->HA->get_comm()); - MPI_Reduce(&(nevi_min), &(nevi_min), 1, MPI_INT, MPI_MIN, 0, hpddm_op->HA->get_comm()); + MPI_Reduce(&(nevi_mean), &(nevi_mean), 1, MPI_INT, MPI_SUM, 0, m_hpddm_op->HA->get_comm()); + MPI_Reduce(&(nevi_max), &(nevi_max), 1, MPI_INT, MPI_MAX, 0, m_hpddm_op->HA->get_comm()); + MPI_Reduce(&(nevi_min), &(nevi_min), 1, MPI_INT, MPI_MIN, 0, m_hpddm_op->HA->get_comm()); } infos["DDM_local_coarse_size_mean"] = NbrToStr((double)nevi_mean / (double)sizeWorld); @@ -544,7 +317,7 @@ class DDM { void print_infos() const { int rankWorld; - MPI_Comm_rank(hpddm_op->HA->get_comm(), &rankWorld); + MPI_Comm_rank(m_hpddm_op->HA->get_comm(), &rankWorld); if (rankWorld == 0) { for (std::map::const_iterator it = infos.begin(); it != infos.end(); ++it) { std::cout << it->first << "\t" << it->second << std::endl; @@ -555,7 +328,7 @@ class DDM { void save_infos(const std::string &outputname, std::ios_base::openmode mode = std::ios_base::app, const std::string &sep = " = ") const { int rankWorld; - MPI_Comm_rank(hpddm_op->HA->get_comm(), &rankWorld); + MPI_Comm_rank(m_hpddm_op->HA->get_comm(), &rankWorld); if (rankWorld == 0) { std::ofstream outputfile(outputname, mode); if (outputfile) { @@ -570,7 +343,7 @@ class DDM { } void add_infos(std::string key, std::string value) const { - if (hpddm_op->HA->get_rankworld() == 0) { + if (get_rankWorld(m_hpddm_op->HA->get_comm()) == 0) { if (infos.find(key) == infos.end()) { infos[key] = value; } else { @@ -580,7 +353,7 @@ class DDM { } void set_infos(std::string key, std::string value) const { - if (hpddm_op->HA->get_rankworld() == 0) { + if (get_rankWorld(m_hpddm_op->HA->get_comm()) == 0) { infos[key] = value; } } @@ -595,13 +368,144 @@ class DDM { return nevi; } int get_local_size() const { - return n; + return m_size_w_overlap; } - - // MPI_Comm get_comm() const { - // return comm; - // } }; +template +DDM make_DDM_solver(const DistributedOperator &distributed_operator, Matrix &local_dense_matrix, const std::vector &neighbors, const std::vector> &intersections) { + int rankWorld; + MPI_Comm_rank(distributed_operator.get_comm(), &rankWorld); + int n = local_dense_matrix.nb_rows(); + + // Timing + double mytime, maxtime; + double time = MPI_Wtime(); + + // Symmetry and storage + bool sym = false; + if (distributed_operator.get_symmetry_type() == 'S' || (distributed_operator.get_symmetry_type() == 'H' && is_complex())) { + sym = true; + + if (distributed_operator.get_storage_type() == 'U') { + htool::Logger::get_instance().log(LogLevel::ERROR, "HPDDM takes lower symmetric/hermitian matrices or regular matrices"); // LCOV_EXCL_LINE + // throw std::invalid_argument("[Htool error] HPDDM takes lower symmetric/hermitian matrices or regular matrices"); // LCOV_EXCL_LINE + } + if (distributed_operator.get_symmetry_type() == 'S' && is_complex()) { + htool::Logger::get_instance().log(LogLevel::WARNING, "A symmetric matrix with UPLO='L' has been given to DDM solver. It will be considered hermitian by the solver"); // LCOV_EXCL_LINE + // std::cout << "[Htool warning] A symmetric matrix with UPLO='L' has been given to DDM solver. It will be considered hermitian by the solver." << std::endl; + } + } + + std::unique_ptr> hpddm_op = std::make_unique>(&distributed_operator); + hpddm_op->initialize(n, sym, local_dense_matrix.data(), neighbors, intersections); + + mytime = MPI_Wtime() - time; + + // Timing + MPI_Reduce(&(mytime), &(maxtime), 1, MPI_DOUBLE, MPI_MAX, 0, distributed_operator.get_comm()); + + DDM ddm_solver(n, distributed_operator, std::move(hpddm_op)); + ddm_solver.set_infos("DDM_setup_one_level_max", NbrToStr(maxtime)); + + return ddm_solver; +} + +// template +// DDM make_DDM_solver_w_custom_local_solver(const DistributedOperator &distributed_operator, Matrix &local_dense_matrix, const std::vector &neighbors, const std::vector> &intersections) { +// int rankWorld; +// MPI_Comm_rank(distributed_operator.get_comm(), &rankWorld); +// int n = local_dense_matrix.nb_rows(); + +// // Timing +// double mytime, maxtime; +// double time = MPI_Wtime(); + +// // Symmetry and storage +// bool sym = false; +// if (distributed_operator.get_symmetry_type() == 'S' || (distributed_operator.get_symmetry_type() == 'H' && is_complex())) { +// sym = true; +// } + +// std::unique_ptr> hpddm_op = std::make_unique>(&distributed_operator); +// hpddm_op->initialize(n, sym, local_dense_matrix.data(), neighbors, intersections); // we should not give a local dense matrix + +// auto dense_local_solver = std::make_unique>(local_dense_matrix); +// HPDDMCustomLocalSolver hpddm_custom_local_solver(dense_local_solver); +// hpddm_op->set_local_solver(); + +// mytime = MPI_Wtime() - time; + +// // Timing +// MPI_Reduce(&(mytime), &(maxtime), 1, MPI_DOUBLE, MPI_MAX, 0, distributed_operator.get_comm()); + +// DDM ddm_solver(n, distributed_operator, std::move(hpddm_op)); +// ddm_solver.set_infos("DDM_setup_one_level_max", NbrToStr(maxtime)); + +// return ddm_solver; +// } + +template +DDM make_DDM_solver_w_custom_local_solver(const DistributedOperator &distributed_operator, HMatrix &local_hmatrix, const std::vector &neighbors, const std::vector> &intersections, bool use_permutation) { + int n = local_hmatrix.get_target_cluster().get_size(); + + // Timing + double mytime, maxtime; + double time = MPI_Wtime(); + + // Symmetry and storage + bool sym = false; + if (distributed_operator.get_symmetry_type() == 'S' || (distributed_operator.get_symmetry_type() == 'H' && is_complex())) { + sym = true; + } + + std::unique_ptr> hpddm_op = std::make_unique>(&distributed_operator); + hpddm_op->initialize(n, sym, nullptr, neighbors, intersections); // we should not give a local dense matrix + + auto local_hmatrix_solver = std::make_unique>(local_hmatrix, use_permutation); + hpddm_op->get_local_solver().set_local_solver(std::move(local_hmatrix_solver)); + + mytime = MPI_Wtime() - time; + + // Timing + MPI_Reduce(&(mytime), &(maxtime), 1, MPI_DOUBLE, MPI_MAX, 0, distributed_operator.get_comm()); + + DDM ddm_solver(n, distributed_operator, std::move(hpddm_op)); + ddm_solver.set_infos("DDM_setup_one_level_max", NbrToStr(maxtime)); + + return ddm_solver; +} + +template +DDM make_DDM_solver_w_custom_local_solver(const DistributedOperator &distributed_operator, HMatrix &local_hmatrix, Matrix &B, Matrix &C, Matrix &D, const std::vector &neighbors, const std::vector> &intersections) { + int n = local_hmatrix.get_target_cluster().get_size() + C.nb_rows(); + + // Timing + double mytime, maxtime; + double time = MPI_Wtime(); + + // Symmetry and storage + bool sym = false; + if (distributed_operator.get_symmetry_type() == 'S' || (distributed_operator.get_symmetry_type() == 'H' && is_complex())) { + sym = true; + } + + std::unique_ptr> hpddm_op = std::make_unique>(&distributed_operator); + hpddm_op->initialize(n, sym, nullptr, neighbors, intersections); // we should not give a local dense matrix + + auto local_hmatrix_solver = std::make_unique>(local_hmatrix, B, C, D); + hpddm_op->get_local_solver().set_local_solver(std::move(local_hmatrix_solver)); + + mytime = MPI_Wtime() - time; + + // Timing + MPI_Reduce(&(mytime), &(maxtime), 1, MPI_DOUBLE, MPI_MAX, 0, distributed_operator.get_comm()); + + DDM ddm_solver(n, distributed_operator, std::move(hpddm_op)); + ddm_solver.set_infos("DDM_setup_one_level_max", NbrToStr(maxtime)); + + return ddm_solver; +} + } // namespace htool #endif diff --git a/include/htool/solvers/interfaces/virtual_local_solver.hpp b/include/htool/solvers/interfaces/virtual_local_solver.hpp new file mode 100644 index 00000000..5ea33b99 --- /dev/null +++ b/include/htool/solvers/interfaces/virtual_local_solver.hpp @@ -0,0 +1,50 @@ +#ifndef HTOOL_WRAPPERS_INTERFACE_HPP +#define HTOOL_WRAPPERS_INTERFACE_HPP +#define HPDDM_NUMBERING 'F' +#define HPDDM_DENSE 1 +#define HPDDM_FETI 0 +#define HPDDM_BDD 0 +#define LAPACKSUB +#define DLAPACK +#define EIGENSOLVER 1 +#if defined(__clang__) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wsign-compare" +# pragma clang diagnostic ignored "-Wshadow" +# pragma clang diagnostic ignored "-Wdouble-promotion" +# pragma clang diagnostic ignored "-Wunused-parameter" +# pragma clang diagnostic ignored "-Wnon-virtual-dtor" +#elif defined(__GNUC__) || defined(__GNUG__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wsign-compare" +# pragma GCC diagnostic ignored "-Wshadow" +# pragma GCC diagnostic ignored "-Wdouble-promotion" +# pragma GCC diagnostic ignored "-Wunused-parameter" +# pragma GCC diagnostic ignored "-Wnon-virtual-dtor" +# pragma GCC diagnostic ignored "-Wuseless-cast" +# pragma GCC diagnostic ignored "-Wunused-local-typedefs" +#endif + +#include + +#if defined(__clang__) +# pragma clang diagnostic pop +#elif defined(__GNUC__) || defined(__GNUG__) +# pragma GCC diagnostic pop +#endif + +namespace htool { + +template +class VirtualLocalSolver { + public: + virtual void numfact(HPDDM::MatrixCSR *const &, bool = false, CoefficientPrecision *const & = nullptr) = 0; + + virtual void solve(CoefficientPrecision *const, const unsigned short & = 1) const = 0; + virtual void solve(const CoefficientPrecision *const, CoefficientPrecision *const, const unsigned short & = 1) const = 0; + + virtual ~VirtualLocalSolver() {} +}; + +} // namespace htool +#endif diff --git a/include/htool/solvers/local_solvers/local_hmatrix_plus_overlap_solvers.hpp b/include/htool/solvers/local_solvers/local_hmatrix_plus_overlap_solvers.hpp new file mode 100644 index 00000000..42e9241f --- /dev/null +++ b/include/htool/solvers/local_solvers/local_hmatrix_plus_overlap_solvers.hpp @@ -0,0 +1,121 @@ +#ifndef HTOOL_SOLVERS_LOCAL_SOLVERS_HMATRIX_PLUS_OVERLAP_HPP +#define HTOOL_SOLVERS_LOCAL_SOLVERS_HMATRIX_PLUS_OVERLAP_HPP + +#include "../../hmatrix/linalg/factorization.hpp" +#include "../interfaces/virtual_local_solver.hpp" + +namespace htool { + +template +class LocalHMatrixPlusOverlapSolver : public VirtualLocalSolver { + private: + HMatrix &m_local_hmatrix; + Matrix &m_B, &m_C, &m_D; + mutable Matrix buffer; + + public: + LocalHMatrixPlusOverlapSolver(HMatrix &local_hmatrix, Matrix &B, Matrix &C, Matrix &D) : m_local_hmatrix(local_hmatrix), m_B(B), m_C(C), m_D(D) {} + void numfact(HPDDM::MatrixCSR *const &, bool = false, CoefficientPrecision *const & = nullptr) { + if (m_local_hmatrix.get_symmetry() == 'N') { + lu_factorization(m_local_hmatrix); + if (m_C.nb_rows() > 0) { + triangular_hmatrix_matrix_solve('L', 'L', 'N', 'U', CoefficientPrecision(1), m_local_hmatrix, m_B); + triangular_hmatrix_matrix_solve('R', 'U', 'N', 'N', CoefficientPrecision(1), m_local_hmatrix, m_C); + add_matrix_matrix_product('N', 'N', CoefficientPrecision(-1), m_C, m_B, CoefficientPrecision(1), m_D); + lu_factorization(m_D); + } + + } else if (m_local_hmatrix.get_symmetry() == 'S' || m_local_hmatrix.get_symmetry() == 'H') { + cholesky_factorization(m_local_hmatrix.get_UPLO(), m_local_hmatrix); + if (m_C.nb_rows() > 0) { + triangular_hmatrix_matrix_solve('R', m_local_hmatrix.get_UPLO(), is_complex() ? 'C' : 'T', 'N', CoefficientPrecision(1), m_local_hmatrix, m_C); + add_matrix_matrix_product('N', is_complex() ? 'C' : 'T', CoefficientPrecision(-1), m_C, m_C, CoefficientPrecision(1), m_D); + cholesky_factorization(m_local_hmatrix.get_UPLO(), m_D); + } + } + } + void solve(CoefficientPrecision *const b, const unsigned short &mu = 1) const { + int local_size_wo_overlap = m_local_hmatrix.get_target_cluster().get_size(); + int size_overlap = m_C.nb_rows(); + int local_size_w_overlap = local_size_wo_overlap + size_overlap; + Matrix b1(local_size_wo_overlap, mu), b2(size_overlap, mu); + + for (int i = 0; i < mu; i++) { + std::copy_n(b + i * local_size_w_overlap, local_size_wo_overlap, b1.data() + i * local_size_wo_overlap); + std::copy_n(b + i * local_size_w_overlap + local_size_wo_overlap, size_overlap, b2.data() + i * size_overlap); + } + + if (m_local_hmatrix.get_symmetry() == 'N') { + + triangular_hmatrix_matrix_solve('L', 'L', 'N', 'U', CoefficientPrecision(1), m_local_hmatrix, b1); + if (m_C.nb_rows() > 0) { + add_matrix_matrix_product('N', 'N', CoefficientPrecision(-1), m_C, b1, CoefficientPrecision(1), b2); + triangular_matrix_matrix_solve('L', 'L', 'N', 'U', CoefficientPrecision(1), m_D, b2); + + triangular_matrix_matrix_solve('L', 'U', 'N', 'N', CoefficientPrecision(1), m_D, b2); + add_matrix_matrix_product('N', 'N', CoefficientPrecision(-1), m_B, b2, CoefficientPrecision(1), b1); + } + triangular_hmatrix_matrix_solve('L', 'U', 'N', 'N', CoefficientPrecision(1), m_local_hmatrix, b1); + + } else if (m_local_hmatrix.get_symmetry() == 'S' || m_local_hmatrix.get_symmetry() == 'H') { + triangular_hmatrix_matrix_solve('L', 'L', 'N', 'N', CoefficientPrecision(1), m_local_hmatrix, b1); + + if (m_C.nb_rows() > 0) { + add_matrix_matrix_product('N', 'N', CoefficientPrecision(-1), m_C, b1, CoefficientPrecision(1), b2); + triangular_matrix_matrix_solve('L', 'L', 'N', 'U', CoefficientPrecision(1), m_D, b2); + + triangular_matrix_matrix_solve('L', 'L', is_complex() ? 'C' : 'T', 'N', CoefficientPrecision(1), m_D, b2); + add_matrix_matrix_product('N', 'N', CoefficientPrecision(-1), m_C, b2, CoefficientPrecision(1), b1); + } + triangular_hmatrix_matrix_solve('L', 'L', is_complex() ? 'C' : 'T', 'N', CoefficientPrecision(1), m_local_hmatrix, b1); + } + + for (int i = 0; i < mu; i++) { + std::copy_n(b1.data() + i * local_size_wo_overlap, local_size_wo_overlap, b + i * local_size_w_overlap); + std::copy_n(b2.data() + i * size_overlap, size_overlap, b + i * local_size_w_overlap + local_size_wo_overlap); + } + } + void solve(const CoefficientPrecision *const b, CoefficientPrecision *const x, const unsigned short &mu = 1) const { + + int local_size_wo_overlap = m_local_hmatrix.get_target_cluster().get_size(); + int size_overlap = m_C.nb_rows(); + int local_size_w_overlap = local_size_wo_overlap + size_overlap; + Matrix b1(local_size_wo_overlap, mu), b2(size_overlap, mu); + + for (int i = 0; i < mu; i++) { + std::copy_n(b + i * local_size_w_overlap, local_size_wo_overlap, b1.data() + i * local_size_wo_overlap); + std::copy_n(b + i * local_size_w_overlap + local_size_wo_overlap, size_overlap, b2.data() + i * size_overlap); + } + + if (m_local_hmatrix.get_symmetry() == 'N') { + + triangular_hmatrix_matrix_solve('L', 'L', 'N', 'U', CoefficientPrecision(1), m_local_hmatrix, b1); + + if (m_C.nb_rows() > 0) { + add_matrix_matrix_product('N', 'N', CoefficientPrecision(-1), m_C, b1, CoefficientPrecision(1), b2); + triangular_matrix_matrix_solve('L', 'L', 'N', 'U', CoefficientPrecision(1), m_D, b2); + triangular_matrix_matrix_solve('L', 'U', 'N', 'N', CoefficientPrecision(1), m_D, b2); + add_matrix_matrix_product('N', 'N', CoefficientPrecision(-1), m_B, b2, CoefficientPrecision(1), b1); + } + triangular_hmatrix_matrix_solve('L', 'U', 'N', 'N', CoefficientPrecision(1), m_local_hmatrix, b1); + + } else if (m_local_hmatrix.get_symmetry() == 'S' || m_local_hmatrix.get_symmetry() == 'H') { + triangular_hmatrix_matrix_solve('L', 'L', 'N', 'N', CoefficientPrecision(1), m_local_hmatrix, b1); + if (m_C.nb_rows() > 0) { + add_matrix_matrix_product('N', 'N', CoefficientPrecision(-1), m_C, b1, CoefficientPrecision(1), b2); + triangular_matrix_matrix_solve('L', 'L', 'N', 'U', CoefficientPrecision(1), m_D, b2); + + triangular_matrix_matrix_solve('L', 'L', is_complex() ? 'C' : 'T', 'N', CoefficientPrecision(1), m_D, b2); + add_matrix_matrix_product(is_complex() ? 'C' : 'T', 'N', CoefficientPrecision(-1), m_C, b2, CoefficientPrecision(1), b1); + } + triangular_hmatrix_matrix_solve('L', 'L', is_complex() ? 'C' : 'T', 'N', CoefficientPrecision(1), m_local_hmatrix, b1); + } + // std::cout << "TEST " << get_max(b1) << " " << get_max(b2) << "\n"; + for (int i = 0; i < mu; i++) { + std::copy_n(b1.data() + i * local_size_wo_overlap, local_size_wo_overlap, x + i * local_size_w_overlap); + std::copy_n(b2.data() + i * size_overlap, size_overlap, x + i * local_size_w_overlap + local_size_wo_overlap); + } + } +}; +} // namespace htool +#endif diff --git a/include/htool/solvers/local_solvers/local_hmatrix_solvers.hpp b/include/htool/solvers/local_solvers/local_hmatrix_solvers.hpp new file mode 100644 index 00000000..52023284 --- /dev/null +++ b/include/htool/solvers/local_solvers/local_hmatrix_solvers.hpp @@ -0,0 +1,82 @@ +#ifndef HTOOL_SOLVERS_LOCAL_SOLVERS_HMATRIX_HPP +#define HTOOL_SOLVERS_LOCAL_SOLVERS_HMATRIX_HPP + +#include "../../hmatrix/linalg/factorization.hpp" +#include "../interfaces/virtual_local_solver.hpp" + +namespace htool { + +template +class LocalHMatrixSolver : public VirtualLocalSolver { + private: + HMatrix &m_local_hmatrix; + bool m_is_using_permutation; + mutable Matrix buffer; + + public: + LocalHMatrixSolver(HMatrix &local_hmatrix, bool is_using_permutation) : m_local_hmatrix(local_hmatrix), m_is_using_permutation(is_using_permutation) {} + void numfact(HPDDM::MatrixCSR *const &, bool = false, CoefficientPrecision *const & = nullptr) { + if (m_local_hmatrix.get_symmetry() == 'N') { + lu_factorization(m_local_hmatrix); + } else if (m_local_hmatrix.get_symmetry() == 'S' || m_local_hmatrix.get_symmetry() == 'H') { + cholesky_factorization(m_local_hmatrix.get_UPLO(), m_local_hmatrix); + } + } + void solve(CoefficientPrecision *const b, const unsigned short &mu = 1) const { + + if (m_is_using_permutation) { + if (buffer.nb_rows() != m_local_hmatrix.nb_cols() or buffer.nb_cols() != mu) { + buffer.resize(m_local_hmatrix.nb_cols(), mu); + } + + auto &source_cluster = m_local_hmatrix.get_source_cluster(); + for (int i = 0; i < mu; i++) { + user_to_cluster(source_cluster, b + source_cluster.get_size() * i, buffer.data() + source_cluster.get_size() * i); + } + } else { + buffer.assign(m_local_hmatrix.nb_cols(), mu, b, false); + } + + if (m_local_hmatrix.get_symmetry() == 'N') { + lu_solve('N', m_local_hmatrix, buffer); + } else if (m_local_hmatrix.get_symmetry() == 'S' || m_local_hmatrix.get_symmetry() == 'H') { + cholesky_solve(m_local_hmatrix.get_UPLO(), m_local_hmatrix, buffer); + } + + if (m_is_using_permutation) { + auto &target_cluster = m_local_hmatrix.get_target_cluster(); + for (int i = 0; i < mu; i++) { + cluster_to_user(target_cluster, buffer.data() + target_cluster.get_size() * i, b + target_cluster.get_size() * i); + } + } + } + void solve(const CoefficientPrecision *const b, CoefficientPrecision *const x, const unsigned short &mu = 1) const { + if (buffer.nb_rows() != m_local_hmatrix.nb_cols() or buffer.nb_cols() != mu) { + buffer.resize(m_local_hmatrix.nb_cols(), mu); + } + if (m_is_using_permutation) { + auto &source_cluster = m_local_hmatrix.get_source_cluster(); + for (int i = 0; i < mu; i++) { + user_to_cluster(source_cluster, b + source_cluster.get_size() * i, buffer.data() + source_cluster.get_size() * i); + } + } else { + buffer.assign(m_local_hmatrix.nb_cols(), mu, x, false); + std::copy_n(b, mu * m_local_hmatrix.nb_cols(), buffer.data()); + } + + if (m_local_hmatrix.get_symmetry() == 'N') { + lu_solve('N', m_local_hmatrix, buffer); + } else if (m_local_hmatrix.get_symmetry() == 'S' || m_local_hmatrix.get_symmetry() == 'H') { + cholesky_solve(m_local_hmatrix.get_UPLO(), m_local_hmatrix, buffer); + } + + if (m_is_using_permutation) { + auto &target_cluster = m_local_hmatrix.get_target_cluster(); + for (int i = 0; i < mu; i++) { + cluster_to_user(target_cluster, buffer.data() + target_cluster.get_size() * i, x + target_cluster.get_size() * i); + } + } + } +}; +} // namespace htool +#endif diff --git a/include/htool/solvers/utility.hpp b/include/htool/solvers/utility.hpp index 51895741..cf8a7580 100644 --- a/include/htool/solvers/utility.hpp +++ b/include/htool/solvers/utility.hpp @@ -11,6 +11,7 @@ class LocalNumberingBuilder { public: std::vector local_to_global_numbering; std::vector> intersections; + LocalNumberingBuilder() {} LocalNumberingBuilder(const std::vector &ovr_subdomain_to_global, const std::vector &cluster_to_ovr_subdomain, const std::vector> &input_intersections) : local_to_global_numbering(ovr_subdomain_to_global.size()), intersections(input_intersections.size()) { // Renumbering for the overlap @@ -38,39 +39,47 @@ class LocalNumberingBuilder { } }; -template -class DefaultSolverBuilder { - private: - std::function(const HMatrix *)> initialize_diagonal_block = [](const HMatrix *block_diagonal_hmatrix_ptr) { - Matrix block(block_diagonal_hmatrix_ptr->get_target_cluster().get_size(), block_diagonal_hmatrix_ptr->get_source_cluster().get_size()); - copy_to_dense(*block_diagonal_hmatrix_ptr, block.data()); - return block; - }; - Matrix m_block_diagonal_dense_matrix; - std::vector m_neighbors; - std::vector> m_intersections; +// template > +// class BlockJacobiWithDenseLocalSolver { +// private: +// std::function(const HMatrix &)> initialize_diagonal_block = [](const HMatrix &block_diagonal_hmatrix_ptr) { +// Matrix block(block_diagonal_hmatrix_ptr.get_target_cluster().get_size(), block_diagonal_hmatrix_ptr.get_source_cluster().get_size()); +// copy_to_dense(block_diagonal_hmatrix_ptr, block.data()); +// return block; +// }; +// Matrix m_block_diagonal_dense_matrix; +// std::vector m_neighbors; +// std::vector> m_intersections; - public: - DDM solver; - DefaultSolverBuilder(DistributedOperator &distributed_operator, const HMatrix *block_diagonal_hmatrix) : m_block_diagonal_dense_matrix(initialize_diagonal_block(block_diagonal_hmatrix)), solver(distributed_operator, m_block_diagonal_dense_matrix, m_neighbors, m_intersections) {} -}; +// public: +// DDM solver; +// BlockJacobiWithDenseLocalSolver(DistributedOperator &distributed_operator, const HMatrix &block_diagonal_hmatrix) : m_block_diagonal_dense_matrix(initialize_diagonal_block(block_diagonal_hmatrix)), solver(make_DDM_solver(distributed_operator, m_block_diagonal_dense_matrix, m_neighbors, m_intersections)) {} +// }; -template -class DefaultDDMSolverBuilderAddingOverlap { +template > +class DDMSolverWithDenseLocalSolver { private: + std::vector m_neighbors; + std::vector> m_intersections; LocalNumberingBuilder m_local_numbering; public: const std::vector &local_to_global_numbering; private: - std::function(DistributedOperator &, const HMatrix *, const VirtualGeneratorInUserNumbering &)> initialize_diagonal_block = [this](DistributedOperator &distributed_operator0, const HMatrix *block_diagonal_hmatrix0, const VirtualGeneratorInUserNumbering &generator0) { + std::function(const HMatrix &)> initialize_diagonal_block_without_overlap = [](const HMatrix &block_diagonal_hmatrix_ptr) { + Matrix block(block_diagonal_hmatrix_ptr.get_target_cluster().get_size(), block_diagonal_hmatrix_ptr.get_source_cluster().get_size()); + copy_to_dense(block_diagonal_hmatrix_ptr, block.data()); + return block; + }; + + std::function(DistributedOperator &, const HMatrix &, const VirtualGeneratorInUserNumbering &)> initialize_diagonal_block_adding_overlap = [this](DistributedOperator &distributed_operator0, const HMatrix &block_diagonal_hmatrix0, const VirtualGeneratorInUserNumbering &generator0) { int local_size_with_overlap = local_to_global_numbering.size(); - int local_size_without_overlap = block_diagonal_hmatrix0->get_target_cluster().get_size(); + int local_size_without_overlap = block_diagonal_hmatrix0.get_target_cluster().get_size(); Matrix block_diagonal_dense_matrix_with_overlap(local_size_with_overlap, local_size_with_overlap), block_diagonal_dense_matrix_without_overlap(local_size_without_overlap, local_size_without_overlap); // Diagonal block without overlap - copy_to_dense(*block_diagonal_hmatrix0, block_diagonal_dense_matrix_without_overlap.data()); + copy_to_dense(block_diagonal_hmatrix0, block_diagonal_dense_matrix_without_overlap.data()); // Assemble block diagonal dense matrix with overlap for (int j = 0; j < local_size_without_overlap; j++) { @@ -103,60 +112,174 @@ class DefaultDDMSolverBuilderAddingOverlap { } return block_diagonal_dense_matrix_with_overlap; }; - std::vector> m_intersections; + + std::function(const HMatrix &)> initialize_diagonal_block_with_overlap = [](const HMatrix &block_diagonal_hmatrix0) { + int local_size_with_overlap = block_diagonal_hmatrix0.get_target_cluster().get_size(); + // Matrix block_diagonal_dense_matrix_with_overlap(local_size_with_overlap, local_size_with_overlap); + Matrix permuted_block_diagonal_dense_matrix_with_overlap(local_size_with_overlap, local_size_with_overlap); + copy_to_dense_in_user_numbering(block_diagonal_hmatrix0, permuted_block_diagonal_dense_matrix_with_overlap.data()); + + return permuted_block_diagonal_dense_matrix_with_overlap; + }; + + std::function(int, const CoordinatePrecision *)> initialize_local_cluster = [this](int spatial_dimension, const CoordinatePrecision *global_geometry) { + // Local geometry + int local_size = local_to_global_numbering.size(); + std::vector local_geometry(spatial_dimension * local_size); + for (int i = 0; i < local_to_global_numbering.size(); i++) { + for (int dimension = 0; dimension < spatial_dimension; dimension++) { + local_geometry[spatial_dimension * i + dimension] = global_geometry[spatial_dimension * local_to_global_numbering[i] + dimension]; + } + } + + // Local cluster + ClusterTreeBuilder recursive_build_strategy; + return recursive_build_strategy.create_cluster_tree(local_to_global_numbering.size(), spatial_dimension, local_geometry.data(), 2, 2, nullptr); + }; + + std::unique_ptr> local_cluster; + + std::function(const VirtualGeneratorInUserNumbering &, underlying_type, CoordinatePrecision, char)> initialize_local_hmatrix = [this](const VirtualGeneratorInUserNumbering &generator, underlying_type epsilon, CoordinatePrecision eta, char symmetry) { + struct LocalGeneratorInUserNumbering : public VirtualGeneratorInUserNumbering { + const std::vector &m_target_local_to_global_numbering; + const std::vector &m_source_local_to_global_numbering; + const VirtualGeneratorInUserNumbering &m_generator; + + LocalGeneratorInUserNumbering(const VirtualGeneratorInUserNumbering &generator, const std::vector &target_local_to_global_numbering, const std::vector &source_local_to_global_numbering) : m_target_local_to_global_numbering(target_local_to_global_numbering), m_source_local_to_global_numbering(source_local_to_global_numbering), m_generator(generator) {} + + void copy_submatrix(int M, int N, const int *const rows, const int *const cols, CoefficientPrecision *ptr) const override { + std::vector new_rows(M), new_cols(N); + for (int i = 0; i < M; i++) { + new_rows[i] = m_target_local_to_global_numbering[rows[i]]; + } + for (int j = 0; j < N; j++) { + new_cols[j] = m_source_local_to_global_numbering[cols[j]]; + } + m_generator.copy_submatrix(M, N, new_rows.data(), new_cols.data(), ptr); + } + }; + + // Local Generator + LocalGeneratorInUserNumbering local_generator(generator, local_to_global_numbering, local_to_global_numbering); + + // Local HMatrix + HMatrixTreeBuilder local_hmatrix_builder(*local_cluster, *local_cluster, epsilon, eta, symmetry, symmetry != 'N' ? 'L' : 'N', -1, -1, -1); + + return local_hmatrix_builder.build(local_generator); + }; public: + std::unique_ptr> local_hmatrix; Matrix block_diagonal_dense_matrix; - DDM solver; + DDM solver; + + // Block Jacobi + DDMSolverWithDenseLocalSolver(DistributedOperator &distributed_operator, const HMatrix &block_diagonal_hmatrix) : local_to_global_numbering(block_diagonal_hmatrix.get_target_cluster().get_permutation()), block_diagonal_dense_matrix(initialize_diagonal_block_without_overlap(block_diagonal_hmatrix)), solver(make_DDM_solver(distributed_operator, block_diagonal_dense_matrix, m_neighbors, m_intersections)) {} + + // DDM adding overlap to local hmatrix without overlap + DDMSolverWithDenseLocalSolver(DistributedOperator &distributed_operator, const HMatrix &block_diagonal_hmatrix, const VirtualGeneratorInUserNumbering &generator, const std::vector &ovr_subdomain_to_global, const std::vector &cluster_to_ovr_subdomain, const std::vector &neighbors, const std::vector> &intersections) : m_local_numbering(ovr_subdomain_to_global, cluster_to_ovr_subdomain, intersections), local_to_global_numbering(m_local_numbering.local_to_global_numbering), block_diagonal_dense_matrix(initialize_diagonal_block_adding_overlap(distributed_operator, block_diagonal_hmatrix, generator)), solver(make_DDM_solver(distributed_operator, block_diagonal_dense_matrix, neighbors, m_local_numbering.intersections)) {} - DefaultDDMSolverBuilderAddingOverlap(DistributedOperator &distributed_operator, const HMatrix *block_diagonal_hmatrix, const VirtualGeneratorInUserNumbering &generator, const std::vector &ovr_subdomain_to_global, const std::vector &cluster_to_ovr_subdomain, const std::vector &neighbors, const std::vector> &intersections) : m_local_numbering(ovr_subdomain_to_global, cluster_to_ovr_subdomain, intersections), local_to_global_numbering(m_local_numbering.local_to_global_numbering), block_diagonal_dense_matrix(initialize_diagonal_block(distributed_operator, block_diagonal_hmatrix, generator)), solver(distributed_operator, block_diagonal_dense_matrix, neighbors, m_local_numbering.intersections) {} + // DDM building local hmatrix with overlap + DDMSolverWithDenseLocalSolver(DistributedOperator &distributed_operator, const std::vector &ovr_subdomain_to_global, const std::vector &cluster_to_ovr_subdomain, const std::vector &neighbors, const std::vector> &intersections, const VirtualGeneratorInUserNumbering &generator, int spatial_dimension, const CoordinatePrecision *global_geometry, underlying_type epsilon, CoordinatePrecision eta) : m_local_numbering(ovr_subdomain_to_global, cluster_to_ovr_subdomain, intersections), local_to_global_numbering(m_local_numbering.local_to_global_numbering), local_cluster(std::make_unique>(initialize_local_cluster(spatial_dimension, global_geometry))), local_hmatrix(std::make_unique>(initialize_local_hmatrix(generator, epsilon, eta, distributed_operator.get_symmetry_type()))), block_diagonal_dense_matrix(initialize_diagonal_block_with_overlap(*local_hmatrix)), solver(make_DDM_solver(distributed_operator, block_diagonal_dense_matrix, neighbors, m_local_numbering.intersections)) {} }; -template -class DefaultDDMSolverBuilder { +template > +class DDMSolver { private: - std::function(const HMatrix &)> initialize_diagonal_block = [](const HMatrix &block_diagonal_hmatrix0) { - int local_size_with_overlap = block_diagonal_hmatrix0.get_target_cluster().get_size(); - Matrix block_diagonal_dense_matrix_with_overlap(local_size_with_overlap, local_size_with_overlap); - Matrix permuted_block_diagonal_dense_matrix_with_overlap(local_size_with_overlap, local_size_with_overlap); - // Diagonal block without overlap - copy_to_dense(block_diagonal_hmatrix0, block_diagonal_dense_matrix_with_overlap.data()); + std::vector m_neighbors; + std::vector> m_intersections; + LocalNumberingBuilder m_local_numbering; - int nr = block_diagonal_hmatrix0.get_target_cluster().get_size(); - int nc = block_diagonal_hmatrix0.get_source_cluster().get_size(); - auto &target_permutation = block_diagonal_hmatrix0.get_target_cluster().get_permutation(); + public: + const std::vector &local_to_global_numbering; - if (block_diagonal_hmatrix0.get_symmetry() == 'N') { - for (int i = 0; i < nr; i++) { - for (int j = 0; j < nc; j++) { + private: + std::function, 3>(DistributedOperator &, const HMatrix &, const VirtualGeneratorInUserNumbering &)> initialize_blocks_in_overlap = [this](DistributedOperator &distributed_operator0, const HMatrix &block_diagonal_hmatrix0, const VirtualGeneratorInUserNumbering &generator0) { + std::array, 3> blocks_in_overlap0; + Matrix &B = blocks_in_overlap0[0]; + Matrix &C = blocks_in_overlap0[1]; + Matrix &D = blocks_in_overlap0[2]; + int local_size_with_overlap = local_to_global_numbering.size(); + int local_size_without_overlap = block_diagonal_hmatrix0.get_target_cluster().get_size(); - permuted_block_diagonal_dense_matrix_with_overlap(target_permutation[i], target_permutation[j]) = block_diagonal_dense_matrix_with_overlap(i, j); - } + std::vector overlap_num(local_to_global_numbering.begin() + local_size_without_overlap, local_to_global_numbering.end()); + std::vector inside_num(local_to_global_numbering.begin(), local_to_global_numbering.begin() + local_size_without_overlap); + + // Overlap + D.resize(local_size_with_overlap - local_size_without_overlap, local_size_with_overlap - local_size_without_overlap); + C.resize(local_size_with_overlap - local_size_without_overlap, local_size_without_overlap); + + generator0.copy_submatrix(local_size_with_overlap - local_size_without_overlap, local_size_without_overlap, overlap_num.data(), inside_num.data(), C.data()); + + generator0.copy_submatrix(local_size_with_overlap - local_size_without_overlap, local_size_with_overlap - local_size_without_overlap, overlap_num.data(), overlap_num.data(), D.data()); + + bool sym = (distributed_operator0.get_symmetry_type() == 'S' || (distributed_operator0.get_symmetry_type() == 'H' && is_complex())) ? true : false; + if (!sym) { + B.resize(local_size_without_overlap, local_size_with_overlap - local_size_without_overlap); + generator0.copy_submatrix(local_size_without_overlap, local_size_with_overlap - local_size_without_overlap, inside_num.data(), overlap_num.data(), B.data()); + } + return blocks_in_overlap0; + }; + + std::function(int, const CoordinatePrecision *)> initialize_local_cluster = [this](int spatial_dimension, const CoordinatePrecision *global_geometry) { + // Local geometry + int local_size = local_to_global_numbering.size(); + std::vector local_geometry(spatial_dimension * local_size); + for (int i = 0; i < local_to_global_numbering.size(); i++) { + for (int dimension = 0; dimension < spatial_dimension; dimension++) { + local_geometry[spatial_dimension * i + dimension] = global_geometry[spatial_dimension * local_to_global_numbering[i] + dimension]; } - } else { - int index_i, index_j; - for (int i = 0; i < nr; i++) { - for (int j = 0; j <= i; j++) { - if (target_permutation[i] < target_permutation[j]) { - index_i = target_permutation[j]; - index_j = target_permutation[i]; - } else { - index_i = target_permutation[i]; - index_j = target_permutation[j]; - } - permuted_block_diagonal_dense_matrix_with_overlap(index_i, index_j) = block_diagonal_dense_matrix_with_overlap(i, j); + } + + // Local cluster + ClusterTreeBuilder recursive_build_strategy; + return recursive_build_strategy.create_cluster_tree(local_to_global_numbering.size(), spatial_dimension, local_geometry.data(), 2, 2); + }; + + std::unique_ptr> local_cluster; + + std::function(const VirtualGeneratorInUserNumbering &, underlying_type, CoordinatePrecision, char)> initialize_local_hmatrix = [this](const VirtualGeneratorInUserNumbering &generator, underlying_type epsilon, CoordinatePrecision eta, char symmetry) { + struct LocalGeneratorInUserNumbering : public VirtualGeneratorInUserNumbering { + const std::vector &m_target_local_to_global_numbering; + const std::vector &m_source_local_to_global_numbering; + const VirtualGeneratorInUserNumbering &m_generator; + + LocalGeneratorInUserNumbering(const VirtualGeneratorInUserNumbering &generator, const std::vector &target_local_to_global_numbering, const std::vector &source_local_to_global_numbering) : m_target_local_to_global_numbering(target_local_to_global_numbering), m_source_local_to_global_numbering(source_local_to_global_numbering), m_generator(generator) {} + + void copy_submatrix(int M, int N, const int *const rows, const int *const cols, CoefficientPrecision *ptr) const override { + std::vector new_rows(M), new_cols(N); + for (int i = 0; i < M; i++) { + new_rows[i] = m_target_local_to_global_numbering[rows[i]]; + } + for (int j = 0; j < N; j++) { + new_cols[j] = m_source_local_to_global_numbering[cols[j]]; } + m_generator.copy_submatrix(M, N, new_rows.data(), new_cols.data(), ptr); } - } + }; - return permuted_block_diagonal_dense_matrix_with_overlap; + // Local Generator + LocalGeneratorInUserNumbering local_generator(generator, local_to_global_numbering, local_to_global_numbering); + + // Local HMatrix + HMatrixTreeBuilder local_hmatrix_builder(*local_cluster, *local_cluster, epsilon, eta, symmetry, symmetry != 'N' ? 'L' : 'N', -1, -1, -1); + + return local_hmatrix_builder.build(local_generator); }; public: - Matrix block_diagonal_dense_matrix; - DDM solver; + std::unique_ptr> local_hmatrix; // A + std::array, 3> blocks_in_overlap; // B,C,D + DDM solver; + + // Block Jacobi + DDMSolver(DistributedOperator &distributed_operator, HMatrix &block_diagonal_hmatrix) : local_to_global_numbering(block_diagonal_hmatrix.get_target_cluster().get_permutation()), solver(make_DDM_solver_w_custom_local_solver(distributed_operator, block_diagonal_hmatrix, m_neighbors, m_intersections, false)) {} + + // DDM building local hmatrix adding overlap + DDMSolver(DistributedOperator &distributed_operator, HMatrix &block_diagonal_hmatrix, const VirtualGeneratorInUserNumbering &generator, const std::vector &ovr_subdomain_to_global, const std::vector &cluster_to_ovr_subdomain, const std::vector &neighbors, const std::vector> &intersections) : m_local_numbering(ovr_subdomain_to_global, cluster_to_ovr_subdomain, intersections), local_to_global_numbering(m_local_numbering.local_to_global_numbering), blocks_in_overlap(initialize_blocks_in_overlap(distributed_operator, block_diagonal_hmatrix, generator)), solver(make_DDM_solver_w_custom_local_solver(distributed_operator, block_diagonal_hmatrix, blocks_in_overlap[0], blocks_in_overlap[1], blocks_in_overlap[2], neighbors, m_local_numbering.intersections)) {} - DefaultDDMSolverBuilder(DistributedOperator &distributed_operator, const HMatrix &block_diagonal_hmatrix, const std::vector &neighbors, const std::vector> &intersections) : block_diagonal_dense_matrix(initialize_diagonal_block(block_diagonal_hmatrix)), solver(distributed_operator, block_diagonal_dense_matrix, neighbors, intersections) {} + // DDM building local hmatrix with overlap + DDMSolver(DistributedOperator &distributed_operator, const std::vector &ovr_subdomain_to_global, const std::vector &cluster_to_ovr_subdomain, const std::vector &neighbors, const std::vector> &intersections, const VirtualGeneratorInUserNumbering &generator, int spatial_dimension, const CoordinatePrecision *global_geometry, underlying_type epsilon, CoordinatePrecision eta) : m_local_numbering(ovr_subdomain_to_global, cluster_to_ovr_subdomain, intersections), local_to_global_numbering(m_local_numbering.local_to_global_numbering), local_cluster(std::make_unique>(initialize_local_cluster(spatial_dimension, global_geometry))), local_hmatrix(std::make_unique>(initialize_local_hmatrix(generator, epsilon, eta, distributed_operator.get_symmetry_type()))), solver(make_DDM_solver_w_custom_local_solver(distributed_operator, *local_hmatrix, neighbors, m_local_numbering.intersections, true)) {} }; } // namespace htool diff --git a/include/htool/wrappers/wrapper_hpddm.hpp b/include/htool/wrappers/wrapper_hpddm.hpp index 56c2c93f..80104afd 100644 --- a/include/htool/wrappers/wrapper_hpddm.hpp +++ b/include/htool/wrappers/wrapper_hpddm.hpp @@ -35,26 +35,50 @@ #endif #include "../distributed_operator/distributed_operator.hpp" - +#include "../solvers/interfaces/virtual_local_solver.hpp" namespace htool { -template +template class LocalSolver> class DDM; template -class HPDDMDense final : public HpDense { +class HPDDMCustomLocalSolver { + private: + std::unique_ptr> m_local_solver; + + public: + HPDDMCustomLocalSolver() {} + HPDDMCustomLocalSolver(const HPDDMCustomLocalSolver &) = delete; + void set_local_solver(std::unique_ptr> local_solver) { + m_local_solver = std::move(local_solver); + } + virtual ~HPDDMCustomLocalSolver() { dtor(); } + static constexpr char numbering_ = 'C'; + void dtor() { m_local_solver.reset(nullptr); } + template + void numfact(HPDDM::MatrixCSR *const &dummy, bool boolean = false, CoefficientPrecision *const &ptr = nullptr) { + m_local_solver->numfact(dummy, boolean, ptr); + } + void solve(CoefficientPrecision *const in, const unsigned short &a = 1) const { + m_local_solver->solve(in, a); + } + void solve(const CoefficientPrecision *const in, CoefficientPrecision *const in2, const unsigned short &out = 1) const { m_local_solver->solve(in, in2, out); } +}; + +template class LocalSolver> +class HPDDMOperator final : public HPDDM::Dense { protected: const DistributedOperator *HA; std::vector *in_global, *buffer; public: - typedef HpDense super; + typedef HPDDM::Dense super; - HPDDMDense(const DistributedOperator *A) : HA(A) { + HPDDMOperator(const DistributedOperator *A) : HA(A) { in_global = new std::vector; buffer = new std::vector; } - ~HPDDMDense() { + ~HPDDMOperator() { delete in_global; in_global = nullptr; delete buffer; @@ -113,13 +137,13 @@ class HPDDMDense final : public HpDense { void setType(typename super::Prcndtnr type) { this->type_ = type; } - friend class DDM; + friend class DDM; }; } // namespace htool -template -struct HPDDM::hpddm_method_id> { - static constexpr char value = HPDDM::hpddm_method_id>::value; +template class LocalSolver> +struct HPDDM::hpddm_method_id> { + static constexpr char value = HPDDM::hpddm_method_id>::value; }; #endif diff --git a/tests/functional_tests/solvers/CMakeLists.txt b/tests/functional_tests/solvers/CMakeLists.txt index ac17472d..99ddbe86 100644 --- a/tests/functional_tests/solvers/CMakeLists.txt +++ b/tests/functional_tests/solvers/CMakeLists.txt @@ -13,7 +13,7 @@ FetchContent_Declare( FetchContent_GetProperties(data_test_repository) if(NOT data_test_repository_POPULATED) - FetchContent_Populate(data_test_repository) + FetchContent_MakeAvailable(data_test_repository) endif() set(Test_solver_ARGS ${data_test_repository_SOURCE_DIR}/data/) diff --git a/tests/functional_tests/solvers/test_solver.cpp b/tests/functional_tests/solvers/test_solver.cpp index f49d5d2c..f11559dc 100644 --- a/tests/functional_tests/solvers/test_solver.cpp +++ b/tests/functional_tests/solvers/test_solver.cpp @@ -14,6 +14,8 @@ int main(int argc, char *argv[]) { } string datapath = argv[1]; + Logger::get_instance().set_current_log_level(LogLevel::INFO); + bool is_error = false; for (auto nb_rhs : {1, 5}) { @@ -25,9 +27,12 @@ int main(int argc, char *argv[]) { } for (auto symmetry : symmetries) { - is_error = is_error || test_solver_wo_overlap(argc, argv, nb_rhs, symmetry, datapath_final); - is_error = is_error || test_solver_ddm_adding_overlap(argc, argv, nb_rhs, data_symmetry, symmetry, datapath_final); - is_error = is_error || test_solver_ddm(argc, argv, nb_rhs, data_symmetry, symmetry, datapath_final); + is_error = is_error || test_solver_wo_overlap, double>>(argc, argv, nb_rhs, symmetry, datapath_final); + is_error = is_error || test_solver_ddm_adding_overlap>>(argc, argv, nb_rhs, data_symmetry, symmetry, datapath_final); + is_error = is_error || test_solver_ddm>>(argc, argv, nb_rhs, data_symmetry, symmetry, datapath_final); + is_error = is_error || test_solver_wo_overlap, double>>(argc, argv, nb_rhs, symmetry, datapath_final); + test_solver_ddm_adding_overlap>>(argc, argv, nb_rhs, data_symmetry, symmetry, datapath_final); + is_error = is_error || test_solver_ddm>>(argc, argv, nb_rhs, data_symmetry, symmetry, datapath_final); } } } diff --git a/tests/functional_tests/solvers/test_solver_ddm.hpp b/tests/functional_tests/solvers/test_solver_ddm.hpp index 82f7ba0f..a879393b 100644 --- a/tests/functional_tests/solvers/test_solver_ddm.hpp +++ b/tests/functional_tests/solvers/test_solver_ddm.hpp @@ -9,6 +9,7 @@ using namespace std; using namespace htool; +template int test_solver_ddm(int argc, char *argv[], int mu, char data_symmetry, char symmetric, std::string datapath) { // Get the number of processes @@ -32,7 +33,8 @@ int test_solver_ddm(int argc, char *argv[], int mu, char data_symmetry, char sym // HTOOL double epsilon = tol; - double eta = 10; + // epsilon = 1e-10; + double eta = 10; char UPLO = 'N'; if (symmetric != 'N') { @@ -92,38 +94,16 @@ int test_solver_ddm(int argc, char *argv[], int mu, char data_symmetry, char sym bytes_to_vector(intersections[p], datapath + "/intersections_" + NbrToStr(size) + "_" + NbrToStr(rank) + "_" + NbrToStr(p) + ".bin"); } - // Local numbering - LocalNumberingBuilder local_numbering_builder(ovr_subdomain_to_global, cluster_to_ovr_subdomain, intersections); - auto &renumbered_intersections = local_numbering_builder.intersections; - auto &local_to_global_numbering = local_numbering_builder.local_to_global_numbering; - int local_size_wo_overlap = Operator.get_target_partition().get_size_of_partition(rank); - // int local_size_with_overlap = local_to_global_numbering.size(); - - std::vector geometry(n), local_geometry(local_to_global_numbering.size() * 3); - bytes_to_vector(geometry, datapath + "/geometry.bin"); - for (int i = 0; i < local_to_global_numbering.size(); i++) { - local_geometry[3 * i + 0] = geometry[3 * local_to_global_numbering[i] + 0]; - local_geometry[3 * i + 1] = geometry[3 * local_to_global_numbering[i] + 1]; - local_geometry[3 * i + 2] = geometry[3 * local_to_global_numbering[i] + 2]; - } - - ClusterTreeBuilder recursive_build_strategy; - // recursive_build_strategy.set_minclustersize(2); - Cluster local_cluster = recursive_build_strategy.create_cluster_tree(local_to_global_numbering.size(), 3, local_geometry.data(), 2, 2, nullptr); - - LocalGeneratorInUserNumberingFromMatrix> local_generator(A, local_to_global_numbering, local_to_global_numbering); - - HMatrixTreeBuilder, double> local_hmatrix_builder(local_cluster, local_cluster, epsilon, eta, symmetric, UPLO, -1, -1, -1); - - HMatrix, double> local_hmatrix = local_hmatrix_builder.build(local_generator); - print_distributed_hmatrix_information(local_hmatrix, std::cout, MPI_COMM_WORLD); - save_leaves_with_rank(local_hmatrix, "local_hmatrix_" + std::to_string(rank)); - // Errors + // Error double error2; // Solve - DefaultDDMSolverBuilder, double> default_ddm_solver(Operator, local_hmatrix, neighbors, renumbered_intersections); - DDM> &ddm_with_overlap = default_ddm_solver.solver; + if (rank == 0) + std::cout << "Creating HMatrix" << std::endl; + std::vector geometry(n); + bytes_to_vector(geometry, datapath + "/geometry.bin"); + solver_builder default_ddm_solver(Operator, ovr_subdomain_to_global, cluster_to_ovr_subdomain, neighbors, intersections, Generator, 3, geometry.data(), epsilon, eta); + auto &ddm_with_overlap = default_ddm_solver.solver; // No precond with overlap if (rank == 0) @@ -148,13 +128,11 @@ int test_solver_ddm(int argc, char *argv[], int mu, char data_symmetry, char sym MPI_Barrier(MPI_COMM_WORLD); opt.parse("-hpddm_schwarz_method asm "); Matrix> Ki; - if (data_symmetry == 'S' && size > 1) { opt.remove("geneo_threshold"); opt.parse("-hpddm_geneo_nu 2"); Ki.bytes_to_matrix(datapath + "/Ki_" + NbrToStr(size) + "_" + NbrToStr(rank) + ".bin"); - // ddm_with_overlap.build_coarse_space(Ki); - + int local_size_wo_overlap = Operator.get_target_partition().get_size_of_partition(rank); auto geneo_coarse_space_dense_builder = GeneoCoarseSpaceDenseBuilder>::GeneoWithNu(local_size_wo_overlap, default_ddm_solver.block_diagonal_dense_matrix, Ki, symmetric, UPLO, 2); // geneo_coarse_space_dense_builder.set_geneo_nu(2); GeneoCoarseOperatorBuilder> geneo_coarse_operator_builder(Operator); @@ -228,10 +206,10 @@ int test_solver_ddm(int argc, char *argv[], int mu, char data_symmetry, char sym std::cout << "RAS two level with overlap and threshold:" << std::endl; opt.parse("-hpddm_schwarz_method ras -hpddm_schwarz_coarse_correction additive"); // DDM> ddm_with_overlap_threshold(Generator, &Operator, ovr_subdomain_to_global, cluster_to_ovr_subdomain, neighbors, intersections); - DefaultDDMSolverBuilder, double> default_ddm_solver_with_threshold(Operator, local_hmatrix, neighbors, renumbered_intersections); - DDM> &ddm_with_overlap_threshold = default_ddm_solver_with_threshold.solver; // build_ddm_solver(Operator, local_block_diagonal_hmatrix, Generator, ovr_subdomain_to_global, cluster_to_ovr_subdomain, neighbors, intersections); + solver_builder default_ddm_solver_with_threshold(Operator, ovr_subdomain_to_global, cluster_to_ovr_subdomain, neighbors, intersections, Generator, 3, geometry.data(), epsilon, eta); + auto &ddm_with_overlap_threshold = default_ddm_solver_with_threshold.solver; Ki.bytes_to_matrix(datapath + "/Ki_" + NbrToStr(size) + "_" + NbrToStr(rank) + ".bin"); - // ddm_with_overlap_threshold.build_coarse_space(Ki); + int local_size_wo_overlap = Operator.get_target_partition().get_size_of_partition(rank); auto geneo_coarse_space_dense_builder = GeneoCoarseSpaceDenseBuilder>::GeneoWithThreshold(local_size_wo_overlap, default_ddm_solver.block_diagonal_dense_matrix, Ki, symmetric, UPLO, 100); // geneo_coarse_space_dense_builder.set_geneo_threshold(100); GeneoCoarseOperatorBuilder> geneo_coarse_operator_builder(Operator); @@ -252,3 +230,225 @@ int test_solver_ddm(int argc, char *argv[], int mu, char data_symmetry, char sym return test; } + +template <> +int test_solver_ddm>>(int argc, char *argv[], int mu, char, char symmetric, std::string datapath) { + + // Get the number of processes + int size; + MPI_Comm_size(MPI_COMM_WORLD, &size); + + // Get the rank of the process + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + // + bool test = 0; + + // HPDDM verbosity + HPDDM::Option &opt = *HPDDM::Option::get(); + opt.parse(argc, argv, rank == 0); + double tol = opt.val("tol", 1e-6); + if (rank != 0) + opt.remove("verbosity"); + opt.parse("-hpddm_max_it 200"); + + // HTOOL + double epsilon = tol; + // epsilon = 1e-10; + double eta = 10; + + char UPLO = 'N'; + if (symmetric != 'N') { + UPLO = 'L'; + } + + // Clustering + if (rank == 0) + std::cout << "Creating cluster tree" << std::endl; + Cluster target_cluster = read_cluster_tree(datapath + "/cluster_" + NbrToStr(size) + "_cluster_tree_properties.csv", datapath + "/cluster_" + NbrToStr(size) + "_cluster_tree.csv"); + + // Matrix + if (rank == 0) + std::cout << "Creating generators" << std::endl; + Matrix> A; + A.bytes_to_matrix(datapath + "/matrix.bin"); + GeneratorInUserNumberingFromMatrix> Generator(A); + int n = A.nb_rows(); + + // Right-hand side + if (rank == 0) + std::cout << "Building rhs" << std::endl; + Matrix> f_global(n, mu); + std::vector> temp(n); + bytes_to_vector(temp, datapath + "/rhs.bin"); + for (int i = 0; i < mu; i++) { + f_global.set_col(i, temp); + } + + // Hmatrix + if (rank == 0) + std::cout << "Creating HMatrix" << std::endl; + + DefaultApproximationBuilder, htool::underlying_type>> default_build(Generator, target_cluster, target_cluster, epsilon, eta, symmetric, UPLO, MPI_COMM_WORLD); + + DistributedOperator> &Operator = default_build.distributed_operator; + + // Global vectors + Matrix> + x_global(n, mu), x_ref(n, mu), test_global(n, mu); + bytes_to_vector(temp, datapath + "sol.bin"); + for (int i = 0; i < mu; i++) { + x_ref.set_col(i, temp); + } + + // Partition + std::vector cluster_to_ovr_subdomain; + std::vector ovr_subdomain_to_global; + std::vector neighbors; + std::vector> intersections; + bytes_to_vector(cluster_to_ovr_subdomain, datapath + "/cluster_to_ovr_subdomain_" + NbrToStr(size) + "_" + NbrToStr(rank) + ".bin"); + bytes_to_vector(ovr_subdomain_to_global, datapath + "/ovr_subdomain_to_global_" + NbrToStr(size) + "_" + NbrToStr(rank) + ".bin"); + bytes_to_vector(neighbors, datapath + "/neighbors_" + NbrToStr(size) + "_" + NbrToStr(rank) + ".bin"); + + intersections.resize(neighbors.size()); + for (int p = 0; p < neighbors.size(); p++) { + bytes_to_vector(intersections[p], datapath + "/intersections_" + NbrToStr(size) + "_" + NbrToStr(rank) + "_" + NbrToStr(p) + ".bin"); + } + + // Error + double error2; + + // Solve + if (rank == 0) + std::cout << "Creating HMatrix" << std::endl; + std::vector geometry(n); + bytes_to_vector(geometry, datapath + "/geometry.bin"); + DDMSolver> default_ddm_solver(Operator, ovr_subdomain_to_global, cluster_to_ovr_subdomain, neighbors, intersections, Generator, 3, geometry.data(), epsilon, eta); + auto &ddm_with_overlap = default_ddm_solver.solver; + + // No precond with overlap + if (rank == 0) + std::cout << "No precond with overlap:" << std::endl; + + opt.parse("-hpddm_schwarz_method none"); + ddm_with_overlap.solve(f_global.data(), x_global.data(), mu); + ddm_with_overlap.print_infos(); + error2 = normFrob(f_global - A * x_global) / normFrob(f_global); + + if (rank == 0) { + cout << "error: " << error2 << endl; + } + + test = test || !(error2 < tol); + + x_global = 0; + + // DDM one level ASM with overlap + if (rank == 0) + std::cout << "ASM one level with overlap:" << std::endl; + MPI_Barrier(MPI_COMM_WORLD); + opt.parse("-hpddm_schwarz_method asm "); + Matrix> Ki; + // if (data_symmetry == 'S' && size > 1) { + // opt.remove("geneo_threshold"); + // opt.parse("-hpddm_geneo_nu 2"); + // Ki.bytes_to_matrix(datapath + "/Ki_" + NbrToStr(size) + "_" + NbrToStr(rank) + ".bin"); + // int local_size_wo_overlap = Operator.get_target_partition().get_size_of_partition(rank); + // auto geneo_coarse_space_dense_builder = GeneoCoarseSpaceDenseBuilder>::GeneoWithNu(local_size_wo_overlap, default_ddm_solver.block_diagonal_dense_matrix, Ki, symmetric, UPLO, 2); + // // geneo_coarse_space_dense_builder.set_geneo_nu(2); + // GeneoCoarseOperatorBuilder> geneo_coarse_operator_builder(Operator); + // ddm_with_overlap.build_coarse_space(geneo_coarse_space_dense_builder, geneo_coarse_operator_builder); + // } + + ddm_with_overlap.facto_one_level(); + ddm_with_overlap.solve(f_global.data(), x_global.data(), mu); + ddm_with_overlap.print_infos(); + error2 = normFrob(f_global - A * x_global) / normFrob(f_global); + + if (rank == 0) { + cout << "error: " << error2 << endl; + } + + test = test || !(error2 < tol); + + x_global = 0; + + // DDM one level RAS with overlap + if (rank == 0) + std::cout << "RAS one level with overlap:" << std::endl; + MPI_Barrier(MPI_COMM_WORLD); + opt.parse("-hpddm_schwarz_method ras "); + ddm_with_overlap.solve(f_global.data(), x_global.data(), mu); + ddm_with_overlap.print_infos(); + error2 = normFrob(f_global - A * x_global) / normFrob(f_global); + if (rank == 0) { + cout << "error: " << error2 << endl; + } + + test = test || !(error2 < tol); + + x_global = 0; + + // DDM two level ASM with overlap + // if (data_symmetry == 'S' && size > 1) { + // if (rank == 0) + // std::cout << "ASM two level with overlap:" << std::endl; + // MPI_Barrier(MPI_COMM_WORLD); + // opt.parse("-hpddm_schwarz_method asm -hpddm_schwarz_coarse_correction additive"); + // ddm_with_overlap.solve(f_global.data(), x_global.data(), mu); + // ddm_with_overlap.print_infos(); + // error2 = normFrob(f_global - A * x_global) / normFrob(f_global); + // if (rank == 0) { + // cout << "error: " << error2 << endl; + // } + + // test = test || !(error2 < tol); + + // x_global = 0; + + // if (rank == 0) + // std::cout << "RAS two level with overlap:" << std::endl; + // MPI_Barrier(MPI_COMM_WORLD); + // opt.parse("-hpddm_schwarz_method ras -hpddm_schwarz_coarse_correction additive"); + // ddm_with_overlap.solve(f_global.data(), x_global.data(), mu); + // ddm_with_overlap.print_infos(); + // ddm_with_overlap.clean(); + // error2 = normFrob(f_global - A * x_global) / normFrob(f_global); + // if (rank == 0) { + // cout << "error: " << error2 << endl; + // } + + // test = test || !(error2 < tol); + + // x_global = 0; + + // // DDM solver with threshold + // if (rank == 0) + // std::cout << "RAS two level with overlap and threshold:" << std::endl; + // opt.parse("-hpddm_schwarz_method ras -hpddm_schwarz_coarse_correction additive"); + // // DDM> ddm_with_overlap_threshold(Generator, &Operator, ovr_subdomain_to_global, cluster_to_ovr_subdomain, neighbors, intersections); + // solver_builder default_ddm_solver_with_threshold(Operator, ovr_subdomain_to_global, cluster_to_ovr_subdomain, neighbors, intersections, Generator, 3, geometry.data(), epsilon, eta); + // auto &ddm_with_overlap_threshold = default_ddm_solver_with_threshold.solver; + // Ki.bytes_to_matrix(datapath + "/Ki_" + NbrToStr(size) + "_" + NbrToStr(rank) + ".bin"); + // int local_size_wo_overlap = Operator.get_target_partition().get_size_of_partition(rank); + // auto geneo_coarse_space_dense_builder = GeneoCoarseSpaceDenseBuilder>::GeneoWithThreshold(local_size_wo_overlap, default_ddm_solver.block_diagonal_dense_matrix, Ki, symmetric, UPLO, 100); + // // geneo_coarse_space_dense_builder.set_geneo_threshold(100); + // GeneoCoarseOperatorBuilder> geneo_coarse_operator_builder(Operator); + // ddm_with_overlap_threshold.build_coarse_space(geneo_coarse_space_dense_builder, geneo_coarse_operator_builder); + // ddm_with_overlap_threshold.facto_one_level(); + // ddm_with_overlap_threshold.solve(f_global.data(), x_global.data(), mu); + // ddm_with_overlap_threshold.print_infos(); + // ddm_with_overlap_threshold.clean(); + // error2 = normFrob(f_global - A * x_global) / normFrob(f_global); + // if (rank == 0) { + // cout << "error: " << error2 << endl; + // } + + // test = test || !(error2 < tol); + + // x_global = 0; + // } + + return test; +} diff --git a/tests/functional_tests/solvers/test_solver_ddm_adding_overlap.hpp b/tests/functional_tests/solvers/test_solver_ddm_adding_overlap.hpp index f89aaec3..97ce3d1b 100644 --- a/tests/functional_tests/solvers/test_solver_ddm_adding_overlap.hpp +++ b/tests/functional_tests/solvers/test_solver_ddm_adding_overlap.hpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -7,6 +8,7 @@ using namespace std; using namespace htool; +template int test_solver_ddm_adding_overlap(int argc, char *argv[], int mu, char data_symmetry, char symmetric, std::string datapath) { // Get the number of processes @@ -64,12 +66,10 @@ int test_solver_ddm_adding_overlap(int argc, char *argv[], int mu, char data_sym if (rank == 0) std::cout << "Creating HMatrix" << std::endl; - const HMatrix *local_block_diagonal_hmatrix = nullptr; - DefaultApproximationBuilder> default_build(Generator, target_cluster, target_cluster, epsilon, eta, symmetric, UPLO, MPI_COMM_WORLD); - DistributedOperator &Operator = default_build.distributed_operator; - local_block_diagonal_hmatrix = default_build.block_diagonal_hmatrix; + DistributedOperator &Operator = default_build.distributed_operator; + HMatrix local_block_diagonal_hmatrix = *default_build.block_diagonal_hmatrix; // Global vectors Matrix> @@ -97,8 +97,10 @@ int test_solver_ddm_adding_overlap(int argc, char *argv[], int mu, char data_sym double error2; // Solve - DefaultDDMSolverBuilderAddingOverlap, double> default_ddm_solver(Operator, local_block_diagonal_hmatrix, Generator, ovr_subdomain_to_global, cluster_to_ovr_subdomain, neighbors, intersections); - DDM> &ddm_with_overlap = default_ddm_solver.solver; + if (rank == 0) + std::cout << "Creating HMatrix" << std::endl; + solver_builder default_ddm_solver(Operator, local_block_diagonal_hmatrix, Generator, ovr_subdomain_to_global, cluster_to_ovr_subdomain, neighbors, intersections); + auto &ddm_with_overlap = default_ddm_solver.solver; // No precond with overlap if (rank == 0) @@ -127,9 +129,7 @@ int test_solver_ddm_adding_overlap(int argc, char *argv[], int mu, char data_sym opt.remove("geneo_threshold"); opt.parse("-hpddm_geneo_nu 2"); Ki.bytes_to_matrix(datapath + "/Ki_" + NbrToStr(size) + "_" + NbrToStr(rank) + ".bin"); - // ddm_with_overlap.build_coarse_space(Ki); - int local_size_wo_overlap = Operator.get_target_partition().get_size_of_partition(rank); - // int local_size_with_overlap = ovr_subdomain_to_global.size(); + int local_size_wo_overlap = Operator.get_target_partition().get_size_of_partition(rank); auto geneo_coarse_space_dense_builder = GeneoCoarseSpaceDenseBuilder>::GeneoWithNu(local_size_wo_overlap, default_ddm_solver.block_diagonal_dense_matrix, Ki, symmetric, UPLO, 2); // geneo_coarse_space_dense_builder.set_geneo_nu(4); GeneoCoarseOperatorBuilder> geneo_coarse_operator_builder(Operator); @@ -203,10 +203,10 @@ int test_solver_ddm_adding_overlap(int argc, char *argv[], int mu, char data_sym std::cout << "RAS two level with overlap and threshold:" << std::endl; opt.parse("-hpddm_schwarz_method ras -hpddm_schwarz_coarse_correction additive -hpddm_geneo_threshold 100"); // DDM> ddm_with_overlap_threshold(Generator, &Operator, ovr_subdomain_to_global, cluster_to_ovr_subdomain, neighbors, intersections); - DefaultDDMSolverBuilderAddingOverlap, double> default_ddm_solver_with_threshold(Operator, local_block_diagonal_hmatrix, Generator, ovr_subdomain_to_global, cluster_to_ovr_subdomain, neighbors, intersections); - DDM> &ddm_with_overlap_threshold = default_ddm_solver_with_threshold.solver; // build_ddm_solver(Operator, local_block_diagonal_hmatrix, Generator, ovr_subdomain_to_global, cluster_to_ovr_subdomain, neighbors, intersections); + solver_builder default_ddm_solver_with_threshold(Operator, local_block_diagonal_hmatrix, Generator, ovr_subdomain_to_global, cluster_to_ovr_subdomain, neighbors, intersections); + auto &ddm_with_overlap_threshold = default_ddm_solver_with_threshold.solver; + Ki.bytes_to_matrix(datapath + "/Ki_" + NbrToStr(size) + "_" + NbrToStr(rank) + ".bin"); - // ddm_with_overlap_threshold.build_coarse_space(Ki); int local_size_wo_overlap = Operator.get_target_partition().get_size_of_partition(rank); auto geneo_coarse_space_dense_builder = GeneoCoarseSpaceDenseBuilder>::GeneoWithThreshold(local_size_wo_overlap, default_ddm_solver.block_diagonal_dense_matrix, Ki, symmetric, UPLO, 100); // geneo_coarse_space_dense_builder.set_geneo_threshold(100); @@ -228,3 +228,224 @@ int test_solver_ddm_adding_overlap(int argc, char *argv[], int mu, char data_sym return test; } + +template <> +int test_solver_ddm_adding_overlap>>(int argc, char *argv[], int mu, char, char symmetric, std::string datapath) { + + // Get the number of processes + int size; + MPI_Comm_size(MPI_COMM_WORLD, &size); + + // Get the rank of the process + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + + // + bool test = 0; + + // HPDDM verbosity + HPDDM::Option &opt = *HPDDM::Option::get(); + opt.parse(argc, argv, rank == 0); + double tol = opt.val("tol", 1e-6); + if (rank != 0) + opt.remove("verbosity"); + opt.parse("-hpddm_max_it 200"); + + // HTOOL + double epsilon = tol; + double eta = 10; + + char UPLO = 'N'; + if (symmetric != 'N') { + UPLO = 'L'; + } + + // Clustering + if (rank == 0) + std::cout << "Creating cluster tree" << std::endl; + Cluster target_cluster = read_cluster_tree(datapath + "/cluster_" + NbrToStr(size) + "_cluster_tree_properties.csv", datapath + "/cluster_" + NbrToStr(size) + "_cluster_tree.csv"); + + // Matrix + if (rank == 0) + std::cout << "Creating generators" << std::endl; + Matrix> A; + A.bytes_to_matrix(datapath + "/matrix.bin"); + GeneratorInUserNumberingFromMatrix> Generator(A); + int n = A.nb_rows(); + + // Right-hand side + if (rank == 0) + std::cout << "Building rhs" << std::endl; + Matrix> f_global(n, mu); + std::vector> temp(n); + bytes_to_vector(temp, datapath + "/rhs.bin"); + for (int i = 0; i < mu; i++) { + f_global.set_col(i, temp); + } + + // Hmatrix + if (rank == 0) + std::cout << "Creating HMatrix" << std::endl; + + DefaultApproximationBuilder> default_build(Generator, target_cluster, target_cluster, epsilon, eta, symmetric, UPLO, MPI_COMM_WORLD); + + DistributedOperator &Operator = default_build.distributed_operator; + HMatrix local_block_diagonal_hmatrix = *default_build.block_diagonal_hmatrix; + + // Global vectors + Matrix> + x_global(n, mu), x_ref(n, mu), test_global(n, mu); + bytes_to_vector(temp, datapath + "sol.bin"); + for (int i = 0; i < mu; i++) { + x_ref.set_col(i, temp); + } + + // Partition + std::vector cluster_to_ovr_subdomain; + std::vector ovr_subdomain_to_global; + std::vector neighbors; + std::vector> intersections; + bytes_to_vector(cluster_to_ovr_subdomain, datapath + "/cluster_to_ovr_subdomain_" + NbrToStr(size) + "_" + NbrToStr(rank) + ".bin"); + bytes_to_vector(ovr_subdomain_to_global, datapath + "/ovr_subdomain_to_global_" + NbrToStr(size) + "_" + NbrToStr(rank) + ".bin"); + bytes_to_vector(neighbors, datapath + "/neighbors_" + NbrToStr(size) + "_" + NbrToStr(rank) + ".bin"); + + intersections.resize(neighbors.size()); + for (int p = 0; p < neighbors.size(); p++) { + bytes_to_vector(intersections[p], datapath + "/intersections_" + NbrToStr(size) + "_" + NbrToStr(rank) + "_" + NbrToStr(p) + ".bin"); + } + + // Errors + double error2; + + // Solve + if (rank == 0) + std::cout << "Creating HMatrix" << std::endl; + DDMSolver> default_ddm_solver(Operator, local_block_diagonal_hmatrix, Generator, ovr_subdomain_to_global, cluster_to_ovr_subdomain, neighbors, intersections); + auto &ddm_with_overlap = default_ddm_solver.solver; + + // No precond with overlap + if (rank == 0) + std::cout << "No precond with overlap:" << std::endl; + + opt.parse("-hpddm_schwarz_method none"); + ddm_with_overlap.solve(f_global.data(), x_global.data(), mu); + ddm_with_overlap.print_infos(); + error2 = normFrob(f_global - A * x_global) / normFrob(f_global); + + if (rank == 0) { + cout << "error: " << error2 << endl; + } + + test = test || !(error2 < tol); + + x_global = 0; + + // DDM one level ASM with overlap + if (rank == 0) + std::cout << "ASM one level with overlap:" << std::endl; + MPI_Barrier(MPI_COMM_WORLD); + opt.parse("-hpddm_schwarz_method asm "); + Matrix> Ki; + // if (data_symmetry == 'S' && size > 1) { + // opt.remove("geneo_threshold"); + // opt.parse("-hpddm_geneo_nu 2"); + // Ki.bytes_to_matrix(datapath + "/Ki_" + NbrToStr(size) + "_" + NbrToStr(rank) + ".bin"); + // int local_size_wo_overlap = Operator.get_target_partition().get_size_of_partition(rank); + // auto geneo_coarse_space_dense_builder = GeneoCoarseSpaceDenseBuilder>::GeneoWithNu(local_size_wo_overlap, default_ddm_solver.block_diagonal_dense_matrix, Ki, symmetric, UPLO, 2); + // // geneo_coarse_space_dense_builder.set_geneo_nu(4); + // GeneoCoarseOperatorBuilder> geneo_coarse_operator_builder(Operator); + // ddm_with_overlap.build_coarse_space(geneo_coarse_space_dense_builder, geneo_coarse_operator_builder); + // } + + ddm_with_overlap.facto_one_level(); + ddm_with_overlap.solve(f_global.data(), x_global.data(), mu); + ddm_with_overlap.print_infos(); + error2 = normFrob(f_global - A * x_global) / normFrob(f_global); + + if (rank == 0) { + cout << "error: " << error2 << endl; + } + + test = test || !(error2 < tol); + + x_global = 0; + + // DDM one level RAS with overlap + if (rank == 0) + std::cout << "RAS one level with overlap:" << std::endl; + MPI_Barrier(MPI_COMM_WORLD); + opt.parse("-hpddm_schwarz_method ras "); + ddm_with_overlap.solve(f_global.data(), x_global.data(), mu); + ddm_with_overlap.print_infos(); + error2 = normFrob(f_global - A * x_global) / normFrob(f_global); + if (rank == 0) { + cout << "error: " << error2 << endl; + } + + test = test || !(error2 < tol); + + x_global = 0; + + // DDM two level ASM with overlap + // if (data_symmetry == 'S' && size > 1) { + // if (rank == 0) + // std::cout << "ASM two level with overlap:" << std::endl; + // MPI_Barrier(MPI_COMM_WORLD); + // opt.parse("-hpddm_schwarz_method asm -hpddm_schwarz_coarse_correction additive"); + // ddm_with_overlap.solve(f_global.data(), x_global.data(), mu); + // ddm_with_overlap.print_infos(); + // error2 = normFrob(f_global - A * x_global) / normFrob(f_global); + // if (rank == 0) { + // cout << "error: " << error2 << endl; + // } + + // test = test || !(error2 < tol); + + // x_global = 0; + + // if (rank == 0) + // std::cout << "RAS two level with overlap:" << std::endl; + // MPI_Barrier(MPI_COMM_WORLD); + // opt.parse("-hpddm_schwarz_method ras -hpddm_schwarz_coarse_correction additive"); + // ddm_with_overlap.solve(f_global.data(), x_global.data(), mu); + // ddm_with_overlap.print_infos(); + // ddm_with_overlap.clean(); + // error2 = normFrob(f_global - A * x_global) / normFrob(f_global); + // if (rank == 0) { + // cout << "error: " << error2 << endl; + // } + + // test = test || !(error2 < tol); + + // x_global = 0; + + // // DDM solver with threshold + // if (rank == 0) + // std::cout << "RAS two level with overlap and threshold:" << std::endl; + // opt.parse("-hpddm_schwarz_method ras -hpddm_schwarz_coarse_correction additive -hpddm_geneo_threshold 100"); + // // DDM> ddm_with_overlap_threshold(Generator, &Operator, ovr_subdomain_to_global, cluster_to_ovr_subdomain, neighbors, intersections); + // solver_builder default_ddm_solver_with_threshold(Operator, local_block_diagonal_hmatrix, Generator, ovr_subdomain_to_global, cluster_to_ovr_subdomain, neighbors, intersections); + // auto &ddm_with_overlap_threshold = default_ddm_solver_with_threshold.solver; + + // Ki.bytes_to_matrix(datapath + "/Ki_" + NbrToStr(size) + "_" + NbrToStr(rank) + ".bin"); + // int local_size_wo_overlap = Operator.get_target_partition().get_size_of_partition(rank); + // auto geneo_coarse_space_dense_builder = GeneoCoarseSpaceDenseBuilder>::GeneoWithThreshold(local_size_wo_overlap, default_ddm_solver.block_diagonal_dense_matrix, Ki, symmetric, UPLO, 100); + // // geneo_coarse_space_dense_builder.set_geneo_threshold(100); + // GeneoCoarseOperatorBuilder> geneo_coarse_operator_builder(Operator); + // ddm_with_overlap_threshold.build_coarse_space(geneo_coarse_space_dense_builder, geneo_coarse_operator_builder); + // ddm_with_overlap_threshold.facto_one_level(); + // ddm_with_overlap_threshold.solve(f_global.data(), x_global.data(), mu); + // ddm_with_overlap_threshold.print_infos(); + // ddm_with_overlap_threshold.clean(); + // error2 = normFrob(f_global - A * x_global) / normFrob(f_global); + // if (rank == 0) { + // cout << "error: " << error2 << endl; + // } + + // test = test || !(error2 < tol); + + // x_global = 0; + // } + + return test; +} diff --git a/tests/functional_tests/solvers/test_solver_wo_overlap.hpp b/tests/functional_tests/solvers/test_solver_wo_overlap.hpp index 8e2eae47..18d94b15 100644 --- a/tests/functional_tests/solvers/test_solver_wo_overlap.hpp +++ b/tests/functional_tests/solvers/test_solver_wo_overlap.hpp @@ -7,6 +7,7 @@ using namespace std; using namespace htool; +template int test_solver_wo_overlap(int argc, char *argv[], int mu, char symmetric, std::string datapath) { // Get the number of processes @@ -42,13 +43,6 @@ int test_solver_wo_overlap(int argc, char *argv[], int mu, char symmetric, std:: std::cout << "Creating cluster tree" << std::endl; Cluster target_cluster = read_cluster_tree(datapath + "/cluster_" + NbrToStr(size) + "_cluster_tree_properties.csv", datapath + "/cluster_" + NbrToStr(size) + "_cluster_tree.csv"); - // std::vectortab(n); - // std::iota(tab.begin(),tab.end(),int(0)); - // t->build(p,std::vector(n,0),tab,std::vector(n,1),2); - // std::vector permutation_test(n); - // bytes_to_vector(permutation_test,datapath+"permutation.bin"); - // t->save(p,"test_cluster",{0,1,2,3}); - // Matrix if (rank == 0) std::cout << "Creating generators" << std::endl; @@ -71,12 +65,10 @@ int test_solver_wo_overlap(int argc, char *argv[], int mu, char symmetric, std:: if (rank == 0) std::cout << "Creating HMatrix" << std::endl; - const HMatrix *local_block_diagonal_hmatrix = nullptr; - DefaultApproximationBuilder> default_build(Generator, target_cluster, target_cluster, epsilon, eta, symmetric, UPLO, MPI_COMM_WORLD); - DistributedOperator &Operator = default_build.distributed_operator; - local_block_diagonal_hmatrix = default_build.block_diagonal_hmatrix; + DistributedOperator &Operator = default_build.distributed_operator; + HMatrix local_block_diagonal_hmatrix = *default_build.block_diagonal_hmatrix; // Global vectors Matrix> @@ -104,9 +96,12 @@ int test_solver_wo_overlap(int argc, char *argv[], int mu, char symmetric, std:: double error2; // Solve - DefaultSolverBuilder, double> default_solver(Operator, local_block_diagonal_hmatrix); - DDM> &block_jacobi_solver = default_solver.solver; + if (rank == 0) + std::cout << "Creating Solver" << std::endl; + solver_builder default_solver(Operator, local_block_diagonal_hmatrix); + auto &block_jacobi_solver = default_solver.solver; + print_hmatrix_information(local_block_diagonal_hmatrix, std::cout); // No precond wo overlap if (rank == 0) std::cout << "No precond without overlap:" << std::endl;