Skip to content

Commit

Permalink
fix second level for ddm
Browse files Browse the repository at this point in the history
  • Loading branch information
PierreMarchand20 committed Aug 7, 2024
1 parent ff33b04 commit 60c0f38
Show file tree
Hide file tree
Showing 8 changed files with 242 additions and 222 deletions.
2 changes: 1 addition & 1 deletion include/htool/solvers/ddm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ DDM<CoefficientPrecision, HPDDMCustomLocalSolver> make_DDM_solver_w_custom_local

template <typename CoefficientPrecision, typename CoordinatePrecision = underlying_type<CoefficientPrecision>>
DDM<CoefficientPrecision, HPDDMCustomLocalSolver> make_DDM_solver_w_custom_local_solver(const DistributedOperator<CoefficientPrecision> &distributed_operator, HMatrix<CoefficientPrecision, CoordinatePrecision> &local_hmatrix, Matrix<CoefficientPrecision> &B, Matrix<CoefficientPrecision> &C, Matrix<CoefficientPrecision> &D, const std::vector<int> &neighbors, const std::vector<std::vector<int>> &intersections) {
int n = local_hmatrix.get_target_cluster().get_size() + C.nb_rows();
int n = local_hmatrix.get_target_cluster().get_size() + D.nb_rows();

// Timing
double mytime, maxtime;
Expand Down
38 changes: 32 additions & 6 deletions include/htool/solvers/geneo/coarse_space_builder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

namespace htool {

template <typename CoefficientPrecision>
template <typename CoefficientPrecision, typename CoordinatePrecision = underlying_type<CoefficientPrecision>>
class GeneoCoarseSpaceDenseBuilder : public VirtualCoarseSpaceBuilder<CoefficientPrecision> {

protected:
Expand All @@ -24,16 +24,42 @@ class GeneoCoarseSpaceDenseBuilder : public VirtualCoarseSpaceBuilder<Coefficien
char m_uplo = 'N';
int m_geneo_nu = 2;
htool::underlying_type<CoefficientPrecision> m_geneo_threshold = -1.;
explicit GeneoCoarseSpaceDenseBuilder(int size_wo_overlap, const Matrix<CoefficientPrecision> &Ai, Matrix<CoefficientPrecision> &Bi, char symmetry, char uplo, int geneo_nu, htool::underlying_type<CoefficientPrecision> geneo_threshold) : m_size_wo_overlap(size_wo_overlap), m_size_with_overlap(Ai.nb_cols()), m_DAiD(m_size_with_overlap, m_size_with_overlap), m_Bi(Bi), m_symmetry(symmetry), m_uplo(uplo), m_geneo_nu(geneo_nu), m_geneo_threshold(geneo_threshold) {
for (int i = 0; i < m_size_wo_overlap; i++) {
std::copy_n(Ai.data() + i * m_size_with_overlap, m_size_wo_overlap, &(m_DAiD(0, i)));
explicit GeneoCoarseSpaceDenseBuilder(int size_wo_overlap, int size_with_overlap, const Matrix<CoefficientPrecision> &Ai, const Matrix<CoefficientPrecision> &Bi, char symmetry, char uplo, int geneo_nu, htool::underlying_type<CoefficientPrecision> geneo_threshold) : m_size_wo_overlap(size_wo_overlap), m_size_with_overlap(size_with_overlap), m_DAiD(m_size_with_overlap, m_size_with_overlap), m_Bi(Bi), m_symmetry(symmetry), m_uplo(uplo), m_geneo_nu(geneo_nu), m_geneo_threshold(geneo_threshold) {
if (Ai.nb_cols() == size_with_overlap) {
for (int i = 0; i < m_size_wo_overlap; i++) {
std::copy_n(Ai.data() + i * m_size_with_overlap, m_size_wo_overlap, &(m_DAiD(0, i)));
}
} else if (Ai.nb_cols() == size_wo_overlap) {
for (int i = 0; i < m_size_wo_overlap; i++) {
std::copy_n(Ai.data() + i * m_size_wo_overlap, m_size_wo_overlap, &(m_DAiD(0, i)));
}
}
}

public:
static GeneoCoarseSpaceDenseBuilder GeneoWithNu(int size_wo_overlap, const Matrix<CoefficientPrecision> &Ai, Matrix<CoefficientPrecision> &Bi, char symmetry, char uplo, int geneo_nu) { return GeneoCoarseSpaceDenseBuilder{size_wo_overlap, Ai, Bi, symmetry, uplo, geneo_nu, -1}; }
static GeneoCoarseSpaceDenseBuilder GeneoWithNu(int size_wo_overlap, int size_with_overlap, const Matrix<CoefficientPrecision> &Ai, const Matrix<CoefficientPrecision> &Bi, char symmetry, char uplo, int geneo_nu) { return GeneoCoarseSpaceDenseBuilder{size_wo_overlap, size_with_overlap, Ai, Bi, symmetry, uplo, geneo_nu, -1}; }

static GeneoCoarseSpaceDenseBuilder GeneoWithNu(int size_wo_overlap, int size_with_overlap, const HMatrix<CoefficientPrecision, CoordinatePrecision> &Ai, const Matrix<CoefficientPrecision> &Bi, char symmetry, char uplo, int geneo_nu) {
Matrix<CoefficientPrecision> temp(Ai.nb_rows(), Ai.nb_cols());
if (Ai.nb_rows() == size_with_overlap) {
copy_to_dense_in_user_numbering(Ai, temp.data());
} else if (Ai.nb_rows() == size_wo_overlap) {
copy_to_dense(Ai, temp.data());
}
return GeneoCoarseSpaceDenseBuilder{size_wo_overlap, size_with_overlap, temp, Bi, symmetry, uplo, geneo_nu, -1};
}

static GeneoCoarseSpaceDenseBuilder GeneoWithThreshold(int size_wo_overlap, int size_with_overlap, const Matrix<CoefficientPrecision> &Ai, const Matrix<CoefficientPrecision> &Bi, char symmetry, char uplo, htool::underlying_type<CoefficientPrecision> geneo_threshold) { return GeneoCoarseSpaceDenseBuilder{size_wo_overlap, size_with_overlap, Ai, Bi, symmetry, uplo, 0, geneo_threshold}; }

static GeneoCoarseSpaceDenseBuilder GeneoWithThreshold(int size_wo_overlap, const Matrix<CoefficientPrecision> &Ai, Matrix<CoefficientPrecision> &Bi, char symmetry, char uplo, htool::underlying_type<CoefficientPrecision> geneo_threshold) { return GeneoCoarseSpaceDenseBuilder{size_wo_overlap, Ai, Bi, symmetry, uplo, 0, geneo_threshold}; }
static GeneoCoarseSpaceDenseBuilder GeneoWithThreshold(int size_wo_overlap, int size_with_overlap, const HMatrix<CoefficientPrecision, CoordinatePrecision> &Ai, const Matrix<CoefficientPrecision> &Bi, char symmetry, char uplo, htool::underlying_type<CoefficientPrecision> geneo_threshold) {
Matrix<CoefficientPrecision> temp(Ai.nb_rows(), Ai.nb_cols());
if (Ai.nb_rows() == size_with_overlap) {
copy_to_dense_in_user_numbering(Ai, temp.data());
} else if (Ai.nb_rows() == size_wo_overlap) {
copy_to_dense(Ai, temp.data());
}
return GeneoCoarseSpaceDenseBuilder{size_wo_overlap, size_with_overlap, temp, Bi, symmetry, uplo, 0, geneo_threshold};
}

virtual Matrix<CoefficientPrecision> build_coarse_space() override {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,20 @@ class LocalHMatrixPlusOverlapSolver : public VirtualLocalSolver<CoefficientPreci

} else if (m_local_hmatrix.get_symmetry() == 'S' || m_local_hmatrix.get_symmetry() == 'H') {
cholesky_factorization(m_local_hmatrix.get_UPLO(), m_local_hmatrix);
if (m_C.nb_rows() > 0) {
if (m_local_hmatrix.get_UPLO() == 'L' && m_C.nb_rows() > 0) {
triangular_hmatrix_matrix_solve('R', m_local_hmatrix.get_UPLO(), is_complex<CoefficientPrecision>() ? 'C' : 'T', 'N', CoefficientPrecision(1), m_local_hmatrix, m_C);
add_matrix_matrix_product('N', is_complex<CoefficientPrecision>() ? 'C' : 'T', CoefficientPrecision(-1), m_C, m_C, CoefficientPrecision(1), m_D);
cholesky_factorization(m_local_hmatrix.get_UPLO(), m_D);
} else if (m_local_hmatrix.get_UPLO() == 'U' && m_B.nb_cols() > 0) {
triangular_hmatrix_matrix_solve('L', m_local_hmatrix.get_UPLO(), is_complex<CoefficientPrecision>() ? 'C' : 'T', 'N', CoefficientPrecision(1), m_local_hmatrix, m_B);
add_matrix_matrix_product(is_complex<CoefficientPrecision>() ? 'C' : 'T', 'N', CoefficientPrecision(-1), m_B, m_B, CoefficientPrecision(1), m_D);
cholesky_factorization(m_local_hmatrix.get_UPLO(), m_D);
}
}
}
void solve(CoefficientPrecision *const b, const unsigned short &mu = 1) const {
int local_size_wo_overlap = m_local_hmatrix.get_target_cluster().get_size();
int size_overlap = m_C.nb_rows();
int size_overlap = m_D.nb_rows();
int local_size_w_overlap = local_size_wo_overlap + size_overlap;
Matrix<CoefficientPrecision> b1(local_size_wo_overlap, mu), b2(size_overlap, mu);

Expand All @@ -50,30 +54,7 @@ class LocalHMatrixPlusOverlapSolver : public VirtualLocalSolver<CoefficientPreci
std::copy_n(b + i * local_size_w_overlap + local_size_wo_overlap, size_overlap, b2.data() + i * size_overlap);
}

if (m_local_hmatrix.get_symmetry() == 'N') {

triangular_hmatrix_matrix_solve('L', 'L', 'N', 'U', CoefficientPrecision(1), m_local_hmatrix, b1);
if (m_C.nb_rows() > 0) {
add_matrix_matrix_product('N', 'N', CoefficientPrecision(-1), m_C, b1, CoefficientPrecision(1), b2);
triangular_matrix_matrix_solve('L', 'L', 'N', 'U', CoefficientPrecision(1), m_D, b2);

triangular_matrix_matrix_solve('L', 'U', 'N', 'N', CoefficientPrecision(1), m_D, b2);
add_matrix_matrix_product('N', 'N', CoefficientPrecision(-1), m_B, b2, CoefficientPrecision(1), b1);
}
triangular_hmatrix_matrix_solve('L', 'U', 'N', 'N', CoefficientPrecision(1), m_local_hmatrix, b1);

} else if (m_local_hmatrix.get_symmetry() == 'S' || m_local_hmatrix.get_symmetry() == 'H') {
triangular_hmatrix_matrix_solve('L', 'L', 'N', 'N', CoefficientPrecision(1), m_local_hmatrix, b1);

if (m_C.nb_rows() > 0) {
add_matrix_matrix_product('N', 'N', CoefficientPrecision(-1), m_C, b1, CoefficientPrecision(1), b2);
triangular_matrix_matrix_solve('L', 'L', 'N', 'U', CoefficientPrecision(1), m_D, b2);

triangular_matrix_matrix_solve('L', 'L', is_complex<CoefficientPrecision>() ? 'C' : 'T', 'N', CoefficientPrecision(1), m_D, b2);
add_matrix_matrix_product('N', 'N', CoefficientPrecision(-1), m_C, b2, CoefficientPrecision(1), b1);
}
triangular_hmatrix_matrix_solve('L', 'L', is_complex<CoefficientPrecision>() ? 'C' : 'T', 'N', CoefficientPrecision(1), m_local_hmatrix, b1);
}
this->solve(b1, b2);

for (int i = 0; i < mu; i++) {
std::copy_n(b1.data() + i * local_size_wo_overlap, local_size_wo_overlap, b + i * local_size_w_overlap);
Expand All @@ -83,42 +64,56 @@ class LocalHMatrixPlusOverlapSolver : public VirtualLocalSolver<CoefficientPreci
void solve(const CoefficientPrecision *const b, CoefficientPrecision *const x, const unsigned short &mu = 1) const {

int local_size_wo_overlap = m_local_hmatrix.get_target_cluster().get_size();
int size_overlap = m_C.nb_rows();
int size_overlap = m_D.nb_rows();
int local_size_w_overlap = local_size_wo_overlap + size_overlap;
Matrix<CoefficientPrecision> b1(local_size_wo_overlap, mu), b2(size_overlap, mu);

for (int i = 0; i < mu; i++) {
std::copy_n(b + i * local_size_w_overlap, local_size_wo_overlap, b1.data() + i * local_size_wo_overlap);
std::copy_n(b + i * local_size_w_overlap + local_size_wo_overlap, size_overlap, b2.data() + i * size_overlap);
}
this->solve(b1, b2);

for (int i = 0; i < mu; i++) {
std::copy_n(b1.data() + i * local_size_wo_overlap, local_size_wo_overlap, x + i * local_size_w_overlap);
std::copy_n(b2.data() + i * size_overlap, size_overlap, x + i * local_size_w_overlap + local_size_wo_overlap);
}
}

private:
void solve(Matrix<CoefficientPrecision> &b1, Matrix<CoefficientPrecision> &b2) const {

if (m_local_hmatrix.get_symmetry() == 'N') {

triangular_hmatrix_matrix_solve('L', 'L', 'N', 'U', CoefficientPrecision(1), m_local_hmatrix, b1);

if (m_C.nb_rows() > 0) {
add_matrix_matrix_product('N', 'N', CoefficientPrecision(-1), m_C, b1, CoefficientPrecision(1), b2);
triangular_matrix_matrix_solve('L', 'L', 'N', 'U', CoefficientPrecision(1), m_D, b2);

triangular_matrix_matrix_solve('L', 'U', 'N', 'N', CoefficientPrecision(1), m_D, b2);
add_matrix_matrix_product('N', 'N', CoefficientPrecision(-1), m_B, b2, CoefficientPrecision(1), b1);
}
triangular_hmatrix_matrix_solve('L', 'U', 'N', 'N', CoefficientPrecision(1), m_local_hmatrix, b1);

} else if (m_local_hmatrix.get_symmetry() == 'S' || m_local_hmatrix.get_symmetry() == 'H') {
} else if ((m_local_hmatrix.get_symmetry() == 'S' || m_local_hmatrix.get_symmetry() == 'H') && m_local_hmatrix.get_UPLO() == 'L') {
triangular_hmatrix_matrix_solve('L', 'L', 'N', 'N', CoefficientPrecision(1), m_local_hmatrix, b1);

if (m_C.nb_rows() > 0) {
add_matrix_matrix_product('N', 'N', CoefficientPrecision(-1), m_C, b1, CoefficientPrecision(1), b2);
triangular_matrix_matrix_solve('L', 'L', 'N', 'U', CoefficientPrecision(1), m_D, b2);

triangular_matrix_matrix_solve('L', 'L', is_complex<CoefficientPrecision>() ? 'C' : 'T', 'N', CoefficientPrecision(1), m_D, b2);
add_matrix_matrix_product(is_complex<CoefficientPrecision>() ? 'C' : 'T', 'N', CoefficientPrecision(-1), m_C, b2, CoefficientPrecision(1), b1);
}
triangular_hmatrix_matrix_solve('L', 'L', is_complex<CoefficientPrecision>() ? 'C' : 'T', 'N', CoefficientPrecision(1), m_local_hmatrix, b1);
}
// std::cout << "TEST " << get_max(b1) << " " << get_max(b2) << "\n";
for (int i = 0; i < mu; i++) {
std::copy_n(b1.data() + i * local_size_wo_overlap, local_size_wo_overlap, x + i * local_size_w_overlap);
std::copy_n(b2.data() + i * size_overlap, size_overlap, x + i * local_size_w_overlap + local_size_wo_overlap);
} else if ((m_local_hmatrix.get_symmetry() == 'S' || m_local_hmatrix.get_symmetry() == 'H') && m_local_hmatrix.get_UPLO() == 'U') {
triangular_hmatrix_matrix_solve('L', 'U', is_complex<CoefficientPrecision>() ? 'C' : 'T', 'N', CoefficientPrecision(1), m_local_hmatrix, b1);

if (m_B.nb_cols() > 0) {
add_matrix_matrix_product(is_complex<CoefficientPrecision>() ? 'C' : 'T', 'N', CoefficientPrecision(-1), m_B, b1, CoefficientPrecision(1), b2);
triangular_matrix_matrix_solve('L', 'U', 'N', 'U', CoefficientPrecision(1), m_D, b2);
add_matrix_matrix_product('N', 'N', CoefficientPrecision(-1), m_B, b2, CoefficientPrecision(1), b1);
}

triangular_hmatrix_matrix_solve('L', 'U', 'N', 'N', CoefficientPrecision(1), m_local_hmatrix, b1);
}
}
};
Expand Down
Loading

0 comments on commit 60c0f38

Please sign in to comment.