Skip to content

Commit

Permalink
add hlu
Browse files Browse the repository at this point in the history
  • Loading branch information
PierreMarchand20 committed Apr 19, 2024
1 parent 5190c57 commit 2cb8363
Show file tree
Hide file tree
Showing 82 changed files with 3,747 additions and 1,182 deletions.
8 changes: 8 additions & 0 deletions include/htool/clustering/cluster_node.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,14 @@ bool is_cluster_on_partition(const Cluster<CoordinatesPrecision> &cluster) {
return cluster.get_depth() == cluster.get_clusters_on_partition()[0]->get_depth();
}

template <typename CoordinatesPrecision>
bool left_cluster_contains_right_cluster(const Cluster<CoordinatesPrecision> &cluster1, const Cluster<CoordinatesPrecision> &cluster2) {

if (cluster1.get_offset() <= cluster2.get_offset() && cluster1.get_size() + cluster1.get_offset() >= cluster2.get_size() + cluster2.get_offset()) {
return true;
}
return false;
}
// template <typename CoordinatesPrecision>
// Cluster<CoordinatesPrecision> clone_cluster_tree_from_partition(const Cluster<CoordinatesPrecision> &cluster, int index) {
// if (!cluster.is_permutation_local()) {
Expand Down
8 changes: 5 additions & 3 deletions include/htool/hmatrix/hmatrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ class HMatrix : public TreeNode<HMatrix<CoefficientPrecision, CoordinatePrecisio
const Cluster<CoordinatePrecision> &get_source_cluster() const { return *m_source_cluster; }
int nb_cols() const { return m_source_cluster->get_size(); }
int nb_rows() const { return m_target_cluster->get_size(); }
htool::underlying_type<CoefficientPrecision> get_epsilon() const { return this->m_tree_data->m_epsilon; }

HMatrix<CoefficientPrecision, CoordinatePrecision> *get_child_or_this(const Cluster<CoordinatePrecision> &required_target_cluster, const Cluster<CoordinatePrecision> &required_source_cluster) {
if (*m_target_cluster == required_target_cluster and *m_source_cluster == required_source_cluster) {
Expand Down Expand Up @@ -279,12 +280,13 @@ class HMatrix : public TreeNode<HMatrix<CoefficientPrecision, CoordinatePrecisio
}
void clear_low_rank_data() { m_low_rank_data.reset(); }

void set_dense_data(Matrix<CoefficientPrecision> &dense_matrix) {
void set_dense_data(std::unique_ptr<Matrix<CoefficientPrecision>> dense_matrix_ptr) {
this->delete_children();
m_leaves.clear();
m_leaves_for_symmetry.clear();
m_dense_data = std::make_unique<Matrix<CoefficientPrecision>>();
m_dense_data->assign(dense_matrix.nb_rows(), dense_matrix.nb_cols(), dense_matrix.release(), true);
m_dense_data = std::move(dense_matrix_ptr);
// m_dense_data = std::make_unique<Matrix<CoefficientPrecision>>();
// m_dense_data->assign(dense_matrix.nb_rows(), dense_matrix.nb_cols(), dense_matrix.release(), true);
m_storage_type = StorageType::Dense;
}

Expand Down
3 changes: 2 additions & 1 deletion include/htool/hmatrix/hmatrix_output.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
#define HTOOL_HMATRIX_OUTPUT_HPP

#include <array>
#include <iostream>
#include <fstream>
#include <map>
#include <vector>
#if defined(_OPENMP)
# include <omp.h>
#endif
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#ifndef HTOOL_BLOCKS_ADMISSIBILITY_CONDITIONS_HPP
#define HTOOL_BLOCKS_ADMISSIBILITY_CONDITIONS_HPP

#include "../../basic_types/vector.hpp"
#include "../../clustering/cluster_node.hpp"

namespace htool {

template <typename CoordinatePrecision>
Expand Down
559 changes: 294 additions & 265 deletions include/htool/hmatrix/linalg/add_hmatrix_hmatrix_product.hpp

Large diffs are not rendered by default.

91 changes: 58 additions & 33 deletions include/htool/hmatrix/linalg/add_hmatrix_lrmat_product.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,47 +3,35 @@

#include "../hmatrix.hpp"
#include "add_hmatrix_matrix_product.hpp"
#include "add_lrmat_hmatrix.hpp"

namespace htool {

// template <typename CoefficientPrecision, typename CoordinatePrecision = CoefficientPrecision>
// void add_hmatrix_lrmat_product(char transa, char transb, CoefficientPrecision alpha, const HMatrix<CoefficientPrecision, CoordinatePrecision> &A, const LowRankMatrix<CoefficientPrecision, CoordinatePrecision> &B, CoefficientPrecision beta, LowRankMatrix<CoefficientPrecision, CoordinatePrecision> &C) {
// if (transb != 'N') {
// htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_matrix_lrmat_product (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE
// }
// auto &U = B.get_U();
// auto &V = B.get_V();
// auto rank = B.rank_of();
// }

template <typename CoefficientPrecision, typename CoordinatePrecision = CoefficientPrecision>
void add_hmatrix_lrmat_product(char transa, char transb, CoefficientPrecision alpha, const HMatrix<CoefficientPrecision, CoordinatePrecision> &A, const LowRankMatrix<CoefficientPrecision, CoordinatePrecision> &B, CoefficientPrecision beta, Matrix<CoefficientPrecision> &C) {
if (transb != 'N') {
htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_matrix_lrmat_product (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE
}

auto rank = B.rank_of();

if (rank != 0) {
auto &U = B.get_U();
auto &V = B.get_V();
Matrix<CoefficientPrecision> AU;
if (transa == 'N') {
AU.resize(A.nb_rows(), U.nb_cols());
AU.resize(A.nb_rows(), transb == 'N' ? U.nb_cols() : V.nb_rows());
} else {
AU.resize(A.nb_cols(), transb == 'N' ? U.nb_cols() : V.nb_rows());
}
if (transb == 'N') {
add_hmatrix_matrix_product<CoefficientPrecision>(transa, 'N', 1, A, U, 0, AU);
add_matrix_matrix_product<CoefficientPrecision>('N', 'N', alpha, AU, V, beta, C);
} else {
AU.resize(A.nb_cols(), U.nb_cols());
add_hmatrix_matrix_product<CoefficientPrecision>(transa, transb, 1, A, V, 0, AU);
add_matrix_matrix_product<CoefficientPrecision>('N', transb, alpha, AU, U, beta, C);
}
add_hmatrix_matrix_product<CoefficientPrecision>(transa, 'N', 1, A, U, 0, AU);
add_matrix_matrix_product<CoefficientPrecision>('N', 'N', alpha, AU, V, beta, C);
}
}

template <typename CoefficientPrecision, typename CoordinatePrecision = CoefficientPrecision>
void add_hmatrix_lrmat_product(char transa, char transb, CoefficientPrecision alpha, const HMatrix<CoefficientPrecision, CoordinatePrecision> &A, const LowRankMatrix<CoefficientPrecision, CoordinatePrecision> &B, CoefficientPrecision beta, LowRankMatrix<CoefficientPrecision, CoordinatePrecision> &C) {
if (transb != 'N') {
htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_matrix_lrmat_product (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE
}

auto rank = B.rank_of();

if (rank != 0) {
Expand All @@ -53,31 +41,57 @@ void add_hmatrix_lrmat_product(char transa, char transb, CoefficientPrecision al
auto &V_C = C.get_V();
if (beta == CoefficientPrecision(0) || C.rank_of() == 0) {
if (transa == 'N') {
U_C.resize(A.nb_rows(), U_B.nb_cols());
U_C.resize(A.nb_rows(), transb == 'N' ? U_B.nb_cols() : V_B.nb_rows());
} else {
U_C.resize(A.nb_cols(), transb == 'N' ? U_B.nb_cols() : V_B.nb_rows());
}
if (transb == 'N') {
V_C = V_B;
add_hmatrix_matrix_product<CoefficientPrecision>(transa, 'N', alpha, A, U_B, 0, U_C);
} else {
U_C.resize(A.nb_cols(), U_B.nb_cols());
V_C.resize(U_B.nb_cols(), U_B.nb_rows());
transpose(U_B, V_C);
if (transb == 'C') {
conj_if_complex(V_C.data(), V_C.nb_rows() * V_C.nb_cols());
}
add_hmatrix_matrix_product<CoefficientPrecision>(transa, transb, alpha, A, V_B, 0, U_C);
}
V_C = V_B;
add_hmatrix_matrix_product<CoefficientPrecision>(transa, 'N', alpha, A, U_B, 0, U_C);
} else {

// Concatenate V_B and V_C
Matrix<CoefficientPrecision> new_V;
scale(beta, V_C);
new_V.resize(V_B.nb_rows() + V_C.nb_rows(), V_C.nb_cols());
for (int j = 0; j < new_V.nb_cols(); j++) {
std::copy_n(V_B.data() + j * V_B.nb_rows(), V_B.nb_rows(), new_V.data() + j * new_V.nb_rows());
std::copy_n(V_C.data() + j * V_C.nb_rows(), V_C.nb_rows(), new_V.data() + j * new_V.nb_rows() + V_B.nb_rows());
if (transb == 'N') {
new_V.resize(V_B.nb_rows() + V_C.nb_rows(), V_C.nb_cols());
for (int j = 0; j < new_V.nb_cols(); j++) {
std::copy_n(V_B.data() + j * V_B.nb_rows(), V_B.nb_rows(), new_V.data() + j * new_V.nb_rows());
std::copy_n(V_C.data() + j * V_C.nb_rows(), V_C.nb_rows(), new_V.data() + j * new_V.nb_rows() + V_B.nb_rows());
}
} else {
new_V.resize(U_B.nb_cols() + V_C.nb_rows(), V_C.nb_cols());
Matrix<CoefficientPrecision> temp(U_B.nb_cols(), U_B.nb_rows());
transpose(U_B, temp);
if (transb == 'C') {
conj_if_complex(temp.data(), temp.nb_rows() * temp.nb_cols());
}
for (int j = 0; j < new_V.nb_cols(); j++) {
std::copy_n(temp.data() + j * temp.nb_rows(), temp.nb_rows(), new_V.data() + j * new_V.nb_rows());
std::copy_n(V_C.data() + j * V_C.nb_rows(), V_C.nb_rows(), new_V.data() + j * new_V.nb_rows() + V_B.nb_rows());
}
}

// Compute AU= A*U_A
Matrix<CoefficientPrecision> AU;
if (transa == 'N') {
AU.resize(A.nb_rows(), U_B.nb_cols());
AU.resize(A.nb_rows(), transb == 'N' ? U_B.nb_cols() : V_B.nb_rows());
} else {
AU.resize(A.nb_cols(), transb == 'N' ? U_B.nb_cols() : V_B.nb_rows());
}
if (transb == 'N') {
add_hmatrix_matrix_product<CoefficientPrecision>(transa, 'N', alpha, A, U_B, 0, AU);
} else {
AU.resize(A.nb_cols(), U_B.nb_cols());
add_hmatrix_matrix_product<CoefficientPrecision>(transa, transb, alpha, A, V_B, 0, AU);
}
add_hmatrix_matrix_product<CoefficientPrecision>(transa, 'N', alpha, A, U_B, 0, AU);

// Concatenate U_A and U_C
Matrix<CoefficientPrecision> new_U;
Expand All @@ -92,6 +106,17 @@ void add_hmatrix_lrmat_product(char transa, char transb, CoefficientPrecision al
}
}

template <typename CoefficientPrecision, typename CoordinatePrecision = CoefficientPrecision>
void add_hmatrix_lrmat_product(char transa, char transb, CoefficientPrecision alpha, const HMatrix<CoefficientPrecision, CoordinatePrecision> &A, const LowRankMatrix<CoefficientPrecision, CoordinatePrecision> &B, CoefficientPrecision beta, HMatrix<CoefficientPrecision, CoordinatePrecision> &C) {
if (beta != CoefficientPrecision(1)) {
scale(beta, C);
}

LowRankMatrix<CoefficientPrecision, CoordinatePrecision> lrmat(B.get_epsilon());
add_hmatrix_lrmat_product(transa, transb, alpha, A, B, CoefficientPrecision(1), lrmat);
add_lrmat_hmatrix(lrmat, C);
}

} // namespace htool

#endif
40 changes: 29 additions & 11 deletions include/htool/hmatrix/linalg/add_hmatrix_matrix_product.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,26 @@ namespace htool {

template <typename CoefficientPrecision, typename CoordinatePrecision = CoefficientPrecision>
void add_hmatrix_matrix_product(char transa, char transb, CoefficientPrecision alpha, const HMatrix<CoefficientPrecision, CoordinatePrecision> &A, const Matrix<CoefficientPrecision> &B, CoefficientPrecision beta, Matrix<CoefficientPrecision> &C) {

Matrix<CoefficientPrecision> transposed_B, transposed_C;
transpose(B, transposed_B);
transpose(C, transposed_C);
sequential_add_hmatrix_matrix_product_row_major(transa, transb, alpha, A, transposed_B.data(), beta, transposed_C.data(), transposed_C.nb_rows());
transpose(transposed_C, C);
if (transb == 'N') {
Matrix<CoefficientPrecision> transposed_B(B.nb_cols(), B.nb_rows()), transposed_C(C.nb_cols(), C.nb_rows());
transpose(B, transposed_B);
transpose(C, transposed_C);
sequential_add_hmatrix_matrix_product_row_major(transa, transb, alpha, A, transposed_B.data(), beta, transposed_C.data(), transposed_C.nb_rows());
transpose(transposed_C, C);
} else {
Matrix<CoefficientPrecision> transposed_C(C.nb_cols(), C.nb_rows());
transpose(C, transposed_C);
sequential_add_hmatrix_matrix_product_row_major(transa, 'N', alpha, A, B.data(), beta, transposed_C.data(), transposed_C.nb_rows());
transpose(transposed_C, C);
}
}

template <typename CoefficientPrecision, typename CoordinatePrecision = CoefficientPrecision>
void add_hmatrix_matrix_product(char transa, char transb, CoefficientPrecision alpha, const HMatrix<CoefficientPrecision, CoordinatePrecision> &A, const Matrix<CoefficientPrecision> &B, CoefficientPrecision beta, LowRankMatrix<CoefficientPrecision, CoordinatePrecision> &C) {
if (transb != 'N') {
htool::Logger::get_instance().log(LogLevel::ERROR, "Operation is not implemented for add_matrix_lrmat_product (transb=" + std::string(1, transb) + ")"); // LCOV_EXCL_LINE
}

bool C_is_overwritten = (beta == CoefficientPrecision(0) || C.rank_of() == 0);

int nb_rows = (transa == 'N') ? A.nb_rows() : A.nb_cols();
int nb_cols = B.nb_cols();
int nb_cols = (transb == 'N') ? B.nb_cols() : B.nb_rows();

//
Matrix<CoefficientPrecision> AB(nb_rows, nb_cols);
Expand Down Expand Up @@ -97,6 +99,22 @@ void add_hmatrix_matrix_product(char transa, char transb, CoefficientPrecision a
C.get_V() = new_V;
recompression(C);
}

// template <typename CoefficientPrecision, typename CoordinatePrecision = CoefficientPrecision>
// void add_symmetric_hmatrix_matrix_product(char side, char UPLO, CoefficientPrecision alpha, const HMatrix<CoefficientPrecision, CoordinatePrecision> &A, const Matrix<CoefficientPrecision> &B, CoefficientPrecision beta, Matrix<CoefficientPrecision> &C) {
// if (transb == 'N') {
// Matrix<CoefficientPrecision> transposed_B(B.nb_cols(), B.nb_rows()), transposed_C(C.nb_cols(), C.nb_rows());
// transpose(B, transposed_B);
// transpose(C, transposed_C);
// sequential_add_hmatrix_matrix_product_row_major(transa, transb, alpha, A, transposed_B.data(), beta, transposed_C.data(), transposed_C.nb_rows());
// transpose(transposed_C, C);
// } else {
// Matrix<CoefficientPrecision> transposed_C(C.nb_cols(), C.nb_rows());
// transpose(C, transposed_C);
// sequential_add_hmatrix_matrix_product_row_major(transa, 'N', alpha, A, B.data(), beta, transposed_C.data(), transposed_C.nb_rows());
// transpose(transposed_C, C);
// }
// }
} // namespace htool

#endif
Loading

0 comments on commit 2cb8363

Please sign in to comment.