diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 9905a8e1..9eee8a53 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -21,6 +21,12 @@ if(RESOLVE_USE_KLU) target_link_libraries(system.exe PRIVATE ReSolve) endif(RESOLVE_USE_KLU) +if(RESOLVE_USE_LUSOL) + # Basic LUSOL example without any refactorization + add_executable(lusol_lusol.exe r_LUSOL_LUSOL.cpp) + target_link_libraries(lusol_lusol.exe PRIVATE ReSolve) +endif() + # Build rand example, CPU ONLY r_randGMRES_cpu add_executable(gmres_cpu_rand.exe r_randGMRES_cpu.cpp) @@ -34,23 +40,23 @@ if(RESOLVE_USE_CUDA) # Build example with KLU factorization and GLU refactorization add_executable(klu_glu.exe r_KLU_GLU.cpp) target_link_libraries(klu_glu.exe PRIVATE ReSolve) - + # Build example with KLU factorization and Rf refactorization add_executable(klu_rf.exe r_KLU_rf.cpp) target_link_libraries(klu_rf.exe PRIVATE ReSolve) - + # Build example with KLU factorization, Rf refactorization, and FGMRES iterative refinement add_executable(klu_rf_fgmres.exe r_KLU_rf_FGMRES.cpp) target_link_libraries(klu_rf_fgmres.exe PRIVATE ReSolve) - + # Build example where matrix is factorized once, refactorized once and then the preconditioner is REUSED add_executable(klu_rf_fgmres_reuse_refactorization.exe r_KLU_rf_FGMRES_reuse_factorization.cpp) target_link_libraries(klu_rf_fgmres_reuse_refactorization.exe PRIVATE ReSolve) - - # Build example where matrix data is updated + + # Build example where matrix data is updated add_executable(klu_glu_values_update.exe r_KLU_GLU_matrix_values_update.cpp) target_link_libraries(klu_glu_values_update.exe PRIVATE ReSolve) - + # Build example with a configurable system solver add_executable(system_cuda.exe r_SysSolverCuda.cpp) target_link_libraries(system_cuda.exe PRIVATE ReSolve) @@ -75,25 +81,25 @@ if(RESOLVE_USE_HIP) # Build example with KLU factorization and rocsolver Rf refactorization add_executable(klu_rocsolverrf.exe r_KLU_rocsolverrf.cpp) target_link_libraries(klu_rocsolverrf.exe PRIVATE ReSolve) - + # Build example with KLU factorization, rocsolver Rf refactorization, and FGMRES iterative refinement add_executable(klu_rocsolverrf_fgmres.exe r_KLU_rocSolverRf_FGMRES.cpp) target_link_libraries(klu_rocsolverrf_fgmres.exe PRIVATE ReSolve) - + # Example in which factorization is redone if solution is bad add_executable(klu_rocsolverrf_check_redo.exe r_KLU_rocsolverrf_redo_factorization.cpp) target_link_libraries(klu_rocsolverrf_check_redo.exe PRIVATE ReSolve) - + # Build example with a configurable system solver add_executable(system_hip.exe r_SysSolverHip.cpp) target_link_libraries(system_hip.exe PRIVATE ReSolve) - + # Build example with a configurable system solver add_executable(system_hip_fgmres.exe r_SysSolverHipRefine.cpp) target_link_libraries(system_hip_fgmres.exe PRIVATE ReSolve) endif(RESOLVE_USE_KLU) - + # Rand GMRES test with rocsparse add_executable(gmres_rocsparse_rand.exe r_randGMRES.cpp) target_link_libraries(gmres_rocsparse_rand.exe PRIVATE ReSolve) @@ -121,9 +127,9 @@ if(RESOLVE_USE_HIP) list(APPEND installable_executables gmres_rocsparse_rand.exe) endif(RESOLVE_USE_HIP) - list(APPEND installable_executables gmres_cpu_rand.exe) + list(APPEND installable_executables gmres_cpu_rand.exe) -install(TARGETS ${installable_executables} +install(TARGETS ${installable_executables} RUNTIME DESTINATION bin) # Path where the consumer test code will be installed @@ -147,7 +153,7 @@ endif() install(DIRECTORY resolve_consumer DESTINATION share/examples) install(FILES ${PROJECT_SOURCE_DIR}/tests/functionality/${RESOLVE_CONSUMER_APP} DESTINATION share/examples/resolve_consumer RENAME consumer.cpp) -# Shell script argumets: +# Shell script argumets: # 1. Path to where resolve is installed. # 2. Path to data directory add_custom_target(test_install COMMAND ${CONSUMER_PATH}/test.sh ${CMAKE_INSTALL_PREFIX} ${PROJECT_SOURCE_DIR}/tests/functionality/) diff --git a/examples/r_LUSOL_LUSOL.cpp b/examples/r_LUSOL_LUSOL.cpp new file mode 100644 index 00000000..c55094af --- /dev/null +++ b/examples/r_LUSOL_LUSOL.cpp @@ -0,0 +1,180 @@ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace ReSolve::constants; +using index_type = ReSolve::index_type; +using real_type = ReSolve::real_type; +using vector_type = ReSolve::vector::Vector; + +int specializedMatvec(ReSolve::matrix::Coo* Ageneric, + vector_type* vec_x, + vector_type* vec_result, + const real_type* alpha, + const real_type* beta) +{ + + ReSolve::matrix::Coo* A = static_cast(Ageneric); + index_type* rows = A->getRowData(ReSolve::memory::HOST); + index_type* columns = A->getColData(ReSolve::memory::HOST); + real_type* values = A->getValues(ReSolve::memory::HOST); + + real_type* xs = vec_x->getData(ReSolve::memory::HOST); + real_type* rs = vec_result->getData(ReSolve::memory::HOST); + + index_type n_rows = A->getNumRows(); + + std::unique_ptr sums(new real_type[n_rows]); + std::fill_n(sums.get(), n_rows, 0); + + std::unique_ptr compensations(new real_type[n_rows]); + std::fill_n(compensations.get(), n_rows, 0); + + real_type y, t; + + for (index_type i = 0; i < A->getNnz(); i++) { + y = (values[i] * xs[columns[i]]) - compensations[rows[i]]; + t = sums[rows[i]] + y; + compensations[rows[i]] = t - sums[rows[i]] - y; + sums[rows[i]] = t; + } + + for (index_type i = 0; i < n_rows; i++) { + sums[i] *= *alpha; + rs[i] = (rs[i] * *beta) + sums[i]; + } + + vec_result->setDataUpdated(ReSolve::memory::HOST); + return 0; +} + +int main(int argc, char* argv[]) +{ + + if (argc <= 4) { + std::cerr << "usage: lusol_lusol.exe MATRIX_FILE RHS_FILE NUM_SYSTEMS [MATRIX_FILE_ID RHS_FILE_ID...]" << std::endl; + return 1; + } + + std::string matrixFileName = argv[1]; + std::string rhsFileName = argv[2]; + + index_type numSystems = atoi(argv[3]); + if (numSystems == 0) { + return 0; + } + + std::cout << "Family mtx file name: " << matrixFileName << ", total number of matrices: " << numSystems << std::endl; + std::cout << "Family rhs file name: " << rhsFileName << ", total number of RHSes: " << numSystems << std::endl; + + std::string fileId; + std::string rhsId; + std::string matrixFileNameFull; + std::string rhsFileNameFull; + + std::unique_ptr workspace(new ReSolve::LinAlgWorkspaceCpu()); + std::unique_ptr matrix_handler(new ReSolve::MatrixHandler(workspace.get())); + std::unique_ptr vector_handler(new ReSolve::VectorHandler(workspace.get())); + real_type norm_A, norm_x, norm_r; // used for INF norm + + std::unique_ptr lusol(new ReSolve::LinSolverDirectLUSOL); + + ReSolve::matrix::Coo* A; + real_type* rhs; + vector_type *vec_rhs, *vec_r, *vec_x; + + for (int i = 0; i < numSystems; ++i) { + index_type j = 4 + i * 2; + fileId = argv[j]; + rhsId = argv[j + 1]; + + matrixFileNameFull = ""; + rhsFileNameFull = ""; + + matrixFileNameFull = matrixFileName + fileId + ".mtx"; + rhsFileNameFull = rhsFileName + rhsId + ".mtx"; + std::cout << "Reading: " << matrixFileNameFull << std::endl; + + std::ifstream mat_file(matrixFileNameFull); + if (!mat_file.is_open()) { + std::cout << "Failed to open file " << matrixFileNameFull << "\n"; + return -1; + } + std::ifstream rhs_file(rhsFileNameFull); + if (!rhs_file.is_open()) { + std::cout << "Failed to open file " << rhsFileNameFull << "\n"; + return -1; + } + if (i == 0) { + A = ReSolve::io::readMatrixFromFile(mat_file); + rhs = ReSolve::io::readRhsFromFile(rhs_file); + vec_rhs = new vector_type(A->getNumRows()); + vec_r = new vector_type(A->getNumRows()); + + vec_x = new vector_type(A->getNumRows()); + vec_x->allocate(ReSolve::memory::HOST); + } else { + ReSolve::io::readAndUpdateMatrix(mat_file, A); + ReSolve::io::readAndUpdateRhs(rhs_file, &rhs); + } + std::cout << "Finished reading the matrix and rhs, size: " << A->getNumRows() + << " x " << A->getNumColumns() + << ", nnz: " << A->getNnz() + << ", symmetric? " << A->symmetric() + << ", Expanded? " << A->expanded() << std::endl; + mat_file.close(); + rhs_file.close(); + + vec_rhs->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + vec_rhs->setDataUpdated(ReSolve::memory::HOST); + ReSolve::matrix::expand(*A); + std::cout << "Matrix expansion completed. Expanded NNZ: " << A->getNnzExpanded() << std::endl; + // Now call direct solver + int status; + + lusol->setup(A); + status = lusol->analyze(); + std::cout << "LUSOL analysis status: " << status << std::endl; + status = lusol->factorize(); + std::cout << "LUSOL factorization status: " << status << std::endl; + status = lusol->solve(vec_rhs, vec_x); + std::cout << "LUSOL solve status: " << status << std::endl; + vec_r->update(rhs, ReSolve::memory::HOST, ReSolve::memory::HOST); + + matrix_handler->setValuesChanged(true, ReSolve::memory::HOST); + + specializedMatvec(A, vec_x, vec_r, &ONE, &MINUSONE); + norm_r = vector_handler->infNorm(vec_r, ReSolve::memory::HOST); + + std::cout << "\t2-Norm of the residual: " + << std::scientific << std::setprecision(16) + << sqrt(vector_handler->dot(vec_r, vec_r, ReSolve::memory::HOST)) << "\n"; + matrix_handler->matrixInfNorm(A, &norm_A, ReSolve::memory::HOST); + norm_x = vector_handler->infNorm(vec_x, ReSolve::memory::HOST); + std::cout << "\tMatrix inf norm: " << std::scientific << std::setprecision(16) << norm_A << "\n" + << "\tResidual inf norm: " << norm_r << "\n" + << "\tSolution inf norm: " << norm_x << "\n" + << "\tNorm of scaled residuals: " << norm_r / (norm_A * norm_x) << "\n"; + } + + delete A; + delete[] rhs; + delete vec_r; + delete vec_x; + delete vec_rhs; + + return 0; +} diff --git a/resolve/LinSolverDirectLUSOL.cpp b/resolve/LinSolverDirectLUSOL.cpp index a8c579d3..f2064ad8 100644 --- a/resolve/LinSolverDirectLUSOL.cpp +++ b/resolve/LinSolverDirectLUSOL.cpp @@ -1,7 +1,7 @@ +#include #include #include #include -#include #include "LinSolverDirectLUSOL.hpp" #include "lusol/lusol.hpp" @@ -85,14 +85,14 @@ namespace ReSolve /** * @brief Analysis function of LUISOL - * + * * At this time, only memory allocation and initialization is done here. - * + * * @return int - 0 if successful, error code otherwise - * + * * @pre System matrix `A_` is in unsorted COO format without duplicates. - * - * @note LUSOL does not expose symbolic factorization in its API. It might + * + * @note LUSOL does not expose symbolic factorization in its API. It might * be possible refactor lu1fac into separate symbolic and numerical * factorization functions, but for now, we do the both in ::factorize(). */ @@ -156,7 +156,54 @@ namespace ReSolve w_, &inform); - // TODO: consider handling inform = 7 + if (inform == 7) { + delete[] a_; + delete[] indc_; + delete[] indr_; + + lena_ = luparm_[12]; + + a_ = new real_type[lena_]; + indc_ = new index_type[lena_]; + indr_ = new index_type[lena_]; + mem_.setZeroArrayOnHost(a_, lena_); + mem_.setZeroArrayOnHost(indc_, lena_); + mem_.setZeroArrayOnHost(indr_, lena_); + + mem_.setZeroArrayOnHost(p_, m_); + + mem_.setZeroArrayOnHost(q_, n_); + + mem_.setZeroArrayOnHost(lenc_, n_); + + mem_.setZeroArrayOnHost(lenr_, m_); + + mem_.setZeroArrayOnHost(locc_, n_); + + mem_.setZeroArrayOnHost(locr_, m_); + + mem_.setZeroArrayOnHost(iploc_, n_); + + mem_.setZeroArrayOnHost(iqloc_, m_); + + mem_.setZeroArrayOnHost(ipinv_, m_); + + mem_.setZeroArrayOnHost(iqinv_, n_); + + mem_.setZeroArrayOnHost(w_, n_); + + real_type* a_in = A_->getValues(memory::HOST); + index_type* indc_in = A_->getRowData(memory::HOST); + index_type* indr_in = A_->getColData(memory::HOST); + + for (index_type i = 0; i < nelem_; i++) { + a_[i] = a_in[i]; + indc_[i] = indc_in[i] + 1; + indr_[i] = indr_in[i] + 1; + } + + return factorize(); + } return inform; } @@ -280,7 +327,6 @@ namespace ReSolve mem_.setZeroArrayOnHost(indc_, lena_); mem_.setZeroArrayOnHost(indr_, lena_); - p_ = new index_type[m_]; mem_.setZeroArrayOnHost(p_, m_); @@ -313,7 +359,7 @@ namespace ReSolve w_ = new real_type[n_]; mem_.setZeroArrayOnHost(w_, n_); - + return 0; } @@ -333,20 +379,20 @@ namespace ReSolve delete[] ipinv_; delete[] iqinv_; delete[] w_; - a_ = nullptr; - indc_ = nullptr; - indr_ = nullptr; - p_ = nullptr; - q_ = nullptr; - lenc_ = nullptr; - lenr_ = nullptr; - locc_ = nullptr; - locr_ = nullptr; + a_ = nullptr; + indc_ = nullptr; + indr_ = nullptr; + p_ = nullptr; + q_ = nullptr; + lenc_ = nullptr; + lenr_ = nullptr; + locc_ = nullptr; + locr_ = nullptr; iploc_ = nullptr; - iqloc_ = nullptr; - ipinv_ = nullptr; + iqloc_ = nullptr; + ipinv_ = nullptr; iqinv_ = nullptr; - w_ = nullptr; + w_ = nullptr; return 0; }