From 824b0ec1e45c700a94632f5b2e0a180876a9a4dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Wed, 14 Aug 2024 19:03:04 +0300 Subject: [PATCH 01/22] Remove dependency on telescope in msweep source. --- CMakeLists.txt | 41 +------ include/Likelihood.hpp | 20 ++-- include/Sample.hpp | 22 ++-- include/mSWEEP_alignment.hpp | 201 +++++++++++++++++++++++++++++++++++ src/BootstrapSample.cpp | 4 +- src/Sample.cpp | 4 +- src/mSWEEP.cpp | 35 +++--- 7 files changed, 252 insertions(+), 75 deletions(-) create mode 100644 include/mSWEEP_alignment.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 9af6111..e9d8c88 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -168,6 +168,11 @@ endif() include_directories(${CMAKE_BXZSTR_HEADERS}) ## Check dependencies and download them if not given +## BitMagic +set(CMAKE_BITMAGIC_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS/external/telescope/external/BitMagic-7.12.3/src/) +include_directories(${CMAKE_BITMAGIC_HEADERS}) +include_directories(${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS/external/telescope/include) + ## cxxio if (DEFINED CMAKE_CXXIO_HEADERS) message(STATUS "cxxio headers provided in: ${CMAKE_CXXIO_HEADERS}") @@ -236,42 +241,6 @@ endif() include_directories(${CMAKE_ALIGNMENT_WRITER_HEADERS}) target_link_libraries(mSWEEP ${CMAKE_ALIGNMENT_WRITER_LIBRARY}) -## telescope -if (DEFINED CMAKE_TELESCOPE_LIBRARY AND DEFINED CMAKE_TELESCOPE_HEADERS) - message(STATUS "telescope headers provided in: ${CMAKE_TELESCOPE_HEADERS}") - message(STATUS "telescope library provided in: ${CMAKE_TELESCOPE_LIBRARY}") -else() - FetchContent_Declare(telescope - GIT_REPOSITORY https://github.com/tmaklin/telescope.git - GIT_TAG v0.7.2 - PREFIX "external" - SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/external/telescope" - BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/telescope" - BUILD_IN_SOURCE 0 - CMAKE_ARGS -D CMAKE_BXZSTR_HEADERS=${CMAKE_BXZSTR_HEADERS} - -D CMAKE_CXXARGS_HEADERS=${CMAKE_CXXARGS_HEADERS} - -D CMAKE_CXXIO_HEADERS=${CMAKE_CXXIO_HEADERS} - -D CMAKE_ALIGNMENT_WRITER_HEADERS=${CMAKE_ALIGNMENT_WRITER_HEADERS} - -D CMAKE_ALIGNMENT_WRITER_LIBRARY=${CMAKE_ALIGNMENT_WRITER_LIBRARY} - -D CMAKE_BITMAGIC_HEADERS=${CMAKE_CURRENT_SOURCE_DIR}/external/telescope/external/BitMagic-7.12.3/src - -D CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -D "CMAKE_C_FLAGS=${CMAKE_C_FLAGS}" - -D "CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}" - -D "CMAKE_C_COMPILER=${CMAKE_C_COMPILER}" - -D "CMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" - INSTALL_COMMAND "" - ) - FetchContent_MakeAvailable(telescope) - add_dependencies(telescope libalignmentwriter) - add_dependencies(mSWEEP telescope) - set_target_properties(telescope PROPERTIES EXCLUDE_FROM_ALL 1) - set(CMAKE_TELESCOPE_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/telescope/include) - set(CMAKE_TELESCOPE_LIBRARY ${CMAKE_CURRENT_BINARY_DIR}/lib/libtelescope.a) - set(CMAKE_BITMAGIC_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/telescope/external/BitMagic-7.12.3/src) -endif() -include_directories(${CMAKE_TELESCOPE_HEADERS} ${CMAKE_BITMAGIC_HEADERS}) -target_link_libraries(mSWEEP ${CMAKE_TELESCOPE_LIBRARY}) - ## seamat if (DEFINED CMAKE_SEAMAT_HEADERS) message(STATUS "seamat headers provided in: ${CMAKE_SEAMAT_HEADERS}") diff --git a/include/Likelihood.hpp b/include/Likelihood.hpp index 88d0926..6478c92 100644 --- a/include/Likelihood.hpp +++ b/include/Likelihood.hpp @@ -26,7 +26,6 @@ #define MSWEEP_LIKELIHOOD_HPP #include "Matrix.hpp" -#include "telescope.hpp" #include "mSWEEP_openmp_config.hpp" @@ -41,6 +40,7 @@ #include #include +#include "mSWEEP_alignment.hpp" #include "Grouping.hpp" namespace mSWEEP { @@ -106,7 +106,7 @@ class LL_WOR21 : public Likelihood { return ll_mat; } - void fill_ll_mat(const telescope::Alignment &alignment, const std::vector &group_sizes, const size_t n_groups, const size_t min_hits) { + void fill_ll_mat(const mSWEEP::Alignment &alignment, const std::vector &group_sizes, const size_t n_groups, const size_t min_hits) { size_t num_ecs = alignment.n_ecs(); bool mask_groups = min_hits > 0; @@ -116,8 +116,9 @@ class LL_WOR21 : public Likelihood { std::vector group_hit_counts(n_groups, (size_t)0); // Create mask identifying groups that have at least 1 alignment for (size_t i = 0; i < num_ecs; ++i) { + std::vector ec_hit_counts = alignment(i); for (size_t j = 0; j < n_groups; ++j) { - group_hit_counts[j] += (alignment(j, i) > 0) * alignment.reads_in_ec(i); + group_hit_counts[j] += ec_hit_counts[j]; } } for (size_t i = 0; i < n_groups; ++i) { @@ -137,21 +138,22 @@ class LL_WOR21 : public Likelihood { this->log_likelihoods.resize(n_masked_groups, num_ecs, std::log(this->zero_inflation)); for (size_t j = 0; j < num_ecs; ++j) { size_t groups_pos = 0; + std::vector ec_hit_counts = alignment(j); for (size_t i = 0; i < n_groups; ++i) { if (this->groups_mask[i]) { - this->log_likelihoods(groups_pos, j) = precalc_lls_mat(groups_pos, alignment(i, j)); + this->log_likelihoods(groups_pos, j) = precalc_lls_mat(groups_pos, ec_hit_counts[i]); ++groups_pos; } } } } - void fill_ec_counts(const telescope::Alignment &alignment) { + void fill_ec_counts(const mSWEEP::Alignment &alignment) { // Fill log ec counts. this->log_ec_counts.resize(alignment.n_ecs(), 0); #pragma omp parallel for schedule(static) for (size_t i = 0; i < alignment.n_ecs(); ++i) { - this->log_ec_counts[i] = std::log(alignment.reads_in_ec(i)); + this->log_ec_counts[i] = 0.0; } } @@ -170,14 +172,14 @@ class LL_WOR21 : public Likelihood { public: LL_WOR21() = default; - LL_WOR21(const std::vector &group_sizes, const telescope::Alignment &alignment, const size_t n_groups, const T tol, const T frac_mu, const size_t min_hits, const T _zero_inflation) { + LL_WOR21(const std::vector &group_sizes, const mSWEEP::Alignment &alignment, const size_t n_groups, const T tol, const T frac_mu, const size_t min_hits, const T _zero_inflation) { this->bb_constants[0] = tol; this->bb_constants[1] = frac_mu; this->zero_inflation = _zero_inflation; this->from_grouped_alignment(alignment, group_sizes, n_groups, min_hits); } - void from_grouped_alignment(const telescope::Alignment &alignment, const std::vector &group_sizes, const size_t n_groups, const size_t min_hits) { + void from_grouped_alignment(const mSWEEP::Alignment &alignment, const std::vector &group_sizes, const size_t n_groups, const size_t min_hits) { this->fill_ll_mat(alignment, group_sizes, n_groups, min_hits); this->fill_ec_counts(alignment); } @@ -292,7 +294,7 @@ class LL_WOR21 : public Likelihood { const std::vector& groups_considered() const override { return this->groups_mask; }; }; template -std::unique_ptr> ConstructAdaptiveLikelihood(const telescope::Alignment &alignment, const Grouping &grouping, const T q, const T e, const size_t min_hits, const T zero_inflation) { +std::unique_ptr> ConstructAdaptiveLikelihood(const mSWEEP::Alignment &alignment, const Grouping &grouping, const T q, const T e, const size_t min_hits, const T zero_inflation) { size_t max_group_size = grouping.max_group_size(); size_t n_groups = grouping.get_n_groups(); std::unique_ptr> log_likelihoods; diff --git a/include/Sample.hpp b/include/Sample.hpp index 8a3ce79..b9eb3d0 100644 --- a/include/Sample.hpp +++ b/include/Sample.hpp @@ -33,7 +33,7 @@ #include #include "Matrix.hpp" -#include "telescope.hpp" +#include "mSWEEP_alignment.hpp" namespace mSWEEP { class Sample { @@ -47,7 +47,7 @@ class Sample { std::vector log_KLDs; protected: - void count_alignments(const telescope::Alignment &alignment); + void count_alignments(const mSWEEP::Alignment &alignment); public: // Virtual functions @@ -111,7 +111,7 @@ class PlainSample : public Sample { public: PlainSample() = default; - PlainSample(const telescope::Alignment &alignment) { + PlainSample(const mSWEEP::Alignment &alignment) { this->count_alignments(alignment); } @@ -132,7 +132,7 @@ class BinningSample : public PlainSample, public Binning { public: BinningSample() = default; - BinningSample(const telescope::Alignment &alignment) { + BinningSample(const mSWEEP::Alignment &alignment) { this->count_alignments(alignment); this->store_aligned_reads(alignment.get_aligned_reads()); } @@ -157,19 +157,19 @@ class BootstrapSample : public Sample { std::vector> bootstrap_results; // Set all variables required to bootstrap the ec_counts later - void init_bootstrap(const telescope::Alignment &alignment); + void init_bootstrap(const mSWEEP::Alignment &alignment); protected: - void construct(const telescope::Alignment &alignment, const size_t _iters, const int32_t seed, const size_t bootstrap_count=0); + void construct(const mSWEEP::Alignment &alignment, const size_t _iters, const int32_t seed, const size_t bootstrap_count=0); public: BootstrapSample() = default; // Set seed in constructor - BootstrapSample(const telescope::Alignment &alignment, const size_t _iters, const int32_t seed) { + BootstrapSample(const mSWEEP::Alignment &alignment, const size_t _iters, const int32_t seed) { this->construct(alignment, _iters, seed); } - BootstrapSample(const telescope::Alignment &alignment, const size_t _iters, const size_t _bootstrap_count, const int32_t seed) { + BootstrapSample(const mSWEEP::Alignment &alignment, const size_t _iters, const size_t _bootstrap_count, const int32_t seed) { this->construct(alignment, _iters, seed, _bootstrap_count); } @@ -191,18 +191,18 @@ class BootstrapSample : public Sample { class BinningBootstrap : public BootstrapSample, public Binning { public: - BinningBootstrap(const telescope::Alignment &alignment, const size_t _iters, const int32_t seed) { + BinningBootstrap(const mSWEEP::Alignment &alignment, const size_t _iters, const int32_t seed) { this->construct(alignment, _iters, seed); this->store_aligned_reads(alignment.get_aligned_reads()); } - BinningBootstrap(const telescope::Alignment &alignment, const size_t _iters, const size_t _bootstrap_count, const int32_t seed) { + BinningBootstrap(const mSWEEP::Alignment &alignment, const size_t _iters, const size_t _bootstrap_count, const int32_t seed) { this->construct(alignment, _iters, seed, _bootstrap_count); this->store_aligned_reads(alignment.get_aligned_reads()); } }; -void ConstructSample(const telescope::Alignment &alignment, const size_t bootstrap_iters, const size_t bootstrap_count, const size_t bootstrap_seed, const bool bin_reads, std::unique_ptr &sample); +void ConstructSample(const mSWEEP::Alignment &alignment, const size_t bootstrap_iters, const size_t bootstrap_count, const size_t bootstrap_seed, const bool bin_reads, std::unique_ptr &sample); } diff --git a/include/mSWEEP_alignment.hpp b/include/mSWEEP_alignment.hpp new file mode 100644 index 0000000..41d11f4 --- /dev/null +++ b/include/mSWEEP_alignment.hpp @@ -0,0 +1,201 @@ +// mSWEEP: Estimate abundances of reference lineages in DNA sequencing reads. +// +// MIT License +// +// Copyright (c) 2023 Probabilistic Inference and Computational Biology group @ UH +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// +#ifndef MSWEEP_ALIGNMENT_HPP +#define MSWEEP_ALIGNMENT_HPP + +#include "bm64.h" +#include "unpack.hpp" + +#include +#include +#include + +namespace mSWEEP { +class Alignment { +private: + bm::bvector<> bits; + size_t n_targets; + size_t n_queries; + std::vector group_indicators; + size_t n_groups; + +public: + Alignment(size_t _n_targets) { + this->n_targets = _n_targets; + } + + void ReadPlaintextLine(const size_t n_targets, std::string &line, bm::bvector<>::bulk_insert_iterator &it) { + // telescope::ReadPlaintextLine + // + // Reads a line in a plaintext alignment file from Themisto + // (https://github.com/algbio/themisto) into the bm::bvector<> `it` + // inserts to. + // + // Input: + // `n_targets`: number of pseudoalignment targets (reference + // sequences). It's not possible to infer this from the Themisto + // file format so has to be provided separately. + // `line`: the line from the alignment file to read in. + // `it`: insert iterator to the bm::bvector<> variable for storing the alignment. + // + std::string part; + std::stringstream partition(line); + + // First column is read id (0-based indexing). + std::getline(partition, part, ' '); + size_t read_id = std::stoul(part); + + // Next columns contain the target sequence id (0-based indexing). + while (std::getline(partition, part, ' ')) { + *it = read_id*n_targets + std::stoul(part); // set bit `n_reads*n_refs + std::stoul(part)` as true + } + } + + size_t ReadPlaintextAlignment(const size_t n_targets, std::string &line, std::istream *stream, bm::bvector<> *ec_configs) { + // telescope::ReadPlaintextAlignment + // + // Reads a plaintext alignment file from Themisto + // (https://github.com/algbio/themisto) into `*ec_configs` and + // return the number of reads in the file (both unaligned and + // aligned). + // + // Input: + // `n_targets`: number of pseudoalignment targets (reference + // sequences). It's not possible to infer this from the Themisto + // file format so has to be provided separately. + // `line`: read each line into this variable. + // NOTE: the contents of the *first* line should already be stored in this variable. + // `stream`: pointer to an istream opened on the pseudoalignment file. + // `ec_configs`: pointer to the output variable that will contain the alignment. + // Output: + // `n_reads`: total number of reads in the pseudoalignment (unaligned + aligned). + // + bm::bvector<>::bulk_insert_iterator it(*ec_configs); // Bulk insert iterator buffers the insertions + + size_t n_reads = 1; + try { + // Contents of the first line is already stored in `line` + ReadPlaintextLine(n_targets, line, it); + + size_t compress_interval = 1000000; + while (std::getline(*stream, line)) { + // Insert each line into the alignment + ReadPlaintextLine(n_targets, line, it); + ++n_reads; + if (n_reads % compress_interval == 0) { + ec_configs->optimize(); + } + } + } catch (const std::exception &e) { + std::string msg(e.what()); + if (msg.find("stoul") != std::string::npos) { + throw std::runtime_error("File format not supported on line " + std::to_string(n_reads) + " with content: " + line); + } else { + throw std::runtime_error("Could not parse line " + std::to_string(n_reads) + " with content: " + line); + } + } + return n_reads; + } + + + void read(const std::string &merge_mode, std::vector &strands) { + for (size_t i = 0; i < strands.size(); ++i) { + std::string line; + std::getline(*strands[i], line); // Read the first line to check the format + size_t n_reads; + bm::bvector<> strand_alignment; + if (line.find(',') != std::string::npos) { + // First line contains a ','; stream could be in the compact format. + size_t n_refs; + alignment_writer::ReadHeader(line, &n_reads, &n_refs); + if (n_refs > this->n_targets) { + throw std::runtime_error("Pseudoalignment file has more target sequences than expected."); + } else if (this->n_targets < n_refs) { + throw std::runtime_error("Pseudoalignment file has less target sequences than expected."); + } + // Size is given on the header line. + strand_alignment.resize(n_reads*n_refs); + alignment_writer::UnpackData(strands[i], strand_alignment); + } else { + // Stream could be in the plaintext format. + // Size is unknown. + strand_alignment.set_new_blocks_strat(bm::BM_GAP); + n_reads = ReadPlaintextAlignment(n_targets, line, strands[i], &strand_alignment); + } + this->n_queries = n_reads; + + if (i == 0) { + this->bits = std::move(strand_alignment); + } else { + if (merge_mode == "intersection") { + this->bits.bit_and(strand_alignment); + } else if (merge_mode == "union") { + this->bits.bit_or(strand_alignment); + } else { + throw std::runtime_error("Unrecognized option `" + merge_mode + "` for --themisto-mode"); + } + } + } + } + + void collapse() { + + } + + size_t n_ecs() const { return this->n_queries; }; + size_t n_reads() const { return this->n_queries; }; + size_t reads_in_ec(const size_t i=0) const { return 1; }; + + std::vector operator()(const size_t row) const { + std::vector ret(this->n_groups, 0); + for (size_t i = 0; i < this->n_targets; ++i) { + ret[this->group_indicators[i]] += this->bits[row*n_targets + i]; + } + return ret; + } + + std::vector> get_aligned_reads() const { + std::vector> ret(this->n_queries, std::vector(1, 0)); + for (size_t i = 0; i < n_queries; ++i) { + ret[i][0] = i; + } + return ret; + } + + template + void add_groups(const std::vector &grouping) { + size_t _n_groups = 0; + this->group_indicators = std::vector(grouping.size()); + for (size_t i = 0; i < grouping.size(); ++i) { + group_indicators[i] = grouping[i]; + _n_groups = (n_groups > grouping[i] ? n_groups : grouping[i]); + } + this->n_groups = _n_groups + 1; + } + +}; +} + +#endif diff --git a/src/BootstrapSample.cpp b/src/BootstrapSample.cpp index 694baf9..cd52f56 100644 --- a/src/BootstrapSample.cpp +++ b/src/BootstrapSample.cpp @@ -30,7 +30,7 @@ #include "mSWEEP_version.h" namespace mSWEEP { -void BootstrapSample::init_bootstrap(const telescope::Alignment &alignment) { +void BootstrapSample::init_bootstrap(const mSWEEP::Alignment &alignment) { // Clear the bootstrap abundances in case we're estimating the same sample again. this->bootstrap_results = std::vector>(); @@ -43,7 +43,7 @@ void BootstrapSample::init_bootstrap(const telescope::Alignment &alignment) { ec_distribution = std::discrete_distribution(weights.begin(), weights.end()); } -void BootstrapSample::construct(const telescope::Alignment &alignment, const size_t _iters, const int32_t seed, const size_t _bootstrap_count) { +void BootstrapSample::construct(const mSWEEP::Alignment &alignment, const size_t _iters, const int32_t seed, const size_t _bootstrap_count) { this->count_alignments(alignment); if (seed == 26012023) { std::random_device rd; diff --git a/src/Sample.cpp b/src/Sample.cpp index 0a524b1..6fa16ee 100644 --- a/src/Sample.cpp +++ b/src/Sample.cpp @@ -27,7 +27,7 @@ #include namespace mSWEEP { -void ConstructSample(const telescope::Alignment &alignment, const size_t bootstrap_iters, const size_t bootstrap_count, const size_t bootstrap_seed, const bool bin_reads, std::unique_ptr &sample) { +void ConstructSample(const mSWEEP::Alignment &alignment, const size_t bootstrap_iters, const size_t bootstrap_count, const size_t bootstrap_seed, const bool bin_reads, std::unique_ptr &sample) { // Wrapper for determining which Sample type to construct. // Initialize Sample depending on how the alignment needs to be processed. if (bootstrap_iters > 0) { @@ -49,7 +49,7 @@ void ConstructSample(const telescope::Alignment &alignment, const size_t bootstr } } -void Sample::count_alignments(const telescope::Alignment &alignment) { +void Sample::count_alignments(const mSWEEP::Alignment &alignment) { // Count the number of aligned reads and store in counts_total size_t aln_counts_total = 0; #pragma omp parallel for schedule(static) reduction(+:aln_counts_total) diff --git a/src/mSWEEP.cpp b/src/mSWEEP.cpp index 04d666c..9f40d8f 100644 --- a/src/mSWEEP.cpp +++ b/src/mSWEEP.cpp @@ -34,11 +34,12 @@ #include "cxxargs.hpp" #include "Matrix.hpp" -#include "telescope.hpp" #include "cxxio.hpp" #include "rcgpar.hpp" #include "bin_reads.h" +#include "mSWEEP_alignment.hpp" + #include "mSWEEP_mpi_config.hpp" #include "mSWEEP_openmp_config.hpp" #include "mSWEEP_version.h" @@ -332,7 +333,8 @@ int main (int argc, char *argv[]) { // To save memory, the alignment can go out of scope. // The necessary values are stored in the Sample class. log << " reading pseudoalignments" << '\n'; - std::unique_ptr alignment; + mSWEEP::Alignment alignment(n_refs); + // std::unique_ptr alignment; try { const std::vector &alignment_paths = args.value>("themisto"); size_t n_files = alignment_paths.size(); @@ -350,16 +352,8 @@ int main (int argc, char *argv[]) { strands.emplace_back(&std::cin); } - // Read the pseudoalignment - if (n_groups <= std::numeric_limits::max()) { - telescope::read::ThemistoGrouped(telescope::get_mode(args.value("themisto-mode")), n_refs, static_cast*>(&(*reference))->get_group_indicators(i), strands, alignment); - } else if (n_groups <= std::numeric_limits::max()) { - telescope::read::ThemistoGrouped(telescope::get_mode(args.value("themisto-mode")), n_refs, static_cast*>(&(*reference))->get_group_indicators(i), strands, alignment); - } else if (n_groups <= std::numeric_limits::max()) { - telescope::read::ThemistoGrouped(telescope::get_mode(args.value("themisto-mode")), n_refs, static_cast*>(&(*reference))->get_group_indicators(i), strands, alignment); - } else { - telescope::read::ThemistoGrouped(telescope::get_mode(args.value("themisto-mode")), n_refs, static_cast*>(&(*reference))->get_group_indicators(i), strands, alignment); - } + alignment.read(args.value("themisto-mode"), strands); + } catch (std::exception &e) { finalize("Reading the pseudoalignments failed:\n " + std::string(e.what()) + "\nexiting\n", log, true); return 1; @@ -367,7 +361,18 @@ int main (int argc, char *argv[]) { // Use the alignment data to populate the log_likelihoods matrix. try { - log_likelihoods = mSWEEP::ConstructAdaptiveLikelihood(*alignment, reference->get_grouping(i), args.value('q'), args.value('e'), args.value("min-hits"), args.value("zero-inflation")); + // Read the pseudoalignment + if (n_groups <= std::numeric_limits::max()) { + alignment.add_groups(static_cast*>(&(*reference))->get_group_indicators(i)); + } else if (n_groups <= std::numeric_limits::max()) { + alignment.add_groups(static_cast*>(&(*reference))->get_group_indicators(i)); + } else if (n_groups <= std::numeric_limits::max()) { + alignment.add_groups(static_cast*>(&(*reference))->get_group_indicators(i)); + } else { + alignment.add_groups(static_cast*>(&(*reference))->get_group_indicators(i)); + } + + log_likelihoods = mSWEEP::ConstructAdaptiveLikelihood(alignment, reference->get_grouping(i), args.value('q'), args.value('e'), args.value("min-hits"), args.value("zero-inflation")); } catch (std::exception &e) { finalize("Building the log-likelihood array failed:\n " + std::string(e.what()) + "\nexiting\n", log, true); return 1; @@ -375,9 +380,9 @@ int main (int argc, char *argv[]) { // Initialize Sample depending on how the alignment needs to be processed. // Note: this is also only used by the root process in MPI configuration. - mSWEEP::ConstructSample(*alignment, args.value("iters"), args.value("bootstrap-count"), args.value("seed"), bin_reads, sample); + mSWEEP::ConstructSample(alignment, args.value("iters"), args.value("bootstrap-count"), args.value("seed"), bin_reads, sample); - log << " read " << alignment->n_ecs() << " unique alignments" << '\n'; + log << " read " << alignment.n_ecs() << " unique alignments" << '\n'; log.flush(); } else { try { From bb44a9b5e97fe5cfd7749ccd428dcdee523aa8d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Thu, 15 Aug 2024 12:25:00 +0300 Subject: [PATCH 02/22] Implement collapsing into equivalence classes. --- include/Likelihood.hpp | 4 +-- include/mSWEEP_alignment.hpp | 48 ++++++++++++++++++++++++++++-------- src/mSWEEP.cpp | 1 + 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/include/Likelihood.hpp b/include/Likelihood.hpp index 6478c92..7ab25c9 100644 --- a/include/Likelihood.hpp +++ b/include/Likelihood.hpp @@ -118,7 +118,7 @@ class LL_WOR21 : public Likelihood { for (size_t i = 0; i < num_ecs; ++i) { std::vector ec_hit_counts = alignment(i); for (size_t j = 0; j < n_groups; ++j) { - group_hit_counts[j] += ec_hit_counts[j]; + group_hit_counts[j] += ec_hit_counts[j] * alignment.reads_in_ec(i); } } for (size_t i = 0; i < n_groups; ++i) { @@ -153,7 +153,7 @@ class LL_WOR21 : public Likelihood { this->log_ec_counts.resize(alignment.n_ecs(), 0); #pragma omp parallel for schedule(static) for (size_t i = 0; i < alignment.n_ecs(); ++i) { - this->log_ec_counts[i] = 0.0; + this->log_ec_counts[i] = std::log(alignment.reads_in_ec(i)); } } diff --git a/include/mSWEEP_alignment.hpp b/include/mSWEEP_alignment.hpp index 41d11f4..ad159ee 100644 --- a/include/mSWEEP_alignment.hpp +++ b/include/mSWEEP_alignment.hpp @@ -28,18 +28,23 @@ #include "bm64.h" #include "unpack.hpp" +#include "mSWEEP_openmp_config.hpp" + #include #include #include +#include namespace mSWEEP { class Alignment { private: - bm::bvector<> bits; size_t n_targets; size_t n_queries; std::vector group_indicators; size_t n_groups; + std::vector> ec_read_ids; + std::vector ec_counts; + bm::bvector<> bits; public: Alignment(size_t _n_targets) { @@ -161,12 +166,39 @@ class Alignment { } void collapse() { - + std::unordered_map mymap; + bm::bvector<> collapsed_bits; + size_t ec_id = 0; + size_t unl = std::hash>{}(std::vector(this->n_targets, false)); + for (size_t i = 0; i < this->n_queries; ++i) { + std::vector aln(this->n_targets, false); + for (size_t j = 0; j < this->n_targets; ++j) { + aln[j] = this->bits[i*this->n_targets + j]; + } + size_t hash = std::hash>{}(aln); + bool any_aligned = hash != unl; + if (any_aligned) { + auto got = mymap.find(hash); + if (got == mymap.end()) { + mymap.insert(std::make_pair(hash, ec_id)); + for (size_t j = 0; j < this->n_targets; ++j) { + collapsed_bits[ec_id*this->n_targets + j] = aln[j]; + } + this->ec_counts.emplace_back(1); + this->ec_read_ids.emplace_back(std::vector({(uint32_t)i})); + ++ec_id; + } else { + ++this->ec_counts[got->second]; + this->ec_read_ids[got->second].emplace_back(i); + } + } + } + this->bits = std::move(collapsed_bits); } - size_t n_ecs() const { return this->n_queries; }; + size_t n_ecs() const { return this->ec_counts.size(); }; size_t n_reads() const { return this->n_queries; }; - size_t reads_in_ec(const size_t i=0) const { return 1; }; + size_t reads_in_ec(const size_t i) const { return this->ec_counts[i]; }; std::vector operator()(const size_t row) const { std::vector ret(this->n_groups, 0); @@ -176,12 +208,8 @@ class Alignment { return ret; } - std::vector> get_aligned_reads() const { - std::vector> ret(this->n_queries, std::vector(1, 0)); - for (size_t i = 0; i < n_queries; ++i) { - ret[i][0] = i; - } - return ret; + const std::vector>& get_aligned_reads() const { + return this->ec_read_ids; } template diff --git a/src/mSWEEP.cpp b/src/mSWEEP.cpp index 9f40d8f..1ee66b4 100644 --- a/src/mSWEEP.cpp +++ b/src/mSWEEP.cpp @@ -353,6 +353,7 @@ int main (int argc, char *argv[]) { } alignment.read(args.value("themisto-mode"), strands); + alignment.collapse(); } catch (std::exception &e) { finalize("Reading the pseudoalignments failed:\n " + std::string(e.what()) + "\nexiting\n", log, true); From 3a4c7fbf55ee3db972dbe61b0f82274b725677ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Thu, 15 Aug 2024 16:16:30 +0300 Subject: [PATCH 03/22] Parallelize hashing the alignments to identify equivalence classes. --- include/mSWEEP_alignment.hpp | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/include/mSWEEP_alignment.hpp b/include/mSWEEP_alignment.hpp index ad159ee..21b063a 100644 --- a/include/mSWEEP_alignment.hpp +++ b/include/mSWEEP_alignment.hpp @@ -167,22 +167,30 @@ class Alignment { void collapse() { std::unordered_map mymap; + bm::bvector<> collapsed_bits; size_t ec_id = 0; size_t unl = std::hash>{}(std::vector(this->n_targets, false)); + + std::vector hashes(this->n_queries); + +#pragma omp parallel for schedule(static) for (size_t i = 0; i < this->n_queries; ++i) { std::vector aln(this->n_targets, false); for (size_t j = 0; j < this->n_targets; ++j) { aln[j] = this->bits[i*this->n_targets + j]; } - size_t hash = std::hash>{}(aln); - bool any_aligned = hash != unl; + hashes[i] = std::hash>{}(aln); + } + + for (size_t i = 0; i < this->n_queries; ++i) { + bool any_aligned = hashes[i] != unl; if (any_aligned) { - auto got = mymap.find(hash); + auto got = mymap.find(hashes[i]); if (got == mymap.end()) { - mymap.insert(std::make_pair(hash, ec_id)); + mymap.insert(std::make_pair(hashes[i], ec_id)); for (size_t j = 0; j < this->n_targets; ++j) { - collapsed_bits[ec_id*this->n_targets + j] = aln[j]; + collapsed_bits[ec_id*this->n_targets + j] = this->bits[i*this->n_targets + j]; } this->ec_counts.emplace_back(1); this->ec_read_ids.emplace_back(std::vector({(uint32_t)i})); @@ -199,14 +207,9 @@ class Alignment { size_t n_ecs() const { return this->ec_counts.size(); }; size_t n_reads() const { return this->n_queries; }; size_t reads_in_ec(const size_t i) const { return this->ec_counts[i]; }; + size_t get_n_targets() const { return this->n_targets; }; - std::vector operator()(const size_t row) const { - std::vector ret(this->n_groups, 0); - for (size_t i = 0; i < this->n_targets; ++i) { - ret[this->group_indicators[i]] += this->bits[row*n_targets + i]; - } - return ret; - } + bool operator()(const size_t row, const size_t col) const { return this->bits[row*this->n_targets + col]; } const std::vector>& get_aligned_reads() const { return this->ec_read_ids; @@ -223,6 +226,8 @@ class Alignment { this->n_groups = _n_groups + 1; } + const std::vector& get_groups() const { return this->group_indicators; }; + }; } From 254184992505330288733fad125ab8c419f97c60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Thu, 15 Aug 2024 16:17:05 +0300 Subject: [PATCH 04/22] Refactoring in creating the likelihood matrix. --- include/Likelihood.hpp | 38 +++++++++++++++++++++-------- include/mSWEEP_openmp_config.hpp.in | 3 +++ 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/include/Likelihood.hpp b/include/Likelihood.hpp index 7ab25c9..53e6538 100644 --- a/include/Likelihood.hpp +++ b/include/Likelihood.hpp @@ -108,43 +108,61 @@ class LL_WOR21 : public Likelihood { void fill_ll_mat(const mSWEEP::Alignment &alignment, const std::vector &group_sizes, const size_t n_groups, const size_t min_hits) { size_t num_ecs = alignment.n_ecs(); + size_t n_targets = alignment.get_n_targets(); + + bm::sparse_vector> group_counts; + for (size_t i = 0; i < num_ecs; ++i) { + for (size_t j = 0; j < n_targets; ++j) { + if (alignment(i, j)) { + group_counts.inc(alignment.get_groups()[j]*num_ecs + i); + } + } + } bool mask_groups = min_hits > 0; this->groups_mask = std::vector(n_groups, !mask_groups); std::vector masked_group_sizes; + std::vector groups_pos(n_groups, 0); + size_t n_masked_groups = 0; if (mask_groups) { std::vector group_hit_counts(n_groups, (size_t)0); // Create mask identifying groups that have at least 1 alignment +#pragma omp parallel for schedule(static) reduction(vec_size_t_plus:group_hit_counts) for (size_t i = 0; i < num_ecs; ++i) { - std::vector ec_hit_counts = alignment(i); for (size_t j = 0; j < n_groups; ++j) { - group_hit_counts[j] += ec_hit_counts[j] * alignment.reads_in_ec(i); + group_hit_counts[j] += group_counts[i*n_groups + j] * alignment.reads_in_ec(i); } } + for (size_t i = 0; i < n_groups; ++i) { this->groups_mask[i] = groups_mask[i] || (group_hit_counts[i] >= min_hits); if (this->groups_mask[i]) { + groups_pos[i] = n_masked_groups; masked_group_sizes.push_back(group_sizes[i]); + ++n_masked_groups; } } } else { masked_group_sizes = group_sizes; +#pragma omp parallel for schedule(static) + for (size_t i = 0; i < n_groups; ++i) { + groups_pos[i] = i; + } } - size_t n_masked_groups = masked_group_sizes.size(); + n_masked_groups = masked_group_sizes.size(); this->update_bb_parameters(masked_group_sizes, n_masked_groups, this->bb_constants); const seamat::DenseMatrix &precalc_lls_mat = this->precalc_lls(masked_group_sizes, n_masked_groups); this->log_likelihoods.resize(n_masked_groups, num_ecs, std::log(this->zero_inflation)); - for (size_t j = 0; j < num_ecs; ++j) { - size_t groups_pos = 0; - std::vector ec_hit_counts = alignment(j); - for (size_t i = 0; i < n_groups; ++i) { + +#pragma omp parallel for schedule(static) + for (size_t i = 0; i < n_groups; ++i) { if (this->groups_mask[i]) { - this->log_likelihoods(groups_pos, j) = precalc_lls_mat(groups_pos, ec_hit_counts[i]); - ++groups_pos; + for (size_t j = 0; j < num_ecs; ++j) { + this->log_likelihoods(groups_pos[i], j) = precalc_lls_mat(groups_pos[i], group_counts[i*num_ecs + j]); + } } - } } } diff --git a/include/mSWEEP_openmp_config.hpp.in b/include/mSWEEP_openmp_config.hpp.in index 0c78efe..3b2f295 100644 --- a/include/mSWEEP_openmp_config.hpp.in +++ b/include/mSWEEP_openmp_config.hpp.in @@ -33,6 +33,9 @@ #pragma omp declare reduction(vec_double_plus : std::vector : \ std::transform(omp_out.begin(), omp_out.end(), omp_in.begin(), omp_out.begin(), std::plus())) \ initializer(omp_priv = decltype(omp_orig)(omp_orig.size())) +#pragma omp declare reduction(vec_size_t_plus : std::vector : \ + std::transform(omp_out.begin(), omp_out.end(), omp_in.begin(), omp_out.begin(), std::plus())) \ + initializer(omp_priv = decltype(omp_orig)(omp_orig.size())) #endif From 11f3b8ab45d8f7e882e41f217aae0660d0a621d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Fri, 16 Aug 2024 11:23:11 +0300 Subject: [PATCH 05/22] Build the equivalence classes partially in parallel. --- include/mSWEEP_alignment.hpp | 73 ++++++++++++++++++++++++++---------- 1 file changed, 54 insertions(+), 19 deletions(-) diff --git a/include/mSWEEP_alignment.hpp b/include/mSWEEP_alignment.hpp index 21b063a..d0fdbb6 100644 --- a/include/mSWEEP_alignment.hpp +++ b/include/mSWEEP_alignment.hpp @@ -166,41 +166,76 @@ class Alignment { } void collapse() { - std::unordered_map mymap; + size_t n_threads = 1; +#if defined(MSWEEP_OPENMP_SUPPORT) && (MSWEEP_OPENMP_SUPPORT) == 1 +#pragma omp parallel + { + n_threads = omp_get_num_threads(); + } +#endif - bm::bvector<> collapsed_bits; - size_t ec_id = 0; - size_t unl = std::hash>{}(std::vector(this->n_targets, false)); std::vector hashes(this->n_queries); - -#pragma omp parallel for schedule(static) + size_t unl = std::hash>{}(std::vector(this->n_targets, false)); + std::vector>> mymap(n_threads); + std::vector> my_ec_counts(n_threads); + std::vector>> my_ec_read_ids(n_threads); + size_t ec_id = 0; +#pragma omp parallel for schedule(static) firstprivate(ec_id) for (size_t i = 0; i < this->n_queries; ++i) { std::vector aln(this->n_targets, false); for (size_t j = 0; j < this->n_targets; ++j) { aln[j] = this->bits[i*this->n_targets + j]; } hashes[i] = std::hash>{}(aln); - } - - for (size_t i = 0; i < this->n_queries; ++i) { bool any_aligned = hashes[i] != unl; if (any_aligned) { - auto got = mymap.find(hashes[i]); - if (got == mymap.end()) { - mymap.insert(std::make_pair(hashes[i], ec_id)); - for (size_t j = 0; j < this->n_targets; ++j) { - collapsed_bits[ec_id*this->n_targets + j] = this->bits[i*this->n_targets + j]; - } - this->ec_counts.emplace_back(1); - this->ec_read_ids.emplace_back(std::vector({(uint32_t)i})); + auto got = mymap[omp_get_thread_num()].find(hashes[i]); + if (got == mymap[omp_get_thread_num()].end()) { + mymap[omp_get_thread_num()].insert(std::make_pair(hashes[i], std::make_pair(ec_id, i*this->n_targets)));//i*this->n_targets)); + my_ec_counts[omp_get_thread_num()].emplace_back(1); + my_ec_read_ids[omp_get_thread_num()].emplace_back(std::vector({(uint32_t)i})); ++ec_id; } else { - ++this->ec_counts[got->second]; - this->ec_read_ids[got->second].emplace_back(i); + ++my_ec_counts[omp_get_thread_num()][got->second.first]; + my_ec_read_ids[omp_get_thread_num()][got->second.first].emplace_back(i); + } + } + } + + bm::bvector<> collapsed_bits; + if (n_threads == 1) { + this->ec_counts = std::move(my_ec_counts[0]); + this->ec_read_ids = std::move(my_ec_read_ids[0]); + for (auto kv : mymap[0]) { + for (size_t k = 0; k < this->n_targets; ++k) { + collapsed_bits[kv.second.first*this->n_targets + k] = bits[kv.second.second + k]; + } + } + } else { + std::unordered_map map; + size_t j = 0; + for (size_t i = 0; i < n_threads; ++i) { + for (auto kv : mymap[i]) { + auto got = map.find(kv.first); + if (got == map.end()) { + map.insert(std::make_pair(kv.first, j)); + this->ec_counts.emplace_back(my_ec_counts[i][kv.second.first]); + this->ec_read_ids.emplace_back(my_ec_read_ids[i][kv.second.first]); + for (size_t k = 0; k < this->n_targets; ++k) { + collapsed_bits[j*this->n_targets + k] = bits[kv.second.second + k]; + } + ++j; + } else { + this->ec_counts[got->second] += my_ec_counts[i][kv.second.first]; + for (size_t k = 0; k < my_ec_read_ids[i][kv.second.first].size(); ++k) { + this->ec_read_ids[got->second].emplace_back(my_ec_read_ids[i][kv.second.first][k]); + } + } } } } + this->bits = std::move(collapsed_bits); } From 7480713fd4456cc55cab6b1a845bc0b5551482fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Fri, 16 Aug 2024 11:45:51 +0300 Subject: [PATCH 06/22] Move the values in merging instead of copying. --- include/mSWEEP_alignment.hpp | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/include/mSWEEP_alignment.hpp b/include/mSWEEP_alignment.hpp index d0fdbb6..4b55579 100644 --- a/include/mSWEEP_alignment.hpp +++ b/include/mSWEEP_alignment.hpp @@ -220,17 +220,27 @@ class Alignment { auto got = map.find(kv.first); if (got == map.end()) { map.insert(std::make_pair(kv.first, j)); - this->ec_counts.emplace_back(my_ec_counts[i][kv.second.first]); - this->ec_read_ids.emplace_back(my_ec_read_ids[i][kv.second.first]); + // Move leaves the values in my_X indeterminate but that's ok + // since each element in my_X is accessed only once + this->ec_counts.insert(this->ec_counts.end(), + std::make_move_iterator(my_ec_counts[i].begin() + kv.second.first), + std::make_move_iterator(my_ec_counts[i].begin() + kv.second.first + 1)); + this->ec_read_ids.emplace_back(std::vector()); + this->ec_read_ids.back().insert(this->ec_read_ids.back().end(), + std::make_move_iterator(my_ec_read_ids[i][kv.second.first].begin()), + std::make_move_iterator(my_ec_read_ids[i][kv.second.first].end())); + for (size_t k = 0; k < this->n_targets; ++k) { collapsed_bits[j*this->n_targets + k] = bits[kv.second.second + k]; } ++j; } else { + // This is where the move would be bad if the elements were accessed more than once this->ec_counts[got->second] += my_ec_counts[i][kv.second.first]; - for (size_t k = 0; k < my_ec_read_ids[i][kv.second.first].size(); ++k) { - this->ec_read_ids[got->second].emplace_back(my_ec_read_ids[i][kv.second.first][k]); - } + + this->ec_read_ids[got->second].insert(this->ec_read_ids[got->second].end(), + std::make_move_iterator(my_ec_read_ids[i][kv.second.first].begin()), + std::make_move_iterator(my_ec_read_ids[i][kv.second.first].end())); } } } From a049713e3b4a93ca07025170e0f6dd7251462f39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Fri, 16 Aug 2024 12:05:01 +0300 Subject: [PATCH 07/22] Simplify the hashing. --- include/mSWEEP_alignment.hpp | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/include/mSWEEP_alignment.hpp b/include/mSWEEP_alignment.hpp index 4b55579..c450ee4 100644 --- a/include/mSWEEP_alignment.hpp +++ b/include/mSWEEP_alignment.hpp @@ -175,24 +175,22 @@ class Alignment { #endif - std::vector hashes(this->n_queries); - size_t unl = std::hash>{}(std::vector(this->n_targets, false)); std::vector>> mymap(n_threads); std::vector> my_ec_counts(n_threads); std::vector>> my_ec_read_ids(n_threads); size_t ec_id = 0; #pragma omp parallel for schedule(static) firstprivate(ec_id) for (size_t i = 0; i < this->n_queries; ++i) { - std::vector aln(this->n_targets, false); - for (size_t j = 0; j < this->n_targets; ++j) { - aln[j] = this->bits[i*this->n_targets + j]; - } - hashes[i] = std::hash>{}(aln); - bool any_aligned = hashes[i] != unl; - if (any_aligned) { - auto got = mymap[omp_get_thread_num()].find(hashes[i]); + if (bits.any_range(i*this->n_targets, (i + 1)*this->n_targets - 1)) { + size_t hash = 0; + for (size_t j = 0; j < this->n_targets; ++j) { + if (this->bits[i*this->n_targets + j]) { + hash ^= j + 0x517cc1b727220a95 + (hash << 6) + (hash >> 2); + } + } + auto got = mymap[omp_get_thread_num()].find(hash); if (got == mymap[omp_get_thread_num()].end()) { - mymap[omp_get_thread_num()].insert(std::make_pair(hashes[i], std::make_pair(ec_id, i*this->n_targets)));//i*this->n_targets)); + mymap[omp_get_thread_num()].insert(std::make_pair(hash, std::make_pair(ec_id, i*this->n_targets))); my_ec_counts[omp_get_thread_num()].emplace_back(1); my_ec_read_ids[omp_get_thread_num()].emplace_back(std::vector({(uint32_t)i})); ++ec_id; From 3791ba2f57aa99d2f5d7fc35dd538439371c4825 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Fri, 16 Aug 2024 12:24:19 +0300 Subject: [PATCH 08/22] Parallelize counting the alignments vs. each group. --- include/Likelihood.hpp | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/include/Likelihood.hpp b/include/Likelihood.hpp index 53e6538..bf04f0f 100644 --- a/include/Likelihood.hpp +++ b/include/Likelihood.hpp @@ -110,15 +110,30 @@ class LL_WOR21 : public Likelihood { size_t num_ecs = alignment.n_ecs(); size_t n_targets = alignment.get_n_targets(); - bm::sparse_vector> group_counts; + size_t n_threads = 1; +#if defined(MSWEEP_OPENMP_SUPPORT) && (MSWEEP_OPENMP_SUPPORT) == 1 +#pragma omp parallel + { + n_threads = omp_get_num_threads(); + } +#endif + + // This double loop is currently the slowest part in the input reading + std::vector>> local_counts(n_threads); +#pragma omp parallel for schedule(static) for (size_t i = 0; i < num_ecs; ++i) { for (size_t j = 0; j < n_targets; ++j) { if (alignment(i, j)) { - group_counts.inc(alignment.get_groups()[j]*num_ecs + i); + local_counts[omp_get_thread_num()].inc(alignment.get_groups()[j]*num_ecs + i); } } } + bm::sparse_vector> group_counts = std::move(local_counts[0]); + for (size_t i = 1; i < n_threads; ++i) { + group_counts.merge(local_counts[i]); + } + bool mask_groups = min_hits > 0; this->groups_mask = std::vector(n_groups, !mask_groups); std::vector masked_group_sizes; From 379a59ac37b822fd0d78fd35921467f3b063ccf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Fri, 16 Aug 2024 12:24:44 +0300 Subject: [PATCH 09/22] Clean up --- include/mSWEEP_alignment.hpp | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/include/mSWEEP_alignment.hpp b/include/mSWEEP_alignment.hpp index c450ee4..6e73e4d 100644 --- a/include/mSWEEP_alignment.hpp +++ b/include/mSWEEP_alignment.hpp @@ -32,7 +32,6 @@ #include #include -#include #include namespace mSWEEP { @@ -52,19 +51,6 @@ class Alignment { } void ReadPlaintextLine(const size_t n_targets, std::string &line, bm::bvector<>::bulk_insert_iterator &it) { - // telescope::ReadPlaintextLine - // - // Reads a line in a plaintext alignment file from Themisto - // (https://github.com/algbio/themisto) into the bm::bvector<> `it` - // inserts to. - // - // Input: - // `n_targets`: number of pseudoalignment targets (reference - // sequences). It's not possible to infer this from the Themisto - // file format so has to be provided separately. - // `line`: the line from the alignment file to read in. - // `it`: insert iterator to the bm::bvector<> variable for storing the alignment. - // std::string part; std::stringstream partition(line); @@ -79,24 +65,6 @@ class Alignment { } size_t ReadPlaintextAlignment(const size_t n_targets, std::string &line, std::istream *stream, bm::bvector<> *ec_configs) { - // telescope::ReadPlaintextAlignment - // - // Reads a plaintext alignment file from Themisto - // (https://github.com/algbio/themisto) into `*ec_configs` and - // return the number of reads in the file (both unaligned and - // aligned). - // - // Input: - // `n_targets`: number of pseudoalignment targets (reference - // sequences). It's not possible to infer this from the Themisto - // file format so has to be provided separately. - // `line`: read each line into this variable. - // NOTE: the contents of the *first* line should already be stored in this variable. - // `stream`: pointer to an istream opened on the pseudoalignment file. - // `ec_configs`: pointer to the output variable that will contain the alignment. - // Output: - // `n_reads`: total number of reads in the pseudoalignment (unaligned + aligned). - // bm::bvector<>::bulk_insert_iterator it(*ec_configs); // Bulk insert iterator buffers the insertions size_t n_reads = 1; @@ -174,7 +142,6 @@ class Alignment { } #endif - std::vector>> mymap(n_threads); std::vector> my_ec_counts(n_threads); std::vector>> my_ec_read_ids(n_threads); From d10cdb98f6ec542a5211366b4aa68c519565df81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Fri, 16 Aug 2024 12:33:37 +0300 Subject: [PATCH 10/22] Better status messages with --verbose. --- src/mSWEEP.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/mSWEEP.cpp b/src/mSWEEP.cpp index 1ee66b4..5cf5cea 100644 --- a/src/mSWEEP.cpp +++ b/src/mSWEEP.cpp @@ -353,7 +353,10 @@ int main (int argc, char *argv[]) { } alignment.read(args.value("themisto-mode"), strands); + log << " read alignments for " << alignment.n_reads() << " reads" << '\n'; + log << "Building equivalence classes" << '\n'; alignment.collapse(); + log << " found " << alignment.n_ecs() << " unique alignments" << '\n'; } catch (std::exception &e) { finalize("Reading the pseudoalignments failed:\n " + std::string(e.what()) + "\nexiting\n", log, true); @@ -363,6 +366,7 @@ int main (int argc, char *argv[]) { // Use the alignment data to populate the log_likelihoods matrix. try { // Read the pseudoalignment + log << "Computing the likelihood matrix" << '\n'; if (n_groups <= std::numeric_limits::max()) { alignment.add_groups(static_cast*>(&(*reference))->get_group_indicators(i)); } else if (n_groups <= std::numeric_limits::max()) { @@ -383,7 +387,6 @@ int main (int argc, char *argv[]) { // Note: this is also only used by the root process in MPI configuration. mSWEEP::ConstructSample(alignment, args.value("iters"), args.value("bootstrap-count"), args.value("seed"), bin_reads, sample); - log << " read " << alignment.n_ecs() << " unique alignments" << '\n'; log.flush(); } else { try { From 237f5b360f9682272ce21e967bfd482ccd4d64f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Fri, 16 Aug 2024 17:47:41 +0300 Subject: [PATCH 11/22] Simplify collapse() --- include/mSWEEP_alignment.hpp | 78 +++++++++++++----------------------- 1 file changed, 28 insertions(+), 50 deletions(-) diff --git a/include/mSWEEP_alignment.hpp b/include/mSWEEP_alignment.hpp index 6e73e4d..f0ef450 100644 --- a/include/mSWEEP_alignment.hpp +++ b/include/mSWEEP_alignment.hpp @@ -142,11 +142,8 @@ class Alignment { } #endif - std::vector>> mymap(n_threads); - std::vector> my_ec_counts(n_threads); - std::vector>> my_ec_read_ids(n_threads); - size_t ec_id = 0; -#pragma omp parallel for schedule(static) firstprivate(ec_id) + std::vector>> mymap(n_threads); +#pragma omp parallel for schedule(static) for (size_t i = 0; i < this->n_queries; ++i) { if (bits.any_range(i*this->n_targets, (i + 1)*this->n_targets - 1)) { size_t hash = 0; @@ -157,60 +154,41 @@ class Alignment { } auto got = mymap[omp_get_thread_num()].find(hash); if (got == mymap[omp_get_thread_num()].end()) { - mymap[omp_get_thread_num()].insert(std::make_pair(hash, std::make_pair(ec_id, i*this->n_targets))); - my_ec_counts[omp_get_thread_num()].emplace_back(1); - my_ec_read_ids[omp_get_thread_num()].emplace_back(std::vector({(uint32_t)i})); - ++ec_id; + mymap[omp_get_thread_num()].insert(std::make_pair(hash, std::vector({(uint32_t)i}))); } else { - ++my_ec_counts[omp_get_thread_num()][got->second.first]; - my_ec_read_ids[omp_get_thread_num()][got->second.first].emplace_back(i); + got->second.emplace_back(i); } } } - bm::bvector<> collapsed_bits; - if (n_threads == 1) { - this->ec_counts = std::move(my_ec_counts[0]); - this->ec_read_ids = std::move(my_ec_read_ids[0]); - for (auto kv : mymap[0]) { - for (size_t k = 0; k < this->n_targets; ++k) { - collapsed_bits[kv.second.first*this->n_targets + k] = bits[kv.second.second + k]; - } - } - } else { - std::unordered_map map; - size_t j = 0; - for (size_t i = 0; i < n_threads; ++i) { - for (auto kv : mymap[i]) { - auto got = map.find(kv.first); - if (got == map.end()) { - map.insert(std::make_pair(kv.first, j)); - // Move leaves the values in my_X indeterminate but that's ok - // since each element in my_X is accessed only once - this->ec_counts.insert(this->ec_counts.end(), - std::make_move_iterator(my_ec_counts[i].begin() + kv.second.first), - std::make_move_iterator(my_ec_counts[i].begin() + kv.second.first + 1)); - this->ec_read_ids.emplace_back(std::vector()); - this->ec_read_ids.back().insert(this->ec_read_ids.back().end(), - std::make_move_iterator(my_ec_read_ids[i][kv.second.first].begin()), - std::make_move_iterator(my_ec_read_ids[i][kv.second.first].end())); - - for (size_t k = 0; k < this->n_targets; ++k) { - collapsed_bits[j*this->n_targets + k] = bits[kv.second.second + k]; - } - ++j; - } else { - // This is where the move would be bad if the elements were accessed more than once - this->ec_counts[got->second] += my_ec_counts[i][kv.second.first]; - - this->ec_read_ids[got->second].insert(this->ec_read_ids[got->second].end(), - std::make_move_iterator(my_ec_read_ids[i][kv.second.first].begin()), - std::make_move_iterator(my_ec_read_ids[i][kv.second.first].end())); - } + std::unordered_map> map; + for (size_t i = 0; i < n_threads; ++i) { + for (auto kv : mymap[i]) { + auto got = map.find(kv.first); + if (got == map.end()) { + map.insert(std::make_pair(kv.first, std::move(kv.second))); + } else { + got->second.insert(got->second.end(), + std::make_move_iterator(kv.second.begin()), + std::make_move_iterator(kv.second.end())); } } } + size_t n_ecs = map.size(); + this->ec_counts = std::vector(n_ecs, 0); + this->ec_read_ids = std::vector>(n_ecs); + size_t i = 0; + bm::bvector<> collapsed_bits; + collapsed_bits.resize(n_ecs*this->n_targets); + for (auto kv : map) { + this->ec_read_ids[i] = std::move(kv.second); + this->ec_counts[i] = this->ec_read_ids[i].size(); + for (size_t k = 0; k < this->n_targets; ++k) { + collapsed_bits[i*this->n_targets + k] = this->bits[this->ec_read_ids[i][0]*this->n_targets + k]; + } + ++i; + } this->bits = std::move(collapsed_bits); } From cb6c814ec8151c29235843011f98d4bb50adbb72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Fri, 16 Aug 2024 18:03:04 +0300 Subject: [PATCH 12/22] Ensure deterministic order for ECs. --- include/mSWEEP_alignment.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/mSWEEP_alignment.hpp b/include/mSWEEP_alignment.hpp index f0ef450..aa0c2fd 100644 --- a/include/mSWEEP_alignment.hpp +++ b/include/mSWEEP_alignment.hpp @@ -33,6 +33,7 @@ #include #include #include +#include namespace mSWEEP { class Alignment { @@ -161,7 +162,7 @@ class Alignment { } } - std::unordered_map> map; + std::map> map; for (size_t i = 0; i < n_threads; ++i) { for (auto kv : mymap[i]) { auto got = map.find(kv.first); From 829519c905ee6ad9aa2175aaa4ef156cda23485e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Fri, 16 Aug 2024 18:24:38 +0300 Subject: [PATCH 13/22] Slight improvement to final merge parallelism in collapse() --- include/mSWEEP_alignment.hpp | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/include/mSWEEP_alignment.hpp b/include/mSWEEP_alignment.hpp index aa0c2fd..13406f1 100644 --- a/include/mSWEEP_alignment.hpp +++ b/include/mSWEEP_alignment.hpp @@ -180,17 +180,28 @@ class Alignment { this->ec_counts = std::vector(n_ecs, 0); this->ec_read_ids = std::vector>(n_ecs); size_t i = 0; - bm::bvector<> collapsed_bits; - collapsed_bits.resize(n_ecs*this->n_targets); - for (auto kv : map) { - this->ec_read_ids[i] = std::move(kv.second); - this->ec_counts[i] = this->ec_read_ids[i].size(); - for (size_t k = 0; k < this->n_targets; ++k) { - collapsed_bits[i*this->n_targets + k] = this->bits[this->ec_read_ids[i][0]*this->n_targets + k]; + std::vector> collapsed_bits(n_threads); + +#pragma omp parallel + { + size_t i = 0; + size_t thread_id = omp_get_thread_num(); + collapsed_bits[thread_id].resize(n_ecs*this->n_targets); + for(auto element = map.begin(); element !=map.end(); ++element, i++) { + if(i%n_threads == thread_id) { + this->ec_read_ids[i] = std::move(element->second); + this->ec_counts[i] = this->ec_read_ids[i].size(); + for (size_t k = 0; k < this->n_targets; ++k) { + collapsed_bits[thread_id][i*this->n_targets + k] = this->bits[this->ec_read_ids[i][0]*this->n_targets + k]; + } + } } - ++i; } - this->bits = std::move(collapsed_bits); + + this->bits.clear(); + for (size_t i = 0; i < n_threads; ++i) { + this->bits.bit_or(collapsed_bits[i]); + } } size_t n_ecs() const { return this->ec_counts.size(); }; From 7f353cb046778b821ecc0d1fdee27495c97320e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Fri, 16 Aug 2024 18:45:18 +0300 Subject: [PATCH 14/22] Faster computing of group_counts (no need to save memory here). --- include/Likelihood.hpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/include/Likelihood.hpp b/include/Likelihood.hpp index bf04f0f..6b81b90 100644 --- a/include/Likelihood.hpp +++ b/include/Likelihood.hpp @@ -119,21 +119,16 @@ class LL_WOR21 : public Likelihood { #endif // This double loop is currently the slowest part in the input reading - std::vector>> local_counts(n_threads); + std::vector group_counts(num_ecs*n_groups, 0); #pragma omp parallel for schedule(static) for (size_t i = 0; i < num_ecs; ++i) { for (size_t j = 0; j < n_targets; ++j) { if (alignment(i, j)) { - local_counts[omp_get_thread_num()].inc(alignment.get_groups()[j]*num_ecs + i); + ++group_counts[alignment.get_groups()[j]*num_ecs + i]; } } } - bm::sparse_vector> group_counts = std::move(local_counts[0]); - for (size_t i = 1; i < n_threads; ++i) { - group_counts.merge(local_counts[i]); - } - bool mask_groups = min_hits > 0; this->groups_mask = std::vector(n_groups, !mask_groups); std::vector masked_group_sizes; From 652b236b927977d277d2245c798a6020b1841838 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Fri, 16 Aug 2024 18:57:38 +0300 Subject: [PATCH 15/22] Count reads in min-hits like intended. --- include/Likelihood.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/Likelihood.hpp b/include/Likelihood.hpp index 6b81b90..75a2210 100644 --- a/include/Likelihood.hpp +++ b/include/Likelihood.hpp @@ -140,7 +140,7 @@ class LL_WOR21 : public Likelihood { #pragma omp parallel for schedule(static) reduction(vec_size_t_plus:group_hit_counts) for (size_t i = 0; i < num_ecs; ++i) { for (size_t j = 0; j < n_groups; ++j) { - group_hit_counts[j] += group_counts[i*n_groups + j] * alignment.reads_in_ec(i); + group_hit_counts[j] += (group_counts[i*n_groups + j] > 0) * alignment.reads_in_ec(i); } } From 10e53f7968d44560e1f8917577afad2152489e28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Sat, 17 Aug 2024 09:26:50 +0300 Subject: [PATCH 16/22] Fix accessing group_counts in creating the mask. --- include/Likelihood.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/Likelihood.hpp b/include/Likelihood.hpp index 75a2210..17c5099 100644 --- a/include/Likelihood.hpp +++ b/include/Likelihood.hpp @@ -138,9 +138,9 @@ class LL_WOR21 : public Likelihood { std::vector group_hit_counts(n_groups, (size_t)0); // Create mask identifying groups that have at least 1 alignment #pragma omp parallel for schedule(static) reduction(vec_size_t_plus:group_hit_counts) - for (size_t i = 0; i < num_ecs; ++i) { - for (size_t j = 0; j < n_groups; ++j) { - group_hit_counts[j] += (group_counts[i*n_groups + j] > 0) * alignment.reads_in_ec(i); + for (size_t j = 0; j < n_groups; ++j) { + for (size_t i = 0; i < num_ecs; ++i) { + group_hit_counts[j] += (group_counts[j*num_ecs + i] > 0) * alignment.reads_in_ec(i); } } From dba6b846964e83b0fc3542084e16e653a837dae0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Sat, 17 Aug 2024 09:27:20 +0300 Subject: [PATCH 17/22] Revert "Faster computing of group_counts (no need to save memory here)." This reverts commit 7f353cb046778b821ecc0d1fdee27495c97320e5. The optimizations did not end up working on large inputs. --- include/Likelihood.hpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/include/Likelihood.hpp b/include/Likelihood.hpp index 17c5099..d182a89 100644 --- a/include/Likelihood.hpp +++ b/include/Likelihood.hpp @@ -119,16 +119,21 @@ class LL_WOR21 : public Likelihood { #endif // This double loop is currently the slowest part in the input reading - std::vector group_counts(num_ecs*n_groups, 0); + std::vector>> local_counts(n_threads); #pragma omp parallel for schedule(static) for (size_t i = 0; i < num_ecs; ++i) { for (size_t j = 0; j < n_targets; ++j) { if (alignment(i, j)) { - ++group_counts[alignment.get_groups()[j]*num_ecs + i]; + local_counts[omp_get_thread_num()].inc(alignment.get_groups()[j]*num_ecs + i); } } } + bm::sparse_vector> group_counts = std::move(local_counts[0]); + for (size_t i = 1; i < n_threads; ++i) { + group_counts.merge(local_counts[i]); + } + bool mask_groups = min_hits > 0; this->groups_mask = std::vector(n_groups, !mask_groups); std::vector masked_group_sizes; From 97a2d149759f5255d6fccf94120a6be9c68d3c00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Sat, 17 Aug 2024 09:28:20 +0300 Subject: [PATCH 18/22] Ensure index doesn't get cast to <64bit int. --- include/Likelihood.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/Likelihood.hpp b/include/Likelihood.hpp index d182a89..15c4ed6 100644 --- a/include/Likelihood.hpp +++ b/include/Likelihood.hpp @@ -124,7 +124,7 @@ class LL_WOR21 : public Likelihood { for (size_t i = 0; i < num_ecs; ++i) { for (size_t j = 0; j < n_targets; ++j) { if (alignment(i, j)) { - local_counts[omp_get_thread_num()].inc(alignment.get_groups()[j]*num_ecs + i); + local_counts[omp_get_thread_num()].inc((size_t)((size_t)alignment.get_groups()[j]*num_ecs) + i); } } } From 1174a4e2929e397a8d6aee7fb8e1a53bc2740f20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 20 Aug 2024 16:11:42 +0300 Subject: [PATCH 19/22] Fix compiling without openmp. --- include/mSWEEP_alignment.hpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/include/mSWEEP_alignment.hpp b/include/mSWEEP_alignment.hpp index 13406f1..a598ce2 100644 --- a/include/mSWEEP_alignment.hpp +++ b/include/mSWEEP_alignment.hpp @@ -153,9 +153,16 @@ class Alignment { hash ^= j + 0x517cc1b727220a95 + (hash << 6) + (hash >> 2); } } +#if defined(MSWEEP_OPENMP_SUPPORT) && (MSWEEP_OPENMP_SUPPORT) == 1 auto got = mymap[omp_get_thread_num()].find(hash); if (got == mymap[omp_get_thread_num()].end()) { mymap[omp_get_thread_num()].insert(std::make_pair(hash, std::vector({(uint32_t)i}))); +#else + auto got = mymap[0].find(hash); + if (got == mymap[0].end()) { + mymap[0].insert(std::make_pair(hash, std::vector({(uint32_t)i}))); + +#endif } else { got->second.emplace_back(i); } @@ -185,7 +192,10 @@ class Alignment { #pragma omp parallel { size_t i = 0; - size_t thread_id = omp_get_thread_num(); + size_t thread_id = 0; +#if defined(MSWEEP_OPENMP_SUPPORT) && (MSWEEP_OPENMP_SUPPORT) == 1 + thread_id = omp_get_thread_num(); +#endif collapsed_bits[thread_id].resize(n_ecs*this->n_targets); for(auto element = map.begin(); element !=map.end(); ++element, i++) { if(i%n_threads == thread_id) { From cfb5ff11db5facb391bdc64a9e7297f5fe09f62d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 20 Aug 2024 16:11:50 +0300 Subject: [PATCH 20/22] Add function to accesse specific ec's read IDs. --- include/mSWEEP_alignment.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/mSWEEP_alignment.hpp b/include/mSWEEP_alignment.hpp index a598ce2..36aec10 100644 --- a/include/mSWEEP_alignment.hpp +++ b/include/mSWEEP_alignment.hpp @@ -225,6 +225,8 @@ class Alignment { return this->ec_read_ids; } + const std::vector& reads_assigned_to_ec(size_t ec_id) const { return this->ec_read_ids[ec_id]; } + template void add_groups(const std::vector &grouping) { size_t _n_groups = 0; From 4e28bc4c7542e4bd8d7e9b85f7b00aa46c1943bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 20 Aug 2024 16:21:41 +0300 Subject: [PATCH 21/22] Update mGEMS (can now compile whole project without telescope). --- CMakeLists.txt | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e9d8c88..0bba9e5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -168,11 +168,6 @@ endif() include_directories(${CMAKE_BXZSTR_HEADERS}) ## Check dependencies and download them if not given -## BitMagic -set(CMAKE_BITMAGIC_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS/external/telescope/external/BitMagic-7.12.3/src/) -include_directories(${CMAKE_BITMAGIC_HEADERS}) -include_directories(${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS/external/telescope/include) - ## cxxio if (DEFINED CMAKE_CXXIO_HEADERS) message(STATUS "cxxio headers provided in: ${CMAKE_CXXIO_HEADERS}") @@ -237,10 +232,19 @@ else() set_target_properties(alignment-writer PROPERTIES EXCLUDE_FROM_ALL 1) set(CMAKE_ALIGNMENT_WRITER_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/alignment-writer/include) set(CMAKE_ALIGNMENT_WRITER_LIBRARY ${CMAKE_CURRENT_BINARY_DIR}/lib/libalignment-writer.a) + set(CMAKE_BITMAGIC_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/alignment-writer/external/BitMagic-7.12.3/src/) endif() include_directories(${CMAKE_ALIGNMENT_WRITER_HEADERS}) target_link_libraries(mSWEEP ${CMAKE_ALIGNMENT_WRITER_LIBRARY}) +## BitMagic +if (DEFINED CMAKE_BITMAGIC_HEADERS) + message(STATUS "BitMagic headers provided in: ${CMAKE_BITMAGIC_HEADERS}") +else() + message(FATAL_ERROR "Provide BitMagic C++ headers with -DCMAKE_BITMAGIC_HEADERS") +endif() +include_directories(${CMAKE_BITMAGIC_HEADERS}) + ## seamat if (DEFINED CMAKE_SEAMAT_HEADERS) message(STATUS "seamat headers provided in: ${CMAKE_SEAMAT_HEADERS}") @@ -252,6 +256,7 @@ else() SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/external/seamat" BUILD_IN_SOURCE 0 CMAKE_ARGS -D CMAKE_BUILD_TESTS=0 + -D CMAKE_BITMAGIC_HEADERS=${CMAKE_BITMAGIC_HEADERS} BUILD_COMMAND "" CONFIGURE_COMMAND "" INSTALL_COMMAND "" @@ -301,7 +306,7 @@ if (DEFINED CMAKE_MGEMS_LIBRARY AND DEFINED CMAKE_MGEMS_HEADERS) else() FetchContent_Declare(mGEMS GIT_REPOSITORY https://github.com/PROBIC/mGEMS.git - GIT_TAG v1.3.0 + GIT_TAG v1.3.3 PREFIX "external" SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/external/mGEMS" BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS" @@ -310,9 +315,8 @@ else() -D CMAKE_CXXARGS_HEADERS=${CMAKE_CXXARGS_HEADERS} -D CMAKE_CXXIO_HEADERS=${CMAKE_CXXIO_HEADERS} -D CMAKE_ALIGNMENT_WRITER_HEADERS=${CMAKE_ALIGNMENT_WRITER_HEADERS} - -D CMAKE_BITMAGIC_HEADERS=${CMAKE_CURRENT_BINARY_DIR}/external/telescope/external/BitMagic-7.12.3/src + -D CMAKE_BITMAGIC_HEADERS=${CMAKE_BITMAGIC_HEADERS} -D CMAKE_SEAMAT_HEADERS=${CMAKE_SEAMAT_HEADERS} - -D CMAKE_TELESCOPE_HEADERS=${CMAKE_TELESCOPE_HEADERS} -D CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -D "CMAKE_C_FLAGS=${CMAKE_C_FLAGS}" -D "CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}" @@ -321,13 +325,13 @@ else() INSTALL_COMMAND "" ) FetchContent_MakeAvailable(mGEMS) - add_dependencies(mGEMS telescope libalignmentwriter) + add_dependencies(mGEMS libalignmentwriter) add_dependencies(mSWEEP libmgems) set_target_properties(mGEMS PROPERTIES EXCLUDE_FROM_ALL 1) - set(CMAKE_MGEMS_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/mGEMS/include) + set(CMAKE_MGEMS_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/external/mGEMS/include ${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS/include) set(CMAKE_MGEMS_LIBRARY ${CMAKE_CURRENT_BINARY_DIR}/external/mGEMS/lib/libmgems.a) endif() -target_link_libraries(mSWEEP ${CMAKE_MGEMS_LIBRARY} ${CMAKE_TELESCOPE_LIBRARY} ${CMAKE_ALIGNMENT_WRITER_LIBRARY}) +target_link_libraries(mSWEEP ${CMAKE_MGEMS_LIBRARY} ${CMAKE_ALIGNMENT_WRITER_LIBRARY}) include_directories(${CMAKE_MGEMS_HEADERS}) include_directories( From 1ae8a17b086b246b6825e8baf20faf2c075ab248 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tommi=20M=C3=A4klin?= Date: Tue, 20 Aug 2024 16:26:00 +0300 Subject: [PATCH 22/22] Fix compiling without openmp. --- include/Likelihood.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/Likelihood.hpp b/include/Likelihood.hpp index 15c4ed6..69d2c85 100644 --- a/include/Likelihood.hpp +++ b/include/Likelihood.hpp @@ -124,7 +124,11 @@ class LL_WOR21 : public Likelihood { for (size_t i = 0; i < num_ecs; ++i) { for (size_t j = 0; j < n_targets; ++j) { if (alignment(i, j)) { +#if defined(MSWEEP_OPENMP_SUPPORT) && (MSWEEP_OPENMP_SUPPORT) == 1 local_counts[omp_get_thread_num()].inc((size_t)((size_t)alignment.get_groups()[j]*num_ecs) + i); +#else + local_counts[0].inc((size_t)((size_t)alignment.get_groups()[j]*num_ecs) + i); +#endif } } }