Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up XG construction when IDs don't start at 1 #3

Merged
merged 4 commits into from
Jun 20, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ set(sdsl-lite-divsufsort_LIB "${INSTALL_DIR}/src/sdsl-lite-build/external/libdiv
# mmmultimap (memory mapped multimap)
ExternalProject_Add(mmmultimap
GIT_REPOSITORY "https://github.com/ekg/mmmultimap.git"
GIT_TAG "8a76cec0b819de6e2b855f4edc29993f75b1b26b"
GIT_TAG "b92a5c8826141d61413546278719724e0f612c39"
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND "")
Expand Down Expand Up @@ -109,8 +109,8 @@ set(gfakluge_tinyFA_INCLUDE "${INSTALL_DIR}/src/gfakluge/src/tinyFA")

# In-place Parallel Super Scalar Samplesort (IPS⁴o), header only
ExternalProject_Add(ips4o
GIT_REPOSITORY "https://github.com/SaschaWitt/ips4o.git"
GIT_TAG "bff3ccf0bf349497f2bb10f825d160b792236367"
GIT_REPOSITORY "https://github.com/vgteam/ips4o.git"
GIT_TAG "22069381cc1bf2df07ee1ff47f6b6073fcfb4508"
INSTALL_COMMAND ""
BUILD_COMMAND ""
CONFIGURE_COMMAND "")
Expand All @@ -121,6 +121,7 @@ set(CMAKE_BUILD_TYPE Release)

add_library(xg_objs OBJECT src/xg.cpp)

add_dependencies(xg_objs handlegraph)
add_dependencies(xg_objs sdsl-lite)
add_dependencies(xg_objs mmmultimap)
add_dependencies(xg_objs tayweeargs)
Expand Down
34 changes: 17 additions & 17 deletions src/xg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

#include "gfakluge.hpp"

#define VERBOSE_DEBUG
//#define VERBOSE_DEBUG
//#define debug_algorithms
//#define debug_component_index

Expand Down Expand Up @@ -383,31 +383,31 @@ void XG::from_gfa(const std::string& gfa_filename, bool validate, std::string ba
// set up our enumerators
auto for_each_sequence = [&](const std::function<void(const std::string& seq, const nid_t& node_id)>& lambda) {
gfa.for_each_sequence_line_in_file(filename, [&](gfak::sequence_elem s) {
nid_t node_id = std::stol(s.name);
lambda(s.sequence, node_id);
});
nid_t node_id = std::stol(s.name);
lambda(s.sequence, node_id);
});
};
auto for_each_edge = [&](const std::function<void(const nid_t& from_id, const bool& from_rev,
const nid_t& to_id, const bool& to_rev)>& lambda) {
gfa.for_each_edge_line_in_file(filename, [&](gfak::edge_elem e) {
if (e.source_name.empty()) return;
nid_t from_id = std::stol(e.source_name);
bool from_rev = !e.source_orientation_forward;
nid_t to_id = std::stol(e.sink_name);
bool to_rev = !e.sink_orientation_forward;
lambda(from_id, from_rev, to_id, to_rev);
});
if (e.source_name.empty()) return;
nid_t from_id = std::stol(e.source_name);
bool from_rev = !e.source_orientation_forward;
nid_t to_id = std::stol(e.sink_name);
bool to_rev = !e.sink_orientation_forward;
lambda(from_id, from_rev, to_id, to_rev);
});
};
auto for_each_path_element = [&](const std::function<void(const std::string& path_name,
const nid_t& node_id, const bool& is_rev,
const std::string& cigar)>& lambda) {
gfa.for_each_path_element_in_file(filename, [&](const std::string& path_name_raw, const std::string& node_id_str,
bool is_rev, const std::string& cigar) {
nid_t node_id = std::stol(node_id_str);
std::string path_name = path_name_raw;
path_name.erase(std::remove_if(path_name.begin(), path_name.end(), [](char c) { return std::isspace(c); }), path_name.end());
lambda(path_name, node_id, is_rev, cigar);
});
nid_t node_id = std::stol(node_id_str);
std::string path_name = path_name_raw;
path_name.erase(std::remove_if(path_name.begin(), path_name.end(), [](char c) { return std::isspace(c); }), path_name.end());
lambda(path_name, node_id, is_rev, cigar);
});
};
from_enumerators(for_each_sequence, for_each_edge, for_each_path_element, validate, basename);
}
Expand Down Expand Up @@ -587,7 +587,7 @@ void XG::from_enumerators(const std::function<void(const std::function<void(cons
edge_from_to_mm.append(as_integer(from_handle), as_integer(to_handle));
edge_to_from_mm.append(as_integer(to_handle), as_integer(from_handle));
});
handle_t max_handle = number_bool_packing::pack(max_id, true);
handle_t max_handle = number_bool_packing::pack(r_iv.size(), true);
edge_from_to_mm.index(as_integer(max_handle));
edge_to_from_mm.index(as_integer(max_handle));

Expand Down
16 changes: 5 additions & 11 deletions src/xg.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,11 @@
#include "sdsl/csa_wt.hpp"
#include "sdsl/suffix_arrays.hpp"

#include <handlegraph/types.hpp>
#include <handlegraph/iteratee.hpp>
#include <handlegraph/util.hpp>
#include <handlegraph/handle_graph.hpp>
//#include <handlegraph/path_handle_graph.hpp>
#include <handlegraph/path_position_handle_graph.hpp>
//#include <handlegraph/mutable_handle_graph.hpp>
//#include <handlegraph/mutable_path_handle_graph.hpp>
//#include <handlegraph/mutable_path_mutable_handle_graph.hpp>
//#include <handlegraph/deletable_handle_graph.hpp>
//#include <handlegraph/mutable_path_deletable_handle_graph.hpp>
#include "handlegraph/types.hpp"
#include "handlegraph/iteratee.hpp"
#include "handlegraph/util.hpp"
#include "handlegraph/handle_graph.hpp"
#include "handlegraph/path_position_handle_graph.hpp"

#include "mmmultimap.hpp"

Expand Down