diff --git a/CMakeLists.txt b/CMakeLists.txt index 312732fd3..da1c80382 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,8 +6,8 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) configure_file( - ${DS2I_SOURCE_DIR}/include/ds2i_config.hpp.in - ${DS2I_SOURCE_DIR}/include/ds2i_config.hpp + ${DS2I_SOURCE_DIR}/include/pisa/ds2i_config.hpp.in + ${DS2I_SOURCE_DIR}/include/pisa/ds2i_config.hpp ESCAPE_QUOTES) if(NOT CMAKE_BUILD_TYPE) @@ -31,6 +31,7 @@ set_target_properties(gumbo::gumbo PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${CMAKE_CURRENT_SOURCE_DIR}/external/gumbo-parser/src) set_property(TARGET gumbo::gumbo APPEND PROPERTY IMPORTED_LOCATION ${CMAKE_BINARY_DIR}/gumbo-parser/lib/libgumbo.a) +add_dependencies( gumbo::gumbo gumbo-external ) # Add code coverage list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/external/CMake-codecov/cmake") @@ -63,11 +64,29 @@ set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) link_libraries(Threads::Threads) -# add the root directory to include path to make includes absolute -include_directories(${DS2I_SOURCE_DIR}/external - ${STXXL_INCLUDE_DIRS} - ${DS2I_SOURCE_DIR}/include - ) + +include_directories(include) +add_library(pisa INTERFACE) +target_include_directories(pisa INTERFACE + $ +) +target_link_libraries(pisa INTERFACE + Threads::Threads + Boost::boost + QMX + mio + ParallelSTL + GSL + FastPFor + streamvbyte + MaskedVByte + simdcomp + gumbo::gumbo + Boost::filesystem + Porter2 + warcpp +) +target_include_directories(pisa INTERFACE external) add_subdirectory(src) diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 41d6b30f3..3c8a2aaef 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -1,43 +1,19 @@ add_executable(index_perftest index_perftest.cpp) target_link_libraries(index_perftest - Boost::boost - mio - FastPFor - streamvbyte - MaskedVByte - QMX - simdcomp - ParallelSTL - GSL - ) + pisa +) add_executable(perftest_interpolative perftest_interpolative.cpp) target_link_libraries(perftest_interpolative - Boost::boost - mio - FastPFor - QMX - simdcomp - GSL - ) + pisa +) add_executable(selective_queries selective_queries.cpp) target_link_libraries(selective_queries - Boost::boost - mio - FastPFor - streamvbyte - MaskedVByte - QMX - simdcomp - ParallelSTL - GSL - ) + pisa +) add_executable(scan_perftest scan_perftest.cpp) target_link_libraries(scan_perftest - Boost::boost - mio - ParallelSTL - GSL - ) + pisa +) diff --git a/benchmarks/index_perftest.cpp b/benchmarks/index_perftest.cpp index 45234bea2..f7e1d9dd6 100644 --- a/benchmarks/index_perftest.cpp +++ b/benchmarks/index_perftest.cpp @@ -4,9 +4,9 @@ #include "index_types.hpp" #include "util/util.hpp" -using ds2i::logger; -using ds2i::get_time_usecs; -using ds2i::do_not_optimize_away; +using pisa::logger; +using pisa::get_time_usecs; +using pisa::do_not_optimize_away; template void perftest(IndexType const& index, std::string const& type) @@ -65,7 +65,7 @@ void perftest(IndexType const& index, std::string const& type) if (size < min_length) continue; skip_values.emplace_back(i, std::vector()); - for (size_t i = 0; i < std::min(ds2i::ceil_div(size, skip), + for (size_t i = 0; i < std::min(pisa::ceil_div(size, skip), max_calls_per_list); ++i) { reader.move(i * skip); skip_values.back().second.push_back(reader.docid()); @@ -106,7 +106,7 @@ void perftest(const char* index_filename, std::string const& type) logger() << "Loading index from " << index_filename << std::endl; IndexType index; mio::mmap_source m(index_filename); - ds2i::mapper::map(index, m, ds2i::mapper::map_flags::warmup); + pisa::mapper::map(index, m, pisa::mapper::map_flags::warmup); perftest(index, type); perftest(index, type); @@ -115,7 +115,7 @@ void perftest(const char* index_filename, std::string const& type) int main(int argc, const char** argv) { - using namespace ds2i; + using namespace pisa; if (argc != 3) { std::cerr << "Usage: " << argv[0] diff --git a/benchmarks/perftest_interpolative.cpp b/benchmarks/perftest_interpolative.cpp index 07fd415bf..fdee1c5d5 100644 --- a/benchmarks/perftest_interpolative.cpp +++ b/benchmarks/perftest_interpolative.cpp @@ -6,7 +6,7 @@ int main() { - using namespace ds2i; + using namespace pisa; static const size_t size = interpolative_block::block_size; static const size_t runs = 1 << 20; diff --git a/benchmarks/scan_perftest.cpp b/benchmarks/scan_perftest.cpp index c44f28626..4ca63db8d 100644 --- a/benchmarks/scan_perftest.cpp +++ b/benchmarks/scan_perftest.cpp @@ -5,18 +5,18 @@ #include "sequence/uniform_partitioned_sequence.hpp" #include "util/util.hpp" -using ds2i::logger; -using ds2i::get_time_usecs; -using ds2i::do_not_optimize_away; +using pisa::logger; +using pisa::get_time_usecs; +using pisa::do_not_optimize_away; template void perftest(const char* index_filename) { - typedef ds2i::sequence_collection collection_type; + typedef pisa::sequence_collection collection_type; logger() << "Loading collection from " << index_filename << std::endl; collection_type coll; mio::mmap_source m(index_filename); - ds2i::mapper::map(coll, m, ds2i::mapper::map_flags::warmup); + pisa::mapper::map(coll, m, pisa::mapper::map_flags::warmup); if (true) { logger() << "Scanning all the posting lists" << std::endl; @@ -127,10 +127,10 @@ void perftest(const char* index_filename) } int main(int argc, const char** argv) { - using ds2i::compact_elias_fano; - using ds2i::indexed_sequence; - using ds2i::partitioned_sequence; - using ds2i::uniform_partitioned_sequence; + using pisa::compact_elias_fano; + using pisa::indexed_sequence; + using pisa::partitioned_sequence; + using pisa::uniform_partitioned_sequence; if (argc != 3) { std::cerr << "Usage: " << argv[0] diff --git a/benchmarks/selective_queries.cpp b/benchmarks/selective_queries.cpp index 59211ae4f..47a56669b 100644 --- a/benchmarks/selective_queries.cpp +++ b/benchmarks/selective_queries.cpp @@ -12,7 +12,7 @@ template void selective_queries(const char* index_filename, std::string const& type) { - using namespace ds2i; + using namespace pisa; IndexType index; @@ -59,7 +59,7 @@ void selective_queries(const char* index_filename, int main(int, const char** argv) { - using namespace ds2i; + using namespace pisa; std::string type = argv[1]; const char* index_filename = argv[2]; diff --git a/include/binary_collection.hpp b/include/pisa/binary_collection.hpp similarity index 99% rename from include/binary_collection.hpp rename to include/pisa/binary_collection.hpp index 7ee150ee9..079962e89 100644 --- a/include/binary_collection.hpp +++ b/include/pisa/binary_collection.hpp @@ -13,7 +13,7 @@ #include #endif -namespace ds2i { +namespace pisa { template class base_binary_collection { diff --git a/include/binary_freq_collection.hpp b/include/pisa/binary_freq_collection.hpp similarity index 99% rename from include/binary_freq_collection.hpp rename to include/pisa/binary_freq_collection.hpp index bb949fa28..285ad5166 100644 --- a/include/binary_freq_collection.hpp +++ b/include/pisa/binary_freq_collection.hpp @@ -6,7 +6,7 @@ #include "binary_collection.hpp" -namespace ds2i { +namespace pisa { class binary_freq_collection { public: diff --git a/include/bit_vector.hpp b/include/pisa/bit_vector.hpp similarity index 99% rename from include/bit_vector.hpp rename to include/pisa/bit_vector.hpp index 24fc2f3e4..e98e5b590 100644 --- a/include/bit_vector.hpp +++ b/include/pisa/bit_vector.hpp @@ -9,7 +9,7 @@ #include "succinct/mappable_vector.hpp" -namespace ds2i { +namespace pisa { namespace detail { inline size_t words_for(uint64_t n) { return ceil_div(n, 64); } @@ -476,4 +476,4 @@ class bit_vector { mapper::mappable_vector m_bits; }; -} // namespace ds2i +} // namespace pisa diff --git a/include/bitvector_collection.hpp b/include/pisa/bitvector_collection.hpp similarity index 99% rename from include/bitvector_collection.hpp rename to include/pisa/bitvector_collection.hpp index e8cfc179a..4097b5ee9 100644 --- a/include/bitvector_collection.hpp +++ b/include/pisa/bitvector_collection.hpp @@ -4,7 +4,7 @@ #include "codec/compact_elias_fano.hpp" -namespace ds2i { +namespace pisa { class bitvector_collection { public: diff --git a/include/block_freq_index.hpp b/include/pisa/block_freq_index.hpp similarity index 99% rename from include/block_freq_index.hpp rename to include/pisa/block_freq_index.hpp index a88645458..b678a5e8e 100644 --- a/include/block_freq_index.hpp +++ b/include/pisa/block_freq_index.hpp @@ -6,7 +6,7 @@ #include "codec/compact_elias_fano.hpp" #include "block_posting_list.hpp" -namespace ds2i { +namespace pisa { template class block_freq_index { diff --git a/include/block_posting_list.hpp b/include/pisa/block_posting_list.hpp similarity index 99% rename from include/block_posting_list.hpp rename to include/pisa/block_posting_list.hpp index a42f8a10a..ac29c70f3 100644 --- a/include/block_posting_list.hpp +++ b/include/pisa/block_posting_list.hpp @@ -4,7 +4,7 @@ #include "util/util.hpp" #include "util/block_profiler.hpp" -namespace ds2i { +namespace pisa { template struct block_posting_list { diff --git a/include/codec/VarIntG8IU.h b/include/pisa/codec/VarIntG8IU.h similarity index 99% rename from include/codec/VarIntG8IU.h rename to include/pisa/codec/VarIntG8IU.h index 2280b3421..0ea3bedcd 100644 --- a/include/codec/VarIntG8IU.h +++ b/include/pisa/codec/VarIntG8IU.h @@ -15,7 +15,7 @@ #define PREDICT_FALSE(x) x #endif -namespace ds2i { +namespace pisa { class NotEnoughStorage : public std::runtime_error { public: diff --git a/include/codec/all_ones_sequence.hpp b/include/pisa/codec/all_ones_sequence.hpp similarity index 99% rename from include/codec/all_ones_sequence.hpp rename to include/pisa/codec/all_ones_sequence.hpp index fc7d19b6c..1822a6a5e 100644 --- a/include/codec/all_ones_sequence.hpp +++ b/include/pisa/codec/all_ones_sequence.hpp @@ -3,7 +3,7 @@ #include "global_parameters.hpp" #include "util/util.hpp" -namespace ds2i { +namespace pisa { struct all_ones_sequence { diff --git a/include/codec/block_codecs.hpp b/include/pisa/codec/block_codecs.hpp similarity index 99% rename from include/codec/block_codecs.hpp rename to include/pisa/codec/block_codecs.hpp index 5c07cc5a9..4a8fcb980 100644 --- a/include/codec/block_codecs.hpp +++ b/include/pisa/codec/block_codecs.hpp @@ -7,7 +7,7 @@ #include "interpolative_coding.hpp" #include "util/util.hpp" -namespace ds2i { +namespace pisa { // workaround: VariableByte::decodeArray needs the buffer size, while we // only know the number of values. It also pads to 32 bits. We need to diff --git a/include/codec/compact_elias_fano.hpp b/include/pisa/codec/compact_elias_fano.hpp similarity index 99% rename from include/codec/compact_elias_fano.hpp rename to include/pisa/codec/compact_elias_fano.hpp index 3dc4f5b23..54074b4eb 100644 --- a/include/codec/compact_elias_fano.hpp +++ b/include/pisa/codec/compact_elias_fano.hpp @@ -7,7 +7,7 @@ #include "global_parameters.hpp" #include "util/util.hpp" -namespace ds2i { +namespace pisa { struct compact_elias_fano { diff --git a/include/codec/compact_ranked_bitvector.hpp b/include/pisa/codec/compact_ranked_bitvector.hpp similarity index 99% rename from include/codec/compact_ranked_bitvector.hpp rename to include/pisa/codec/compact_ranked_bitvector.hpp index 632b08136..8adf20861 100644 --- a/include/codec/compact_ranked_bitvector.hpp +++ b/include/pisa/codec/compact_ranked_bitvector.hpp @@ -7,7 +7,7 @@ #include "global_parameters.hpp" #include "util/util.hpp" -namespace ds2i { +namespace pisa { struct compact_ranked_bitvector { diff --git a/include/codec/integer_codes.hpp b/include/pisa/codec/integer_codes.hpp similarity index 98% rename from include/codec/integer_codes.hpp rename to include/pisa/codec/integer_codes.hpp index 7b33863c6..3e0a49390 100644 --- a/include/codec/integer_codes.hpp +++ b/include/pisa/codec/integer_codes.hpp @@ -1,6 +1,6 @@ #pragma once -namespace ds2i { +namespace pisa { // note: n can be 0 void write_gamma(bit_vector_builder& bvb, uint64_t n) diff --git a/include/codec/interpolative_coding.hpp b/include/pisa/codec/interpolative_coding.hpp similarity index 99% rename from include/codec/interpolative_coding.hpp rename to include/pisa/codec/interpolative_coding.hpp index 9abab54a3..e6e47b490 100644 --- a/include/codec/interpolative_coding.hpp +++ b/include/pisa/codec/interpolative_coding.hpp @@ -5,7 +5,7 @@ #include "util/broadword.hpp" -namespace ds2i { +namespace pisa { class bit_writer { public: diff --git a/include/codec/maskedvbyte.hpp b/include/pisa/codec/maskedvbyte.hpp similarity index 97% rename from include/codec/maskedvbyte.hpp rename to include/pisa/codec/maskedvbyte.hpp index fc9776e24..da5bb5066 100644 --- a/include/codec/maskedvbyte.hpp +++ b/include/pisa/codec/maskedvbyte.hpp @@ -3,7 +3,7 @@ #include "MaskedVByte/include/varintencode.h" #include "MaskedVByte/include/varintdecode.h" -namespace ds2i { +namespace pisa { struct maskedvbyte_block { static const uint64_t block_size = 128; static void encode(uint32_t const *in, @@ -33,4 +33,4 @@ struct maskedvbyte_block { return in + read; } }; -} // namespace ds2i \ No newline at end of file +} // namespace pisa \ No newline at end of file diff --git a/include/codec/qmx.hpp b/include/pisa/codec/qmx.hpp similarity index 97% rename from include/codec/qmx.hpp rename to include/pisa/codec/qmx.hpp index 282b888f2..9f7fd422b 100644 --- a/include/codec/qmx.hpp +++ b/include/pisa/codec/qmx.hpp @@ -2,7 +2,7 @@ #include "QMX/qmx.hpp" -namespace ds2i { +namespace pisa { struct qmx_block { static const uint64_t block_size = 128; static const uint64_t overflow = 512; @@ -46,4 +46,4 @@ struct qmx_block { return in + enc_len; } }; -} // namespace ds2i +} // namespace pisa diff --git a/include/codec/simdbp.hpp b/include/pisa/codec/simdbp.hpp similarity index 97% rename from include/codec/simdbp.hpp rename to include/pisa/codec/simdbp.hpp index 86311ff52..db2b21dbc 100644 --- a/include/codec/simdbp.hpp +++ b/include/pisa/codec/simdbp.hpp @@ -3,7 +3,7 @@ extern "C" { #include "simdcomp/include/simdbitpacking.h" } -namespace ds2i { +namespace pisa { struct simdbp_block { static const uint64_t block_size = 128; static void encode(uint32_t const *in, @@ -37,4 +37,4 @@ struct simdbp_block { return in + b * sizeof(__m128i); } }; -} // namespace ds2i \ No newline at end of file +} // namespace pisa \ No newline at end of file diff --git a/include/codec/simple16.hpp b/include/pisa/codec/simple16.hpp similarity index 97% rename from include/codec/simple16.hpp rename to include/pisa/codec/simple16.hpp index 544291e69..865d9d0d7 100644 --- a/include/codec/simple16.hpp +++ b/include/pisa/codec/simple16.hpp @@ -1,7 +1,7 @@ #pragma once #include "FastPFor/headers/simple16.h" -namespace ds2i { +namespace pisa { struct simple16_block { static const uint64_t block_size = 128; @@ -37,4 +37,4 @@ struct simple16_block { return ret; } }; -} // namespace ds2i +} // namespace pisa diff --git a/include/codec/simple8b.hpp b/include/pisa/codec/simple8b.hpp similarity index 96% rename from include/codec/simple8b.hpp rename to include/pisa/codec/simple8b.hpp index 53a3182b5..6a33a0cb4 100644 --- a/include/codec/simple8b.hpp +++ b/include/pisa/codec/simple8b.hpp @@ -1,7 +1,7 @@ #pragma once #include "FastPFor/headers/simple8b.h" -namespace ds2i { +namespace pisa { struct simple8b_block { static const uint64_t block_size = 128; @@ -30,4 +30,4 @@ struct simple8b_block { codec.decodeArray(reinterpret_cast(in), 8*n, out, n)); } }; -} // namespace ds2i +} // namespace pisa diff --git a/include/codec/streamvbyte.hpp b/include/pisa/codec/streamvbyte.hpp similarity index 96% rename from include/codec/streamvbyte.hpp rename to include/pisa/codec/streamvbyte.hpp index 0ec956fe8..7798f7d72 100644 --- a/include/codec/streamvbyte.hpp +++ b/include/pisa/codec/streamvbyte.hpp @@ -1,7 +1,7 @@ #pragma once #include "streamvbyte/include/streamvbyte.h" -namespace ds2i { +namespace pisa { struct streamvbyte_block { static const uint64_t block_size = 128; @@ -25,4 +25,4 @@ struct streamvbyte_block { return in + read; } }; -} // namespace ds2i \ No newline at end of file +} // namespace pisa \ No newline at end of file diff --git a/include/codec/strict_elias_fano.hpp b/include/pisa/codec/strict_elias_fano.hpp similarity index 99% rename from include/codec/strict_elias_fano.hpp rename to include/pisa/codec/strict_elias_fano.hpp index f731fd1ed..7884098f4 100644 --- a/include/codec/strict_elias_fano.hpp +++ b/include/pisa/codec/strict_elias_fano.hpp @@ -5,7 +5,7 @@ #include "codec/compact_elias_fano.hpp" #include "util/util.hpp" -namespace ds2i { +namespace pisa { struct strict_elias_fano { diff --git a/include/codec/varintgb.hpp b/include/pisa/codec/varintgb.hpp similarity index 99% rename from include/codec/varintgb.hpp rename to include/pisa/codec/varintgb.hpp index fcc08c565..e023d37b8 100644 --- a/include/codec/varintgb.hpp +++ b/include/pisa/codec/varintgb.hpp @@ -4,7 +4,7 @@ using namespace std; -namespace ds2i { +namespace pisa { template class VarIntGB { @@ -264,4 +264,4 @@ struct varintgb_block { return read + in; } }; -} // namespace ds2i \ No newline at end of file +} // namespace pisa \ No newline at end of file diff --git a/include/configuration.hpp b/include/pisa/configuration.hpp similarity index 99% rename from include/configuration.hpp rename to include/pisa/configuration.hpp index ea6c1a5ac..78d272a10 100644 --- a/include/configuration.hpp +++ b/include/pisa/configuration.hpp @@ -7,7 +7,7 @@ #include "boost/lexical_cast.hpp" -namespace ds2i { +namespace pisa { class configuration { public: diff --git a/include/dec_time_prediction.hpp b/include/pisa/dec_time_prediction.hpp similarity index 99% rename from include/dec_time_prediction.hpp rename to include/pisa/dec_time_prediction.hpp index 59621395f..970d2e5f1 100644 --- a/include/dec_time_prediction.hpp +++ b/include/pisa/dec_time_prediction.hpp @@ -12,7 +12,7 @@ #define DS2I_FEATURE_TYPES (n)(size)(sum_of_logs)(entropy)(nonzeros)(max_b)(pfor_b)(pfor_exceptions) -namespace ds2i { namespace time_prediction { +namespace pisa { namespace time_prediction { constexpr size_t num_features = BOOST_PP_SEQ_SIZE(DS2I_FEATURE_TYPES); diff --git a/include/ds2i_config.hpp.in b/include/pisa/ds2i_config.hpp.in similarity index 100% rename from include/ds2i_config.hpp.in rename to include/pisa/ds2i_config.hpp.in diff --git a/include/enumerate.hpp b/include/pisa/enumerate.hpp similarity index 96% rename from include/enumerate.hpp rename to include/pisa/enumerate.hpp index 14927fe3b..c87b5f231 100644 --- a/include/enumerate.hpp +++ b/include/pisa/enumerate.hpp @@ -1,6 +1,6 @@ #pragma once -namespace ds2i { +namespace pisa { template class Enumerator_Index { @@ -45,4 +45,4 @@ template return Enumerator_Range{{first}, {last}}; } -} // namespace ds2i +} // namespace pisa diff --git a/include/filesystem.hpp b/include/pisa/filesystem.hpp similarity index 92% rename from include/filesystem.hpp rename to include/pisa/filesystem.hpp index 175ec6e4f..226cab02d 100644 --- a/include/filesystem.hpp +++ b/include/pisa/filesystem.hpp @@ -1,6 +1,6 @@ #pragma once -namespace ds2i { +namespace pisa { [[nodiscard]] auto ls(boost::filesystem::path dir, std::function predicate) @@ -17,4 +17,4 @@ namespace ds2i { return files; } -} // namespace ds2i +} // namespace pisa diff --git a/include/forward_index.hpp b/include/pisa/forward_index.hpp similarity index 99% rename from include/forward_index.hpp rename to include/pisa/forward_index.hpp index 350cf55d6..37db45344 100644 --- a/include/forward_index.hpp +++ b/include/pisa/forward_index.hpp @@ -9,7 +9,7 @@ #include "codec/varintgb.hpp" #include "util/progress.hpp" -namespace ds2i { +namespace pisa { using id_type = std::uint32_t; @@ -148,4 +148,4 @@ class forward_index : public std::vector> { bool m_compressed; }; -} // namespace ds2i +} // namespace pisa diff --git a/include/forward_index_builder.hpp b/include/pisa/forward_index_builder.hpp similarity index 99% rename from include/forward_index_builder.hpp rename to include/pisa/forward_index_builder.hpp index 75be737a7..311d75f18 100644 --- a/include/forward_index_builder.hpp +++ b/include/pisa/forward_index_builder.hpp @@ -23,7 +23,7 @@ #include "warcpp/warcpp.hpp" #include "util/util.hpp" -namespace ds2i { +namespace pisa { using process_term_function_type = std::function; using process_content_function_type = @@ -324,9 +324,9 @@ class Plaintext_Record { std::string m_url; }; -} // namespace ds2i +} // namespace pisa -auto operator>>(std::istream &is, ds2i::Plaintext_Record &record) -> std::istream & +auto operator>>(std::istream &is, pisa::Plaintext_Record &record) -> std::istream & { is >> record.trecid(); std::getline(is, record.content()); diff --git a/include/freq_index.hpp b/include/pisa/freq_index.hpp similarity index 99% rename from include/freq_index.hpp rename to include/pisa/freq_index.hpp index 4f439a4ce..3aaf0bed6 100644 --- a/include/freq_index.hpp +++ b/include/pisa/freq_index.hpp @@ -7,7 +7,7 @@ #include "codec/integer_codes.hpp" #include "global_parameters.hpp" -namespace ds2i { +namespace pisa { template class freq_index { diff --git a/include/global_parameters.hpp b/include/pisa/global_parameters.hpp similarity index 98% rename from include/global_parameters.hpp rename to include/pisa/global_parameters.hpp index 42cec1930..634a9eaf3 100644 --- a/include/global_parameters.hpp +++ b/include/pisa/global_parameters.hpp @@ -1,6 +1,6 @@ #pragma once -namespace ds2i { +namespace pisa { struct global_parameters { global_parameters() diff --git a/include/index_types.hpp b/include/pisa/index_types.hpp similarity index 71% rename from include/index_types.hpp rename to include/pisa/index_types.hpp index 5619559d4..7051563e1 100644 --- a/include/index_types.hpp +++ b/include/pisa/index_types.hpp @@ -22,7 +22,7 @@ #include "sequence/positive_sequence.hpp" #include "sequence/uniform_partitioned_sequence.hpp" -namespace ds2i { +namespace pisa { using ef_index = freq_index>; using single_index = freq_index>; @@ -33,19 +33,19 @@ using uniform_index = freq_index, using opt_index = freq_index, positive_sequence>>; -using block_optpfor_index = block_freq_index; -using block_varintg8iu_index = block_freq_index; -using block_streamvbyte_index = block_freq_index; -using block_maskedvbyte_index = block_freq_index; -using block_varintgb_index = block_freq_index; -using block_interpolative_index = block_freq_index; -using block_qmx_index = block_freq_index; -using block_simple8b_index = block_freq_index; -using block_simple16_index = block_freq_index; -using block_simdbp_index = block_freq_index; -using block_mixed_index = block_freq_index; - -} // namespace ds2i +using block_optpfor_index = block_freq_index; +using block_varintg8iu_index = block_freq_index; +using block_streamvbyte_index = block_freq_index; +using block_maskedvbyte_index = block_freq_index; +using block_varintgb_index = block_freq_index; +using block_interpolative_index = block_freq_index; +using block_qmx_index = block_freq_index; +using block_simple8b_index = block_freq_index; +using block_simple16_index = block_freq_index; +using block_simdbp_index = block_freq_index; +using block_mixed_index = block_freq_index; + +} // namespace pisa #define DS2I_INDEX_TYPES \ (ef)(single)(uniform)(opt)(block_optpfor)(block_varintg8iu)(block_streamvbyte)( \ diff --git a/include/invert.hpp b/include/pisa/invert.hpp similarity index 98% rename from include/invert.hpp rename to include/pisa/invert.hpp index 7f5ce9e19..b13c0bfd6 100644 --- a/include/invert.hpp +++ b/include/pisa/invert.hpp @@ -20,7 +20,7 @@ #include "enumerate.hpp" #include "util/util.hpp" -namespace ds2i { +namespace pisa { template class Integer { @@ -71,20 +71,20 @@ class Integer { T m_val; }; -} // namespace ds2i +} // namespace pisa namespace std { template -struct hash> { - constexpr auto operator()(ds2i::Integer const &key) const noexcept { +struct hash> { + constexpr auto operator()(pisa::Integer const &key) const noexcept { return hash{}(static_cast(key)); } }; } // namespace std -namespace ds2i { +namespace pisa { template std::ostream &operator<<(std::ostream &os, Integer id) { @@ -382,4 +382,4 @@ void invert_forward_index(std::string const &input_basename, } // namespace invert -} // namespace ds2i +} // namespace pisa diff --git a/include/mixed_block.hpp b/include/pisa/mixed_block.hpp similarity index 99% rename from include/mixed_block.hpp rename to include/pisa/mixed_block.hpp index 75ab9b21a..df33dfd0d 100644 --- a/include/mixed_block.hpp +++ b/include/pisa/mixed_block.hpp @@ -5,7 +5,7 @@ #include "codec/block_codecs.hpp" #include "dec_time_prediction.hpp" -namespace ds2i { +namespace pisa { struct mixed_block { @@ -216,7 +216,7 @@ namespace ds2i { } }; - typedef std::vector predictors_vec_type; + typedef std::vector predictors_vec_type; predictors_vec_type load_predictors(const char* predictors_filename) { diff --git a/include/optimal_partition.hpp b/include/pisa/optimal_partition.hpp similarity index 99% rename from include/optimal_partition.hpp rename to include/pisa/optimal_partition.hpp index 40eec48b9..ab536d2a2 100644 --- a/include/optimal_partition.hpp +++ b/include/pisa/optimal_partition.hpp @@ -5,7 +5,7 @@ #include #include "util/util.hpp" -namespace ds2i { +namespace pisa { typedef uint32_t posting_t ; typedef uint64_t cost_t; diff --git a/include/parsing/html.hpp b/include/pisa/parsing/html.hpp similarity index 94% rename from include/parsing/html.hpp rename to include/pisa/parsing/html.hpp index 92df6c37f..1a88e3a73 100644 --- a/include/parsing/html.hpp +++ b/include/pisa/parsing/html.hpp @@ -3,7 +3,7 @@ #include #include -namespace ds2i::parsing::html { +namespace pisa::parsing::html { [[nodiscard]] auto cleantext(GumboNode *node) -> std::string { @@ -35,4 +35,4 @@ namespace ds2i::parsing::html { return content; } -} // namespace ds2i::parsing::html +} // namespace pisa::parsing::html diff --git a/include/query/algorithm/and_query.hpp b/include/pisa/query/algorithm/and_query.hpp similarity index 85% rename from include/query/algorithm/and_query.hpp rename to include/pisa/query/algorithm/and_query.hpp index 31adc8055..f9d36f865 100644 --- a/include/query/algorithm/and_query.hpp +++ b/include/pisa/query/algorithm/and_query.hpp @@ -1,5 +1,7 @@ #pragma once +namespace pisa { + template struct and_query { @@ -10,7 +12,7 @@ struct and_query { remove_duplicate_terms(terms); typedef typename Index::document_enumerator enum_type; - std::vector enums; + std::vector enums; enums.reserve(terms.size()); for (auto term : terms) { @@ -22,15 +24,15 @@ struct and_query { return lhs.size() < rhs.size(); }); - uint64_t results = 0; + uint64_t results = 0; uint64_t candidate = enums[0].docid(); - size_t i = 1; + size_t i = 1; while (candidate < index.num_docs()) { for (; i < enums.size(); ++i) { enums[i].next_geq(candidate); if (enums[i].docid() != candidate) { candidate = enums[i].docid(); - i = 0; + i = 0; break; } } @@ -45,10 +47,11 @@ struct and_query { } enums[0].next(); candidate = enums[0].docid(); - i = 1; + i = 1; } } return results; } }; +} // namespace pisa \ No newline at end of file diff --git a/include/query/algorithm/block_max_maxscore_query.hpp b/include/pisa/query/algorithm/block_max_maxscore_query.hpp similarity index 99% rename from include/query/algorithm/block_max_maxscore_query.hpp rename to include/pisa/query/algorithm/block_max_maxscore_query.hpp index 3b532cd8e..5ef880321 100644 --- a/include/query/algorithm/block_max_maxscore_query.hpp +++ b/include/pisa/query/algorithm/block_max_maxscore_query.hpp @@ -1,5 +1,7 @@ #pragma once +namespace pisa { + template struct block_max_maxscore_query { @@ -130,3 +132,4 @@ struct block_max_maxscore_query { WandType const *m_wdata; topk_queue m_topk; }; +} // namespace pisa \ No newline at end of file diff --git a/include/query/algorithm/block_max_wand_query.hpp b/include/pisa/query/algorithm/block_max_wand_query.hpp similarity index 88% rename from include/query/algorithm/block_max_wand_query.hpp rename to include/pisa/query/algorithm/block_max_wand_query.hpp index 077cdedc2..1bbd0c396 100644 --- a/include/query/algorithm/block_max_wand_query.hpp +++ b/include/pisa/query/algorithm/block_max_wand_query.hpp @@ -1,5 +1,7 @@ #pragma once +namespace pisa { + template struct block_max_wand_query { typedef bm25 scorer_type; @@ -12,24 +14,24 @@ struct block_max_wand_query { if (terms.empty()) return 0; - auto query_term_freqs = query_freqs(terms); - uint64_t num_docs = index.num_docs(); - typedef typename Index::document_enumerator enum_type; + auto query_term_freqs = query_freqs(terms); + uint64_t num_docs = index.num_docs(); + typedef typename Index::document_enumerator enum_type; typedef typename WandType::wand_data_enumerator wdata_enum; struct scored_enum { - enum_type docs_enum; + enum_type docs_enum; wdata_enum w; - float q_weight; - float max_weight; + float q_weight; + float max_weight; }; std::vector enums; enums.reserve(query_term_freqs.size()); for (auto term : query_term_freqs) { - auto list = index[term.first]; - auto w_enum = m_wdata->getenum(term.first); + auto list = index[term.first]; + auto w_enum = m_wdata->getenum(term.first); auto q_weight = scorer_type::query_term_weight(term.second, list.size(), num_docs); float max_weight = q_weight * m_wdata->max_term_weight(term.first); @@ -55,10 +57,10 @@ struct block_max_wand_query { while (true) { // find pivot - float upper_bound = 0.f; - size_t pivot; - bool found_pivot = false; - uint64_t pivot_id = num_docs; + float upper_bound = 0.f; + size_t pivot; + bool found_pivot = false; + uint64_t pivot_id = num_docs; for (pivot = 0; pivot < ordered_enums.size(); ++pivot) { if (ordered_enums[pivot]->docs_enum.docid() == num_docs) { @@ -68,7 +70,7 @@ struct block_max_wand_query { upper_bound += ordered_enums[pivot]->max_weight; if (m_topk.would_enter(upper_bound)) { found_pivot = true; - pivot_id = ordered_enums[pivot]->docs_enum.docid(); + pivot_id = ordered_enums[pivot]->docs_enum.docid(); for (; pivot + 1 < ordered_enums.size() && ordered_enums[pivot + 1]->docs_enum.docid() == pivot_id; ++pivot) @@ -96,7 +98,7 @@ struct block_max_wand_query { // check if pivot is a possible match if (pivot_id == ordered_enums[0]->docs_enum.docid()) { - float score = 0; + float score = 0; float norm_len = m_wdata->norm_len(pivot_id); for (scored_enum *en : ordered_enums) { @@ -150,7 +152,7 @@ struct block_max_wand_query { for (uint64_t i = 0; i < pivot; i++) { if (ordered_enums[i]->q_weight > q_weight) { next_list = i; - q_weight = ordered_enums[i]->q_weight; + q_weight = ordered_enums[i]->q_weight; } } @@ -203,6 +205,7 @@ struct block_max_wand_query { private: WandType const *m_wdata; - topk_queue m_topk; + topk_queue m_topk; }; +} // namespace pisa \ No newline at end of file diff --git a/include/query/algorithm/maxscore_query.hpp b/include/pisa/query/algorithm/maxscore_query.hpp similarity index 89% rename from include/query/algorithm/maxscore_query.hpp rename to include/pisa/query/algorithm/maxscore_query.hpp index 0e6706ddd..37f45c999 100644 --- a/include/query/algorithm/maxscore_query.hpp +++ b/include/pisa/query/algorithm/maxscore_query.hpp @@ -1,5 +1,7 @@ #pragma once +namespace pisa { + template struct maxscore_query { @@ -15,20 +17,20 @@ struct maxscore_query { auto query_term_freqs = query_freqs(terms); - uint64_t num_docs = index.num_docs(); + uint64_t num_docs = index.num_docs(); typedef typename Index::document_enumerator enum_type; struct scored_enum { enum_type docs_enum; - float q_weight; - float max_weight; + float q_weight; + float max_weight; }; std::vector enums; enums.reserve(query_term_freqs.size()); for (auto term : query_term_freqs) { - auto list = index[term.first]; - auto q_weight = scorer_type::query_term_weight(term.second, list.size(), num_docs); + auto list = index[term.first]; + auto q_weight = scorer_type::query_term_weight(term.second, list.size(), num_docs); auto max_weight = q_weight * m_wdata->max_term_weight(term.first); enums.push_back(scored_enum{std::move(list), q_weight, max_weight}); } @@ -61,8 +63,8 @@ struct maxscore_query { ->docs_enum.docid(); while (non_essential_lists < ordered_enums.size() && cur_doc < index.num_docs()) { - float score = 0; - float norm_len = m_wdata->norm_len(cur_doc); + float score = 0; + float norm_len = m_wdata->norm_len(cur_doc); uint64_t next_doc = index.num_docs(); for (size_t i = non_essential_lists; i < ordered_enums.size(); ++i) { if (ordered_enums[i]->docs_enum.docid() == cur_doc) { @@ -108,5 +110,7 @@ struct maxscore_query { private: WandType const *m_wdata; - topk_queue m_topk; -}; \ No newline at end of file + topk_queue m_topk; +}; + +} // namespace pisa \ No newline at end of file diff --git a/include/query/algorithm/or_query.hpp b/include/pisa/query/algorithm/or_query.hpp similarity index 93% rename from include/query/algorithm/or_query.hpp rename to include/pisa/query/algorithm/or_query.hpp index 7c73cda4d..452bed52e 100644 --- a/include/query/algorithm/or_query.hpp +++ b/include/pisa/query/algorithm/or_query.hpp @@ -1,5 +1,7 @@ #pragma once +namespace pisa { + template struct or_query { @@ -10,7 +12,7 @@ struct or_query { remove_duplicate_terms(terms); typedef typename Index::document_enumerator enum_type; - std::vector enums; + std::vector enums; enums.reserve(terms.size()); for (auto term : terms) { @@ -45,4 +47,6 @@ struct or_query { return results; } -}; \ No newline at end of file +}; + +} // namespace pisa \ No newline at end of file diff --git a/include/query/algorithm/ranked_and_query.hpp b/include/pisa/query/algorithm/ranked_and_query.hpp similarity index 86% rename from include/query/algorithm/ranked_and_query.hpp rename to include/pisa/query/algorithm/ranked_and_query.hpp index 72fd89194..3d330cc15 100644 --- a/include/query/algorithm/ranked_and_query.hpp +++ b/include/pisa/query/algorithm/ranked_and_query.hpp @@ -1,5 +1,7 @@ #pragma once +namespace pisa { + template struct ranked_and_query { @@ -16,18 +18,18 @@ struct ranked_and_query { auto query_term_freqs = query_freqs(terms); - uint64_t num_docs = index.num_docs(); + uint64_t num_docs = index.num_docs(); typedef typename Index::document_enumerator enum_type; struct scored_enum { enum_type docs_enum; - float q_weight; + float q_weight; }; std::vector enums; enums.reserve(query_term_freqs.size()); for (auto term : query_term_freqs) { - auto list = index[term.first]; + auto list = index[term.first]; auto q_weight = scorer_type::query_term_weight(term.second, list.size(), num_docs); enums.push_back(scored_enum{std::move(list), q_weight}); } @@ -38,20 +40,20 @@ struct ranked_and_query { }); uint64_t candidate = enums[0].docs_enum.docid(); - size_t i = 1; + size_t i = 1; while (candidate < index.num_docs()) { for (; i < enums.size(); ++i) { enums[i].docs_enum.next_geq(candidate); if (enums[i].docs_enum.docid() != candidate) { candidate = enums[i].docs_enum.docid(); - i = 0; + i = 0; break; } } if (i == enums.size()) { float norm_len = m_wdata->norm_len(candidate); - float score = 0; + float score = 0; for (i = 0; i < enums.size(); ++i) { score += enums[i].q_weight * scorer_type::doc_term_weight(enums[i].docs_enum.freq(), norm_len); @@ -65,7 +67,7 @@ struct ranked_and_query { enums[0].docs_enum.next(); candidate = enums[0].docs_enum.docid(); - i = 1; + i = 1; } } @@ -79,6 +81,7 @@ struct ranked_and_query { private: WandType const *m_wdata; - topk_queue m_topk; + topk_queue m_topk; }; +} // namespace pisa \ No newline at end of file diff --git a/include/query/algorithm/ranked_or_query.hpp b/include/pisa/query/algorithm/ranked_or_query.hpp similarity index 86% rename from include/query/algorithm/ranked_or_query.hpp rename to include/pisa/query/algorithm/ranked_or_query.hpp index b9c3e9bee..a5ab88216 100644 --- a/include/query/algorithm/ranked_or_query.hpp +++ b/include/pisa/query/algorithm/ranked_or_query.hpp @@ -1,5 +1,7 @@ #pragma once +namespace pisa { + template struct ranked_or_query { @@ -15,18 +17,18 @@ struct ranked_or_query { auto query_term_freqs = query_freqs(terms); - uint64_t num_docs = index.num_docs(); + uint64_t num_docs = index.num_docs(); typedef typename Index::document_enumerator enum_type; struct scored_enum { enum_type docs_enum; - float q_weight; + float q_weight; }; std::vector enums; enums.reserve(query_term_freqs.size()); for (auto term : query_term_freqs) { - auto list = index[term.first]; + auto list = index[term.first]; auto q_weight = scorer_type::query_term_weight(term.second, list.size(), num_docs); enums.push_back(scored_enum{std::move(list), q_weight}); } @@ -40,8 +42,8 @@ struct ranked_or_query { ->docs_enum.docid(); while (cur_doc < index.num_docs()) { - float score = 0; - float norm_len = m_wdata->norm_len(cur_doc); + float score = 0; + float norm_len = m_wdata->norm_len(cur_doc); uint64_t next_doc = index.num_docs(); for (size_t i = 0; i < enums.size(); ++i) { if (enums[i].docs_enum.docid() == cur_doc) { @@ -66,6 +68,7 @@ struct ranked_or_query { private: WandType const *m_wdata; - topk_queue m_topk; + topk_queue m_topk; }; +} // namespace pisa \ No newline at end of file diff --git a/include/query/algorithm/wand_query.hpp b/include/pisa/query/algorithm/wand_query.hpp similarity index 91% rename from include/query/algorithm/wand_query.hpp rename to include/pisa/query/algorithm/wand_query.hpp index f03e88379..2194cdde2 100644 --- a/include/query/algorithm/wand_query.hpp +++ b/include/pisa/query/algorithm/wand_query.hpp @@ -1,5 +1,7 @@ #pragma once +namespace pisa { + template struct wand_query { @@ -15,19 +17,19 @@ struct wand_query { auto query_term_freqs = query_freqs(terms); - uint64_t num_docs = index.num_docs(); + uint64_t num_docs = index.num_docs(); typedef typename Index::document_enumerator enum_type; struct scored_enum { enum_type docs_enum; - float q_weight; - float max_weight; + float q_weight; + float max_weight; }; std::vector enums; enums.reserve(query_term_freqs.size()); for (auto term : query_term_freqs) { - auto list = index[term.first]; + auto list = index[term.first]; auto q_weight = scorer_type::query_term_weight(term.second, list.size(), num_docs); auto max_weight = q_weight * m_wdata->max_term_weight(term.first); @@ -51,9 +53,9 @@ struct wand_query { sort_enums(); while (true) { // find pivot - float upper_bound = 0; + float upper_bound = 0; size_t pivot; - bool found_pivot = false; + bool found_pivot = false; for (pivot = 0; pivot < ordered_enums.size(); ++pivot) { if (ordered_enums[pivot]->docs_enum.docid() == num_docs) { break; @@ -73,7 +75,7 @@ struct wand_query { // check if pivot is a possible match uint64_t pivot_id = ordered_enums[pivot]->docs_enum.docid(); if (pivot_id == ordered_enums[0]->docs_enum.docid()) { - float score = 0; + float score = 0; float norm_len = m_wdata->norm_len(pivot_id); for (scored_enum *en : ordered_enums) { if (en->docs_enum.docid() != pivot_id) { @@ -113,6 +115,7 @@ struct wand_query { private: WandType const *m_wdata; - topk_queue m_topk; + topk_queue m_topk; }; +} // namespace pisa \ No newline at end of file diff --git a/include/query/queries.hpp b/include/pisa/query/queries.hpp similarity index 87% rename from include/query/queries.hpp rename to include/pisa/query/queries.hpp index df5a561b1..6da30f2ab 100644 --- a/include/query/queries.hpp +++ b/include/pisa/query/queries.hpp @@ -10,8 +10,8 @@ #include "wand_data_compressed.hpp" #include "wand_data_raw.hpp" -namespace ds2i { -typedef uint32_t term_id_type; +namespace pisa { +typedef uint32_t term_id_type; typedef std::vector term_id_vec; bool read_query(term_id_vec &ret, std::istream &is = std::cin) { @@ -20,7 +20,7 @@ bool read_query(term_id_vec &ret, std::istream &is = std::cin) { if (!std::getline(is, line)) return false; std::istringstream iline(line); - term_id_type term_id; + term_id_type term_id; while (iline >> term_id) { ret.push_back(term_id); } @@ -33,9 +33,8 @@ void remove_duplicate_terms(term_id_vec &terms) { terms.erase(std::unique(terms.begin(), terms.end()), terms.end()); } - typedef std::pair term_freq_pair; -typedef std::vector term_freq_vec; +typedef std::vector term_freq_vec; term_freq_vec query_freqs(term_id_vec terms) { term_freq_vec query_term_freqs; @@ -52,14 +51,13 @@ term_freq_vec query_freqs(term_id_vec terms) { return query_term_freqs; } +} // namespace pisa + #include "algorithm/and_query.hpp" +#include "algorithm/block_max_maxscore_query.hpp" #include "algorithm/block_max_wand_query.hpp" #include "algorithm/maxscore_query.hpp" -#include "algorithm/block_max_maxscore_query.hpp" #include "algorithm/or_query.hpp" #include "algorithm/ranked_and_query.hpp" #include "algorithm/ranked_or_query.hpp" -#include "algorithm/wand_query.hpp" - - -} // namespace ds2i +#include "algorithm/wand_query.hpp" \ No newline at end of file diff --git a/include/recursive_graph_bisection.hpp b/include/pisa/recursive_graph_bisection.hpp similarity index 99% rename from include/recursive_graph_bisection.hpp rename to include/pisa/recursive_graph_bisection.hpp index 000f95d53..bb82bad02 100644 --- a/include/recursive_graph_bisection.hpp +++ b/include/pisa/recursive_graph_bisection.hpp @@ -16,7 +16,7 @@ #include "util/progress.hpp" #include "util/single_init_vector.hpp" -namespace ds2i { +namespace pisa { const Log2<4096> log2; namespace bp { @@ -310,4 +310,4 @@ void recursive_graph_bisection(std::vector> nodes, pr } } -} // namespace ds2i +} // namespace pisa diff --git a/include/score_opt_partition.hpp b/include/pisa/score_opt_partition.hpp similarity index 99% rename from include/score_opt_partition.hpp rename to include/pisa/score_opt_partition.hpp index 4a5106fab..fc8b9cb30 100644 --- a/include/score_opt_partition.hpp +++ b/include/pisa/score_opt_partition.hpp @@ -7,7 +7,7 @@ #include "util/util.hpp" #include "scorer/bm25.hpp" -namespace ds2i { +namespace pisa { typedef uint32_t posting_t ; typedef float wand_cost_t; diff --git a/include/scorer/bm25.hpp b/include/pisa/scorer/bm25.hpp similarity index 97% rename from include/scorer/bm25.hpp rename to include/pisa/scorer/bm25.hpp index 08aa18f40..65dfe201f 100644 --- a/include/scorer/bm25.hpp +++ b/include/pisa/scorer/bm25.hpp @@ -2,7 +2,7 @@ #include -namespace ds2i { +namespace pisa { struct bm25 { static constexpr float b = 0.5; diff --git a/include/sequence/indexed_sequence.hpp b/include/pisa/sequence/indexed_sequence.hpp similarity index 99% rename from include/sequence/indexed_sequence.hpp rename to include/pisa/sequence/indexed_sequence.hpp index 86cf994ae..af3a09c43 100644 --- a/include/sequence/indexed_sequence.hpp +++ b/include/pisa/sequence/indexed_sequence.hpp @@ -8,7 +8,7 @@ #include "codec/compact_ranked_bitvector.hpp" #include "global_parameters.hpp" -namespace ds2i { +namespace pisa { struct indexed_sequence { @@ -143,4 +143,4 @@ struct indexed_sequence { m_enumerator; }; }; -} // namespace ds2i +} // namespace pisa diff --git a/include/sequence/partitioned_sequence.hpp b/include/pisa/sequence/partitioned_sequence.hpp similarity index 99% rename from include/sequence/partitioned_sequence.hpp rename to include/pisa/sequence/partitioned_sequence.hpp index 9c30bd6c5..43777de5a 100644 --- a/include/sequence/partitioned_sequence.hpp +++ b/include/pisa/sequence/partitioned_sequence.hpp @@ -11,7 +11,7 @@ #include "util/util.hpp" #include "optimal_partition.hpp" -namespace ds2i { +namespace pisa { template struct partitioned_sequence { diff --git a/include/sequence/positive_sequence.hpp b/include/pisa/sequence/positive_sequence.hpp similarity index 99% rename from include/sequence/positive_sequence.hpp rename to include/pisa/sequence/positive_sequence.hpp index 8c461c7c3..08da63de5 100644 --- a/include/sequence/positive_sequence.hpp +++ b/include/pisa/sequence/positive_sequence.hpp @@ -4,7 +4,7 @@ #include "sequence/strict_sequence.hpp" #include "util/util.hpp" -namespace ds2i { +namespace pisa { template struct positive_sequence { diff --git a/include/sequence/strict_sequence.hpp b/include/pisa/sequence/strict_sequence.hpp similarity index 99% rename from include/sequence/strict_sequence.hpp rename to include/pisa/sequence/strict_sequence.hpp index 39d83ae46..fa11b34cb 100644 --- a/include/sequence/strict_sequence.hpp +++ b/include/pisa/sequence/strict_sequence.hpp @@ -8,7 +8,7 @@ #include "codec/strict_elias_fano.hpp" #include "global_parameters.hpp" -namespace ds2i { +namespace pisa { struct strict_sequence { @@ -152,4 +152,4 @@ struct strict_sequence { m_enumerator; }; }; -} // namespace ds2i +} // namespace pisa diff --git a/include/sequence/uniform_partitioned_sequence.hpp b/include/pisa/sequence/uniform_partitioned_sequence.hpp similarity index 99% rename from include/sequence/uniform_partitioned_sequence.hpp rename to include/pisa/sequence/uniform_partitioned_sequence.hpp index fe6f9c26e..849833755 100644 --- a/include/sequence/uniform_partitioned_sequence.hpp +++ b/include/pisa/sequence/uniform_partitioned_sequence.hpp @@ -8,7 +8,7 @@ #include "codec/integer_codes.hpp" #include "util/util.hpp" -namespace ds2i { +namespace pisa { template struct uniform_partitioned_sequence { diff --git a/include/sequence_collection.hpp b/include/pisa/sequence_collection.hpp similarity index 99% rename from include/sequence_collection.hpp rename to include/pisa/sequence_collection.hpp index 8f26b0cf8..8378acd50 100644 --- a/include/sequence_collection.hpp +++ b/include/pisa/sequence_collection.hpp @@ -6,7 +6,7 @@ #include "global_parameters.hpp" #include "util/semiasync_queue.hpp" -namespace ds2i { +namespace pisa { template class sequence_collection { diff --git a/include/succinct/mappable_vector.hpp b/include/pisa/succinct/mappable_vector.hpp similarity index 98% rename from include/succinct/mappable_vector.hpp rename to include/pisa/succinct/mappable_vector.hpp index 6193a8b41..224ee3e13 100644 --- a/include/succinct/mappable_vector.hpp +++ b/include/pisa/succinct/mappable_vector.hpp @@ -11,7 +11,7 @@ #include "util/intrinsics.hpp" -namespace ds2i { namespace mapper { +namespace pisa { namespace mapper { namespace detail { class freeze_visitor; diff --git a/include/succinct/mapper.hpp b/include/pisa/succinct/mapper.hpp similarity index 99% rename from include/succinct/mapper.hpp rename to include/pisa/succinct/mapper.hpp index 05e8bef65..c5d89d2da 100644 --- a/include/succinct/mapper.hpp +++ b/include/pisa/succinct/mapper.hpp @@ -7,7 +7,7 @@ #include "succinct/mappable_vector.hpp" -namespace ds2i { +namespace pisa { namespace mapper { struct map_flags { @@ -259,4 +259,4 @@ size_node_ptr size_tree_of(T &val, const char *friendly_name = "") { } } // namespace mapper -} // namespace ds2i +} // namespace pisa diff --git a/include/topk_queue.hpp b/include/pisa/topk_queue.hpp similarity index 94% rename from include/topk_queue.hpp rename to include/pisa/topk_queue.hpp index 318fe86ad..2ff4af9f8 100644 --- a/include/topk_queue.hpp +++ b/include/pisa/topk_queue.hpp @@ -1,6 +1,6 @@ #pragma once -namespace ds2i { +namespace pisa { struct topk_queue { using entry_type = std::pair; @@ -17,7 +17,6 @@ struct topk_queue { bool insert(float score) { return insert(score, 0); } bool insert(float score, uint64_t docid) { - //std::cerr << "insert: " << score << "(" << m_threshold << ")\n"; if (DS2I_UNLIKELY(score < m_threshold)) { return false; } @@ -63,4 +62,4 @@ struct topk_queue { std::vector m_q; }; -} // namespace ds2i +} // namespace pisa diff --git a/include/util/block_profiler.hpp b/include/pisa/util/block_profiler.hpp similarity index 98% rename from include/util/block_profiler.hpp rename to include/pisa/util/block_profiler.hpp index f39768bd1..e26aa6fc7 100644 --- a/include/util/block_profiler.hpp +++ b/include/pisa/util/block_profiler.hpp @@ -4,7 +4,7 @@ #include #include -namespace ds2i { +namespace pisa { class block_profiler { public: diff --git a/include/util/broadword.hpp b/include/pisa/util/broadword.hpp similarity index 99% rename from include/util/broadword.hpp rename to include/pisa/util/broadword.hpp index 948ff4284..e9804fe99 100644 --- a/include/util/broadword.hpp +++ b/include/pisa/util/broadword.hpp @@ -4,7 +4,7 @@ #include "util/intrinsics.hpp" #include "util/tables.hpp" -namespace ds2i { namespace broadword { +namespace pisa { namespace broadword { static const uint64_t ones_step_4 = 0x1111111111111111ULL; static const uint64_t ones_step_8 = 0x0101010101010101ULL; diff --git a/include/util/index_build_utils.hpp b/include/pisa/util/index_build_utils.hpp similarity index 98% rename from include/util/index_build_utils.hpp rename to include/pisa/util/index_build_utils.hpp index 1fd096b7e..b4e672dbd 100644 --- a/include/util/index_build_utils.hpp +++ b/include/pisa/util/index_build_utils.hpp @@ -5,7 +5,7 @@ #include "util/progress.hpp" #include "util/util.hpp" -namespace ds2i { +namespace pisa { template void get_size_stats(freq_index &coll, @@ -92,7 +92,7 @@ void reorder_inverted_index(const std::string & input_basename, binary_freq_collection input(input_basename.c_str()); std::vector> pl; - ds2i::progress reorder_progress("Reorder inverted index", + pisa::progress reorder_progress("Reorder inverted index", std::distance(input.begin(), input.end())); for (const auto &seq : input) { @@ -161,4 +161,4 @@ void sample_inverted_index(const std::string &input_basename, } } -} // namespace ds2i +} // namespace pisa diff --git a/include/util/intrinsics.hpp b/include/pisa/util/intrinsics.hpp similarity index 99% rename from include/util/intrinsics.hpp rename to include/pisa/util/intrinsics.hpp index b6a396f43..dac71c143 100644 --- a/include/util/intrinsics.hpp +++ b/include/pisa/util/intrinsics.hpp @@ -16,7 +16,7 @@ #define __INTRIN_INLINE inline #endif -namespace ds2i { +namespace pisa { namespace intrinsics { __INTRIN_INLINE uint64_t byteswap64(uint64_t value) { diff --git a/include/util/log.hpp b/include/pisa/util/log.hpp similarity index 92% rename from include/util/log.hpp rename to include/pisa/util/log.hpp index 5edc68409..16ae9f05f 100644 --- a/include/util/log.hpp +++ b/include/pisa/util/log.hpp @@ -1,6 +1,6 @@ #pragma once -namespace ds2i { +namespace pisa { template class Log2 { @@ -23,4 +23,4 @@ class Log2 { std::array m_values{}; }; -} // namespace ds2i +} // namespace pisa diff --git a/include/util/progress.hpp b/include/pisa/util/progress.hpp similarity index 99% rename from include/util/progress.hpp rename to include/pisa/util/progress.hpp index 4613aa606..ccb86c233 100644 --- a/include/util/progress.hpp +++ b/include/pisa/util/progress.hpp @@ -5,7 +5,7 @@ #include #include -namespace ds2i { +namespace pisa { class progress { diff --git a/include/util/semiasync_queue.hpp b/include/pisa/util/semiasync_queue.hpp similarity index 99% rename from include/util/semiasync_queue.hpp rename to include/pisa/util/semiasync_queue.hpp index 38f5eaf50..635233b71 100644 --- a/include/util/semiasync_queue.hpp +++ b/include/pisa/util/semiasync_queue.hpp @@ -7,7 +7,7 @@ #include "configuration.hpp" #include "util/util.hpp" -namespace ds2i { +namespace pisa { class semiasync_queue { public: diff --git a/include/util/single_init_vector.hpp b/include/pisa/util/single_init_vector.hpp similarity index 100% rename from include/util/single_init_vector.hpp rename to include/pisa/util/single_init_vector.hpp diff --git a/include/util/tables.hpp b/include/pisa/util/tables.hpp similarity index 99% rename from include/util/tables.hpp rename to include/pisa/util/tables.hpp index bdf206d36..746b105ad 100644 --- a/include/util/tables.hpp +++ b/include/pisa/util/tables.hpp @@ -2,7 +2,7 @@ #include -namespace ds2i { namespace tables { +namespace pisa { namespace tables { const uint8_t select_in_byte[2048] = { 8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, diff --git a/include/util/util.hpp b/include/pisa/util/util.hpp similarity index 99% rename from include/util/util.hpp rename to include/pisa/util/util.hpp index b870226a5..6281152ea 100644 --- a/include/util/util.hpp +++ b/include/pisa/util/util.hpp @@ -24,7 +24,7 @@ # define DS2I_FLATTEN_FUNC DS2I_ALWAYSINLINE #endif -namespace ds2i { +namespace pisa { template inline IntType1 ceil_div(IntType1 dividend, IntType2 divisor) diff --git a/include/util/verify_collection.hpp b/include/pisa/util/verify_collection.hpp similarity index 96% rename from include/util/verify_collection.hpp rename to include/pisa/util/verify_collection.hpp index 2a174c459..01a4c0977 100644 --- a/include/util/verify_collection.hpp +++ b/include/pisa/util/verify_collection.hpp @@ -5,14 +5,14 @@ #include "succinct/mapper.hpp" #include "util/util.hpp" -using ds2i::logger; +using pisa::logger; template void verify_collection(InputCollection const &input, const char *filename) { Collection coll; mio::mmap_source m(filename); - ds2i::mapper::map(coll, m); + pisa::mapper::map(coll, m); size_t size=0; logger() << "Checking the written data, just to be extra safe..." << std::endl; size_t s = 0; diff --git a/include/wand_data.hpp b/include/pisa/wand_data.hpp similarity index 99% rename from include/wand_data.hpp rename to include/pisa/wand_data.hpp index ac35fbd5c..011e8bbcd 100644 --- a/include/wand_data.hpp +++ b/include/pisa/wand_data.hpp @@ -7,7 +7,7 @@ #include "wand_data_raw.hpp" class enumerator; -namespace ds2i { +namespace pisa { template > class wand_data { diff --git a/include/wand_data_compressed.hpp b/include/pisa/wand_data_compressed.hpp similarity index 99% rename from include/wand_data_compressed.hpp rename to include/pisa/wand_data_compressed.hpp index 16568b81a..3e040d28a 100644 --- a/include/wand_data_compressed.hpp +++ b/include/pisa/wand_data_compressed.hpp @@ -15,7 +15,7 @@ #include "global_parameters.hpp" -namespace ds2i { +namespace pisa { namespace { static const size_t score_bits_size = broadword::msb(configuration::get().reference_size); } diff --git a/include/wand_data_raw.hpp b/include/pisa/wand_data_raw.hpp similarity index 99% rename from include/wand_data_raw.hpp rename to include/pisa/wand_data_raw.hpp index 0c26c48ac..fe8cecfa0 100644 --- a/include/wand_data_raw.hpp +++ b/include/pisa/wand_data_raw.hpp @@ -8,7 +8,7 @@ #include "util/util.hpp" #include "wand_utils.hpp" -namespace ds2i { +namespace pisa { template class wand_data_raw { diff --git a/include/wand_utils.hpp b/include/pisa/wand_utils.hpp similarity index 98% rename from include/wand_utils.hpp rename to include/pisa/wand_utils.hpp index 43a4d3e1d..c868fb866 100644 --- a/include/wand_utils.hpp +++ b/include/pisa/wand_utils.hpp @@ -4,7 +4,7 @@ #include "score_opt_partition.hpp" #include "global_parameters.hpp" -namespace ds2i { +namespace pisa { enum class partition_type { fixed_blocks, variable_blocks }; @@ -72,4 +72,4 @@ variable_block_partition(binary_freq_collection const &coll, return std::make_pair(p.docids, p.max_values); } -} // namespace ds2i +} // namespace pisa diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index dfce64cb5..e2ab5035c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,29 +1,14 @@ add_executable(create_freq_index create_freq_index.cpp) target_link_libraries(create_freq_index - Boost::boost - FastPFor - streamvbyte - MaskedVByte - QMX - simdcomp + pisa CLI11 - mio - ParallelSTL - GSL - ) +) add_executable(optimal_hybrid_index optimal_hybrid_index.cpp) +target_include_directories(optimal_hybrid_index PRIVATE ${STXXL_INCLUDE_DIRS}) target_link_libraries(optimal_hybrid_index - Boost::boost - FastPFor - streamvbyte - MaskedVByte - QMX - simdcomp ${STXXL_LIBRARIES} - mio - ParallelSTL - GSL + pisa ) set_target_properties(optimal_hybrid_index PROPERTIES CXX_STANDARD 14 @@ -31,125 +16,56 @@ set_target_properties(optimal_hybrid_index PROPERTIES add_executable(create_wand_data create_wand_data.cpp) target_link_libraries(create_wand_data - Boost::boost - QMX - simdcomp + pisa CLI11 - mio - ParallelSTL - GSL - ) +) add_executable(queries queries.cpp) target_link_libraries(queries - Boost::boost - FastPFor - streamvbyte - MaskedVByte - QMX - simdcomp + pisa CLI11 - mio - ParallelSTL - GSL - ) - +) add_executable(profile_queries profile_queries.cpp) target_link_libraries(profile_queries - Boost::boost - FastPFor - streamvbyte - MaskedVByte - QMX - simdcomp - mio - ParallelSTL - GSL - ) + pisa +) add_executable(profile_decoding profile_decoding.cpp) target_link_libraries(profile_decoding - Boost::boost - FastPFor - streamvbyte - MaskedVByte - QMX - simdcomp - mio - ParallelSTL - GSL - ) + pisa +) add_executable(shuffle_docids shuffle_docids.cpp) target_link_libraries(shuffle_docids - Boost::boost - FastPFor - streamvbyte - MaskedVByte - QMX - simdcomp - mio - ParallelSTL - GSL - ) + pisa +) add_executable(recursive_graph_bisection recursive_graph_bisection.cpp) target_link_libraries(recursive_graph_bisection - Boost::boost + pisa CLI11 - QMX - ParallelSTL - mio - GSL ) add_executable(evaluate_collection_ordering evaluate_collection_ordering.cpp) target_link_libraries(evaluate_collection_ordering - Boost::boost - FastPFor - streamvbyte - MaskedVByte - QMX - simdcomp - mio - ParallelSTL - GSL + pisa ) add_executable(parse_collection parse_collection.cpp) target_link_libraries(parse_collection - Boost::boost - Boost::filesystem + pisa CLI11 - mio - ParallelSTL - GSL - gumbo::gumbo - Porter2 - warcpp - ) -target_compile_features(parse_collection PRIVATE cxx_std_17) +) add_executable(invert invert.cpp) target_link_libraries(invert - Boost::boost - Boost::filesystem CLI11 - mio - ParallelSTL - GSL - gumbo::gumbo - ) -target_compile_features(invert PRIVATE cxx_std_17) + pisa +) add_executable(read_collection read_collection.cpp) target_link_libraries(read_collection - ${Boost_LIBRARIES} + pisa CLI11 - mio - ParallelSTL - GSL - gumbo::gumbo - ) -target_compile_features(parse_collection PRIVATE cxx_std_17) +) diff --git a/src/create_freq_index.cpp b/src/create_freq_index.cpp index 85304f437..54a85f422 100644 --- a/src/create_freq_index.cpp +++ b/src/create_freq_index.cpp @@ -19,17 +19,17 @@ #include "CLI/CLI.hpp" -using ds2i::logger; +using pisa::logger; template void dump_index_specific_stats(Collection const &, std::string const &) {} -void dump_index_specific_stats(ds2i::uniform_index const &coll, std::string const &type) { - ds2i::stats_line()("type", type)("log_partition_size", int(coll.params().log_partition_size)); +void dump_index_specific_stats(pisa::uniform_index const &coll, std::string const &type) { + pisa::stats_line()("type", type)("log_partition_size", int(coll.params().log_partition_size)); } -void dump_index_specific_stats(ds2i::opt_index const &coll, std::string const &type) { - auto const &conf = ds2i::configuration::get(); +void dump_index_specific_stats(pisa::opt_index const &coll, std::string const &type) { + auto const &conf = pisa::configuration::get(); uint64_t length_threshold = 4096; double long_postings = 0; @@ -45,18 +45,18 @@ void dump_index_specific_stats(ds2i::opt_index const &coll, std::string const &t } } - ds2i::stats_line()("type", type)("eps1", conf.eps1)("eps2", conf.eps2)( + pisa::stats_line()("type", type)("eps1", conf.eps1)("eps2", conf.eps2)( "fix_cost", conf.fix_cost)("docs_avg_part", long_postings / docs_partitions)( "freqs_avg_part", long_postings / freqs_partitions); } -template +template void create_collection(InputCollection const &input, - ds2i::global_parameters const ¶ms, + pisa::global_parameters const ¶ms, const boost::optional &output_filename, bool check, std::string const &seq_type) { - using namespace ds2i; + using namespace pisa; logger() << "Processing " << input.num_docs() << " documents" << std::endl; double tick = get_time_usecs(); @@ -64,7 +64,7 @@ void create_collection(InputCollection const &input, uint64_t size = 0; size_t postings = 0; { - ds2i::progress progress("Create index", input.size()); + pisa::progress progress("Create index", input.size()); for (auto const &plist : input) { uint64_t freqs_sum; size = plist.docs.size(); @@ -98,7 +98,7 @@ void create_collection(InputCollection const &input, int main(int argc, char **argv) { - using namespace ds2i; + using namespace pisa; std::string type; std::string input_basename; boost::optional output_filename; @@ -113,7 +113,7 @@ int main(int argc, char **argv) { binary_freq_collection input(input_basename.c_str()); - ds2i::global_parameters params; + pisa::global_parameters params; params.log_partition_size = configuration::get().log_partition_size; if (false) { diff --git a/src/create_wand_data.cpp b/src/create_wand_data.cpp index 933a7873e..b90d2e40b 100644 --- a/src/create_wand_data.cpp +++ b/src/create_wand_data.cpp @@ -12,7 +12,7 @@ #include "CLI/CLI.hpp" int main(int argc, const char **argv) { - using namespace ds2i; + using namespace pisa; std::string input_basename; std::string output_filename; diff --git a/src/evaluate_collection_ordering.cpp b/src/evaluate_collection_ordering.cpp index 17fed1af2..ed1e37b36 100644 --- a/src/evaluate_collection_ordering.cpp +++ b/src/evaluate_collection_ordering.cpp @@ -11,13 +11,13 @@ #include "util/index_build_utils.hpp" #include "util/util.hpp" -using ds2i::logger; +using pisa::logger; int main(int argc, const char** argv) { - using namespace ds2i; + using namespace pisa; if (argc != 2) { std::cerr << "Usage: " << argv[0] @@ -28,14 +28,14 @@ int main(int argc, const char** argv) const std::string input_basename = argv[1]; binary_freq_collection input(input_basename.c_str()); - + logger() << "Computing statistics about document ID space" << std::endl; std::vector log2_data(256); for (size_t i = 0; i < 256; ++i) { log2_data[i] = log2f(i); } - + double all_log_gaps = 0.0f; size_t no_gaps = 0; for (const auto& seq: input) { diff --git a/src/invert.cpp b/src/invert.cpp index 8afbff539..672f18d05 100644 --- a/src/invert.cpp +++ b/src/invert.cpp @@ -14,8 +14,8 @@ #include "invert.hpp" #include "util/util.hpp" -using ds2i::logger; -using namespace ds2i; +using pisa::logger; +using namespace pisa; int main(int argc, char **argv) { diff --git a/src/optimal_hybrid_index.cpp b/src/optimal_hybrid_index.cpp index c8dc6adae..bbea4e36f 100644 --- a/src/optimal_hybrid_index.cpp +++ b/src/optimal_hybrid_index.cpp @@ -22,14 +22,14 @@ #include "util/semiasync_queue.hpp" #include "util/progress.hpp" -using ds2i::logger; +using pisa::logger; typedef uint32_t block_id_type; // XXX for memory reasons, but would need size_t for very large indexes struct lambda_point { block_id_type block_id; float lambda; - ds2i::mixed_block::space_time_point st; + pisa::mixed_block::space_time_point st; struct comparator { bool operator()(lambda_point const& lhs, lambda_point const& rhs) const @@ -56,10 +56,10 @@ struct lambda_point { typedef stxxl::vector lambda_vector_type; template -struct lambdas_computer : ds2i::semiasync_queue::job { +struct lambdas_computer : pisa::semiasync_queue::job { lambdas_computer(block_id_type block_id_base, typename InputCollectionType::document_enumerator e, - ds2i::predictors_vec_type const& predictors, + pisa::predictors_vec_type const& predictors, std::vector& counts, lambda_vector_type& lambda_points) : m_block_id_base(block_id_base) @@ -72,7 +72,7 @@ struct lambdas_computer : ds2i::semiasync_queue::job { virtual void prepare() { - using namespace ds2i; + using namespace pisa; using namespace time_prediction; auto blocks = m_e.get_blocks(); @@ -138,7 +138,7 @@ struct lambdas_computer : ds2i::semiasync_queue::job { block_id_type m_block_id_base; typename InputCollectionType::document_enumerator m_e; - ds2i::predictors_vec_type const& m_predictors; + pisa::predictors_vec_type const& m_predictors; std::vector m_counts; double m_lambda; std::vector m_points_buf; @@ -153,10 +153,10 @@ void compute_lambdas(InputCollectionType const& input_coll, const char* lambdas_filename) { - using namespace ds2i; + using namespace pisa; using namespace time_prediction; - ds2i::progress progress("Computing lambdas", input_coll.size()); + pisa::progress progress("Computing lambdas", input_coll.size()); auto predictors = load_predictors(predictors_filename); std::ifstream block_stats(block_stats_filename); @@ -240,11 +240,11 @@ void compute_lambdas(InputCollectionType const& input_coll, } template -struct list_transformer : ds2i::semiasync_queue::job { +struct list_transformer : pisa::semiasync_queue::job { list_transformer(CollectionBuilder& b, typename InputCollectionType::document_enumerator e, - std::vector::const_iterator block_type_begin, - std::vector::const_iterator block_param_begin) + std::vector::const_iterator block_type_begin, + std::vector::const_iterator block_param_begin) : m_b(b) , m_e(e) , m_block_type(block_type_begin) @@ -253,7 +253,7 @@ struct list_transformer : ds2i::semiasync_queue::job { virtual void prepare() { - using namespace ds2i; + using namespace pisa; typedef typename InputCollectionType::document_enumerator::block_data input_block_type; typedef mixed_block::block_transformer output_block_type; @@ -281,14 +281,14 @@ struct list_transformer : ds2i::semiasync_queue::job { CollectionBuilder& m_b; typename InputCollectionType::document_enumerator m_e; - std::vector::const_iterator m_block_type; - std::vector::const_iterator m_block_param; + std::vector::const_iterator m_block_type; + std::vector::const_iterator m_block_param; std::vector m_buf; }; template -void optimal_hybrid_index(ds2i::global_parameters const& params, +void optimal_hybrid_index(pisa::global_parameters const& params, const char* predictors_filename, const char* block_stats_filename, const char* input_filename, @@ -296,7 +296,7 @@ void optimal_hybrid_index(ds2i::global_parameters const& params, const char* lambdas_filename, size_t budget) { - using namespace ds2i; + using namespace pisa; InputCollectionType input_coll; mio::mmap_source m(input_filename); @@ -426,7 +426,7 @@ void optimal_hybrid_index(ds2i::global_parameters const& params, typedef typename block_mixed_index::builder builder_type; builder_type builder(input_coll.num_docs(), params); - ds2i::progress progress("Building collection", input_coll.size()); + pisa::progress progress("Building collection", input_coll.size()); semiasync_queue queue(1 << 24); auto block_types_it = block_types.begin(); @@ -471,7 +471,7 @@ void optimal_hybrid_index(ds2i::global_parameters const& params, int main(int argc, const char** argv) { - using namespace ds2i; + using namespace pisa; if (argc < 5) { std::cerr << "Usage: " << argv[0] @@ -498,7 +498,7 @@ int main(int argc, const char** argv) { collection_basename = argv[9]; } - ds2i::global_parameters params; + pisa::global_parameters params; if (false) { #define LOOP_BODY(R, DATA, T) \ diff --git a/src/parse_collection.cpp b/src/parse_collection.cpp index 956c47742..25cb1aa45 100644 --- a/src/parse_collection.cpp +++ b/src/parse_collection.cpp @@ -7,8 +7,8 @@ #include "forward_index_builder.hpp" -using ds2i::logger; -using namespace ds2i; +using pisa::logger; +using namespace pisa; int main(int argc, char **argv) { diff --git a/src/profile_decoding.cpp b/src/profile_decoding.cpp index d25feb1e8..734b43c9a 100644 --- a/src/profile_decoding.cpp +++ b/src/profile_decoding.cpp @@ -9,7 +9,7 @@ #include "util/util.hpp" #include "dec_time_prediction.hpp" -namespace ds2i { +namespace pisa { double measure_decoding_time(size_t sum_of_values, size_t n, std::vector const& buf) @@ -108,7 +108,7 @@ namespace ds2i { int main(int /* argc */, const char** argv) { - using namespace ds2i; + using namespace pisa; std::string type = argv[1]; const char* index_filename = argv[2]; diff --git a/src/profile_queries.cpp b/src/profile_queries.cpp index 922bb9a1b..6e09c387f 100644 --- a/src/profile_queries.cpp +++ b/src/profile_queries.cpp @@ -17,9 +17,9 @@ template void op_profile(IndexType const& index, QueryOperator const& query_op, - std::vector const& queries) + std::vector const& queries) { - using namespace ds2i; + using namespace pisa; size_t n_threads = std::thread::hardware_concurrency(); std::vector threads(n_threads); @@ -46,19 +46,19 @@ template struct add_profiling { typedef IndexType type; }; template -struct add_profiling> { - typedef ds2i::block_freq_index type; +struct add_profiling> { + typedef pisa::block_freq_index type; }; template void profile(const std::string index_filename, const boost::optional &wand_data_filename, - std::vector const& queries, + std::vector const& queries, std::string const& type, std::string const& query_type) { - using namespace ds2i; + using namespace pisa; typename add_profiling::type index; typedef wand_data> WandType; @@ -103,7 +103,7 @@ void profile(const std::string index_filename, int main(int argc, const char** argv) { - using namespace ds2i; + using namespace pisa; std::string type = argv[1]; const char* query_type = argv[2]; diff --git a/src/queries.cpp b/src/queries.cpp index 97fea39a0..e5b8f320a 100644 --- a/src/queries.cpp +++ b/src/queries.cpp @@ -16,15 +16,14 @@ #include "CLI/CLI.hpp" -using namespace ds2i; +using namespace pisa; template void op_perftest(Functor query_func, // XXX!!! - std::vector const &queries, + std::vector const &queries, std::string const &index_type, std::string const &query_type, size_t runs) { - using namespace ds2i; std::vector query_times; @@ -66,11 +65,10 @@ void op_perftest(Functor query_func, // XXX!!! template void perftest(const std::string &index_filename, const boost::optional &wand_data_filename, - const std::vector &queries, + const std::vector &queries, std::string const &type, std::string const &query_type, uint64_t k) { - using namespace ds2i; IndexType index; logger() << "Loading index from " << index_filename << std::endl; mio::mmap_source m(index_filename.c_str()); @@ -107,33 +105,33 @@ void perftest(const std::string &index_filename, for (auto &&t : query_types) { logger() << "Query type: " << t << std::endl; - std::function query_fun; + std::function query_fun; if (t == "and") { - query_fun = [&](ds2i::term_id_vec query) { return and_query()(index, query); }; + query_fun = [&](term_id_vec query) { return and_query()(index, query); }; } else if (t == "and_freq") { - query_fun = [&](ds2i::term_id_vec query) { return and_query()(index, query); }; + query_fun = [&](term_id_vec query) { return and_query()(index, query); }; } else if (t == "or") { - query_fun = [&](ds2i::term_id_vec query) { return or_query()(index, query); }; + query_fun = [&](term_id_vec query) { return or_query()(index, query); }; } else if (t == "or_freq") { - query_fun = [&](ds2i::term_id_vec query) { return or_query()(index, query); }; + query_fun = [&](term_id_vec query) { return or_query()(index, query); }; } else if (t == "wand" && wand_data_filename) { - query_fun = [&](ds2i::term_id_vec query) { + query_fun = [&](term_id_vec query) { return wand_query(wdata, k)(index, query); }; } else if (t == "block_max_wand" && wand_data_filename) { - query_fun = [&](ds2i::term_id_vec query) { + query_fun = [&](term_id_vec query) { return block_max_wand_query(wdata, k)(index, query); }; } else if (t == "block_max_maxscore" && wand_data_filename) { - query_fun = [&](ds2i::term_id_vec query) { + query_fun = [&](term_id_vec query) { return block_max_maxscore_query(wdata, k)(index, query); }; } else if (t == "ranked_or" && wand_data_filename) { - query_fun = [&](ds2i::term_id_vec query) { + query_fun = [&](term_id_vec query) { return ranked_or_query(wdata, k)(index, query); }; } else if (t == "maxscore" && wand_data_filename) { - query_fun = [&](ds2i::term_id_vec query) { + query_fun = [&](term_id_vec query) { return maxscore_query(wdata, k)(index, query); }; } else { @@ -148,8 +146,6 @@ typedef wand_data> wand_raw_index; typedef wand_data> wand_uniform_index; int main(int argc, const char **argv) { - using namespace ds2i; - std::string type; std::string query_type; std::string index_filename; diff --git a/src/read_collection.cpp b/src/read_collection.cpp index 77ed02414..dc67cb397 100644 --- a/src/read_collection.cpp +++ b/src/read_collection.cpp @@ -6,8 +6,8 @@ #include "enumerate.hpp" #include "util/util.hpp" -using ds2i::logger; -using namespace ds2i; +using pisa::logger; +using namespace pisa; int main(int argc, char **argv) { diff --git a/src/recursive_graph_bisection.cpp b/src/recursive_graph_bisection.cpp index 8234c400b..f8f30efb9 100644 --- a/src/recursive_graph_bisection.cpp +++ b/src/recursive_graph_bisection.cpp @@ -7,7 +7,7 @@ #include "recursive_graph_bisection.hpp" #include "util/progress.hpp" -using namespace ds2i; +using namespace pisa; using iterator_type = std::vector::iterator; using range_type = document_range; using node_type = computation_node; @@ -30,14 +30,14 @@ inline void run_with_config(const std::string &config_file, const range_type &in nodes.begin(), nodes.end(), std::ptrdiff_t(0), [](auto acc, const auto &node) { return acc + node.partition.size(); }); - ds2i::progress bp_progress("Graph bisection", total_count); + pisa::progress bp_progress("Graph bisection", total_count); bp_progress.update(0); recursive_graph_bisection(std::move(nodes), bp_progress); } inline void run_default_tree(size_t depth, const range_type &initial_range) { std::cerr << "Default tree with depth " << depth << std::endl; - ds2i::progress bp_progress("Graph bisection", initial_range.size() * depth); + pisa::progress bp_progress("Graph bisection", initial_range.size() * depth); bp_progress.update(0); recursive_graph_bisection(initial_range, depth, depth - 6, bp_progress); } diff --git a/src/sample_index.cpp b/src/sample_index.cpp index c33bcc0f1..24f514e38 100644 --- a/src/sample_index.cpp +++ b/src/sample_index.cpp @@ -15,6 +15,6 @@ int main(int argc, char const *argv[]) { app.add_option("-n,--num-doc", num_docs, "Number of documents")->required(); CLI11_PARSE(app, argc, argv); - ds2i::sample_inverted_index(input_basename, output_basename, num_docs); + pisa::sample_inverted_index(input_basename, output_basename, num_docs); return 0; } diff --git a/src/shuffle_docids.cpp b/src/shuffle_docids.cpp index c18f54190..16ed3c5b8 100644 --- a/src/shuffle_docids.cpp +++ b/src/shuffle_docids.cpp @@ -12,12 +12,12 @@ #include "util/util.hpp" #include "util/progress.hpp" -using ds2i::logger; +using pisa::logger; int main(int argc, const char** argv) { - using namespace ds2i; + using namespace pisa; if (argc != 3 && argc != 4) { std::cerr << "Usage: " << argv[0] @@ -73,7 +73,7 @@ int main(int argc, const char** argv) emit(output_sizes, new_sizes.data(), num_docs); } - ds2i::progress progress("Shuffling posting lists", input.size()); + pisa::progress progress("Shuffling posting lists", input.size()); std::ofstream output_docs(output_basename + ".docs"); std::ofstream output_freqs(output_basename + ".freqs"); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index bc5163b37..5d6392a8f 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -5,11 +5,7 @@ foreach(TEST_SRC ${TEST_SOURCES}) get_filename_component (TEST_SRC_NAME ${TEST_SRC} NAME_WE) add_executable(${TEST_SRC_NAME} ${TEST_SRC}) target_link_libraries(${TEST_SRC_NAME} - Boost::boost - QMX - mio - ParallelSTL - GSL + pisa Catch2 ) add_test(${TEST_SRC_NAME} ${TEST_SRC_NAME}) @@ -18,62 +14,4 @@ foreach(TEST_SRC ${TEST_SOURCES}) add_coverage(${TEST_SRC_NAME}) endforeach(TEST_SRC) -target_link_libraries(test_block_codecs - FastPFor - streamvbyte - MaskedVByte - simdcomp -) - -target_link_libraries(test_block_posting_list - FastPFor - streamvbyte - MaskedVByte - simdcomp -) - -target_link_libraries(test_block_freq_index - FastPFor - streamvbyte - MaskedVByte - simdcomp -) - -target_link_libraries(test_sample_index - FastPFor - streamvbyte - MaskedVByte - simdcomp -) - -target_link_libraries(test_forward_index - FastPFor - streamvbyte - MaskedVByte - simdcomp - ParallelSTL -) - -target_compile_features(test_forward_index_builder PRIVATE cxx_std_17) -target_link_libraries(test_forward_index_builder - ParallelSTL - gumbo::gumbo - Boost::filesystem - Porter2 - warcpp -) - -target_compile_features(test_invert PRIVATE cxx_std_17) -target_link_libraries(test_invert - ParallelSTL - Boost::filesystem -) - -target_compile_features(test_html PRIVATE cxx_std_17) -target_link_libraries(test_html - gumbo::gumbo - Boost::filesystem - Porter2 -) - file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/test_data DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) diff --git a/test/test_bit_vector.cpp b/test/test_bit_vector.cpp index c65518bfa..f7661fcdb 100644 --- a/test/test_bit_vector.cpp +++ b/test/test_bit_vector.cpp @@ -15,17 +15,17 @@ TEST_CASE("bit_vector") std::vector v = random_bit_vector(); { - ds2i::bit_vector_builder bvb; + pisa::bit_vector_builder bvb; for (size_t i = 0; i < v.size(); ++i) { bvb.push_back(v[i]); } - ds2i::bit_vector bitmap(&bvb); + pisa::bit_vector bitmap(&bvb); test_equal_bits(v, bitmap, "Random bits (push_back)"); } { - ds2i::bit_vector_builder bvb(v.size()); + pisa::bit_vector_builder bvb(v.size()); for (size_t i = 0; i < v.size(); ++i) { bvb.set(i, v[i]); } @@ -34,34 +34,34 @@ TEST_CASE("bit_vector") bvb.push_back(1); v.push_back(1); - ds2i::bit_vector bitmap(&bvb); + pisa::bit_vector bitmap(&bvb); test_equal_bits(v, bitmap, "Random bits (set)"); } uint64_t ints[] = {uint64_t(-1), uint64_t(1) << 63, 1, 1, 1, 3, 5, 7, 0xFFF, 0xF0F, 1, 0xFFFFFF, 0x123456, uint64_t(1) << 63, uint64_t(-1)}; { - ds2i::bit_vector_builder bvb; + pisa::bit_vector_builder bvb; for(uint64_t i : ints) { - uint64_t len = ds2i::broadword::msb(i) + 1; + uint64_t len = pisa::broadword::msb(i) + 1; bvb.append_bits(i, len); } - ds2i::bit_vector bitmap(&bvb); + pisa::bit_vector bitmap(&bvb); uint64_t pos = 0; for(uint64_t i : ints) { - uint64_t len = ds2i::broadword::msb(i) + 1; + uint64_t len = pisa::broadword::msb(i) + 1; REQUIRE(i == bitmap.get_bits(pos, len)); pos += len; } } { - using ds2i::broadword::msb; + using pisa::broadword::msb; std::vector positions(1); for(uint64_t i : ints) { positions.push_back(positions.back() + msb(i) + 1); } - ds2i::bit_vector_builder bvb(positions.back()); + pisa::bit_vector_builder bvb(positions.back()); for (size_t i = 0; i < positions.size() - 1; ++i) { uint64_t v = ints[i]; @@ -69,7 +69,7 @@ TEST_CASE("bit_vector") bvb.set_bits(positions[i], v, len); } - ds2i::bit_vector bitmap(&bvb); + pisa::bit_vector bitmap(&bvb); for (size_t i = 0; i < positions.size() - 1; ++i) { uint64_t v = ints[i]; uint64_t len = positions[i + 1] - positions[i]; @@ -82,19 +82,19 @@ TEST_CASE("bit_vector_enumerator") { srand(42); std::vector v = random_bit_vector(); - ds2i::bit_vector bitmap(v); + pisa::bit_vector bitmap(v); size_t i = 0; size_t pos = 0; - ds2i::bit_vector::enumerator e(bitmap, pos); + pisa::bit_vector::enumerator e(bitmap, pos); while (pos < bitmap.size()) { bool next = e.next(); MY_REQUIRE_EQUAL(next, v[pos], "pos = " << pos << " i = " << i); pos += 1; pos += size_t(rand()) % (bitmap.size() - pos + 1); - e = ds2i::bit_vector::enumerator(bitmap, pos); + e = pisa::bit_vector::enumerator(bitmap, pos); i += 1; } } @@ -111,7 +111,7 @@ TEST_CASE("bit_vector_unary_enumerator") std::fill(v.begin(), v.begin() + l, 0); } - ds2i::bit_vector bitmap(v); + pisa::bit_vector bitmap(v); std::vector ones; for (size_t i = 0; i < v.size(); ++i) { @@ -121,7 +121,7 @@ TEST_CASE("bit_vector_unary_enumerator") } { - ds2i::bit_vector::unary_enumerator e(bitmap, 0); + pisa::bit_vector::unary_enumerator e(bitmap, 0); for (size_t r = 0; r < ones.size(); ++r) { uint64_t pos = e.next(); @@ -131,11 +131,11 @@ TEST_CASE("bit_vector_unary_enumerator") } { - ds2i::bit_vector::unary_enumerator e(bitmap, 0); + pisa::bit_vector::unary_enumerator e(bitmap, 0); for (size_t r = 0; r < ones.size(); ++r) { for (size_t k = 0; k < std::min(size_t(256), size_t(ones.size() - r)); ++k) { - ds2i::bit_vector::unary_enumerator ee(e); + pisa::bit_vector::unary_enumerator ee(e); ee.skip(k); uint64_t pos = ee.next(); MY_REQUIRE_EQUAL(ones[r + k], pos, @@ -146,11 +146,11 @@ TEST_CASE("bit_vector_unary_enumerator") } { - ds2i::bit_vector::unary_enumerator e(bitmap, 0); + pisa::bit_vector::unary_enumerator e(bitmap, 0); for (size_t r = 0; r < ones.size(); ++r) { for (size_t k = 0; k < std::min(size_t(256), size_t(ones.size() - r)); ++k) { - ds2i::bit_vector::unary_enumerator ee(e); + pisa::bit_vector::unary_enumerator ee(e); uint64_t pos_skip = ee.skip_no_move(k); uint64_t pos = ee.next(); MY_REQUIRE_EQUAL(ones[r], pos, @@ -164,13 +164,13 @@ TEST_CASE("bit_vector_unary_enumerator") } { - ds2i::bit_vector::unary_enumerator e(bitmap, 0); + pisa::bit_vector::unary_enumerator e(bitmap, 0); for (size_t pos = 0; pos < v.size(); ++pos) { uint64_t skip = 0; for (size_t d = 0; d < std::min(size_t(256), size_t(v.size() - pos)); ++d) { if (v[pos + d] == 0) { - ds2i::bit_vector::unary_enumerator ee(bitmap, pos); + pisa::bit_vector::unary_enumerator ee(bitmap, pos); ee.skip0(skip); uint64_t expected_pos = pos + d; @@ -190,7 +190,7 @@ TEST_CASE("bit_vector_unary_enumerator") void test_bvb_reverse(size_t n) { std::vector v = random_bit_vector(n); - ds2i::bit_vector_builder bvb; + pisa::bit_vector_builder bvb; for (size_t i = 0; i < v.size(); ++i) { bvb.push_back(v[i]); } @@ -198,7 +198,7 @@ void test_bvb_reverse(size_t n) std::reverse(v.begin(), v.end()); bvb.reverse(); - ds2i::bit_vector bitmap(&bvb); + pisa::bit_vector bitmap(&bvb); test_equal_bits(v, bitmap, "In-place reverse"); } diff --git a/test/test_block_codecs.cpp b/test/test_block_codecs.cpp index 7f9b77109..dbe6283d7 100644 --- a/test/test_block_codecs.cpp +++ b/test/test_block_codecs.cpp @@ -44,14 +44,14 @@ void test_block_codec() TEST_CASE("block_codecs") { - test_block_codec(); - test_block_codec(); - test_block_codec(); - test_block_codec(); - test_block_codec(); - test_block_codec(); - test_block_codec(); - test_block_codec(); - test_block_codec(); - test_block_codec(); + test_block_codec(); + test_block_codec(); + test_block_codec(); + test_block_codec(); + test_block_codec(); + test_block_codec(); + test_block_codec(); + test_block_codec(); + test_block_codec(); + test_block_codec(); } diff --git a/test/test_block_freq_index.cpp b/test/test_block_freq_index.cpp index c750aada4..d202ccb5a 100644 --- a/test/test_block_freq_index.cpp +++ b/test/test_block_freq_index.cpp @@ -23,9 +23,9 @@ template void test_block_freq_index() { - ds2i::global_parameters params; + pisa::global_parameters params; uint64_t universe = 20000; - typedef ds2i::block_freq_index collection_type; + typedef pisa::block_freq_index collection_type; typename collection_type::builder b(universe, params); typedef std::vector vec_type; @@ -46,13 +46,13 @@ void test_block_freq_index() { collection_type coll; b.build(coll); - ds2i::mapper::freeze(coll, "temp.bin"); + pisa::mapper::freeze(coll, "temp.bin"); } { collection_type coll; mio::mmap_source m("temp.bin"); - ds2i::mapper::map(coll, m); + pisa::mapper::map(coll, m); for (size_t i = 0; i < posting_lists.size(); ++i) { auto const& plist = posting_lists[i]; @@ -71,14 +71,14 @@ void test_block_freq_index() TEST_CASE("block_freq_index") { - test_block_freq_index(); - test_block_freq_index(); - test_block_freq_index(); - test_block_freq_index(); - test_block_freq_index(); - test_block_freq_index(); - test_block_freq_index(); - test_block_freq_index(); - test_block_freq_index(); - test_block_freq_index(); + test_block_freq_index(); + test_block_freq_index(); + test_block_freq_index(); + test_block_freq_index(); + test_block_freq_index(); + test_block_freq_index(); + test_block_freq_index(); + test_block_freq_index(); + test_block_freq_index(); + test_block_freq_index(); } diff --git a/test/test_block_posting_list.cpp b/test/test_block_posting_list.cpp index 19700858b..c755b6e9b 100644 --- a/test/test_block_posting_list.cpp +++ b/test/test_block_posting_list.cpp @@ -61,7 +61,7 @@ void random_posting_data(uint64_t n, uint64_t universe, template void test_block_posting_list() { - typedef ds2i::block_posting_list posting_list_type; + typedef pisa::block_posting_list posting_list_type; uint64_t universe = 20000; for (size_t t = 0; t < 20; ++t) { double avg_gap = 1.1 + double(rand()) / RAND_MAX * 10; @@ -80,7 +80,7 @@ void test_block_posting_list() template void test_block_posting_list_reordering() { - typedef ds2i::block_posting_list posting_list_type; + typedef pisa::block_posting_list posting_list_type; uint64_t universe = 20000; for (size_t t = 0; t < 20; ++t) { double avg_gap = 1.1 + double(rand()) / RAND_MAX * 10; @@ -106,18 +106,18 @@ void test_block_posting_list_reordering() TEST_CASE("block_posting_list") { - test_block_posting_list(); - test_block_posting_list(); - test_block_posting_list(); - test_block_posting_list(); - test_block_posting_list(); - test_block_posting_list(); - test_block_posting_list(); - test_block_posting_list(); - test_block_posting_list(); - test_block_posting_list(); + test_block_posting_list(); + test_block_posting_list(); + test_block_posting_list(); + test_block_posting_list(); + test_block_posting_list(); + test_block_posting_list(); + test_block_posting_list(); + test_block_posting_list(); + test_block_posting_list(); + test_block_posting_list(); } TEST_CASE("block_posting_list_reordering") { - test_block_posting_list_reordering(); + test_block_posting_list_reordering(); } diff --git a/test/test_bmw_queries.cpp b/test/test_bmw_queries.cpp index 3496de8cb..67b5c82fe 100644 --- a/test/test_bmw_queries.cpp +++ b/test/test_bmw_queries.cpp @@ -7,7 +7,7 @@ #include "index_types.hpp" #include "query/queries.hpp" -namespace ds2i { +namespace pisa { namespace test { struct index_initialization { @@ -73,12 +73,12 @@ struct index_initialization { }; } // namespace test -} // namespace ds2i +} // namespace pisa -TEST_CASE_METHOD(ds2i::test::index_initialization, "block_max_wand") { - ds2i::block_max_wand_query block_max_wand_q(wdata, 10); - ds2i::block_max_wand_query block_max_wand_uniform_q(wdata_uniform, 10); - ds2i::block_max_wand_query block_max_wand_fixed_q(wdata_fixed, 10); +TEST_CASE_METHOD(pisa::test::index_initialization, "block_max_wand") { + pisa::block_max_wand_query block_max_wand_q(wdata, 10); + pisa::block_max_wand_query block_max_wand_uniform_q(wdata_uniform, 10); + pisa::block_max_wand_query block_max_wand_fixed_q(wdata_fixed, 10); test_against_wand(block_max_wand_uniform_q); test_against_wand(block_max_wand_q); test_against_wand(block_max_wand_fixed_q); diff --git a/test/test_compact_elias_fano.cpp b/test/test_compact_elias_fano.cpp index 27144442f..ccd5a08f9 100644 --- a/test/test_compact_elias_fano.cpp +++ b/test/test_compact_elias_fano.cpp @@ -17,19 +17,19 @@ struct sequence_initialization { // high granularity to test more corner cases params.ef_log_sampling0 = 4; params.ef_log_sampling1 = 5; - ds2i::bit_vector_builder bvb; - ds2i::compact_elias_fano::write(bvb, + pisa::bit_vector_builder bvb; + pisa::compact_elias_fano::write(bvb, seq.begin(), universe, seq.size(), params); - ds2i::bit_vector(&bvb).swap(bv); + pisa::bit_vector(&bvb).swap(bv); } - ds2i::global_parameters params; + pisa::global_parameters params; size_t n; size_t universe; std::vector seq; - ds2i::bit_vector bv; + pisa::bit_vector bv; }; TEST_CASE_METHOD(sequence_initialization, "compact_elias_fano_singleton") @@ -37,16 +37,16 @@ TEST_CASE_METHOD(sequence_initialization, "compact_elias_fano_singleton") // test singleton sequences std::vector short_seq; short_seq.push_back(0); - test_sequence(ds2i::compact_elias_fano(), params, 1, short_seq); + test_sequence(pisa::compact_elias_fano(), params, 1, short_seq); short_seq[0] = 1; - test_sequence(ds2i::compact_elias_fano(), params, 2, short_seq); + test_sequence(pisa::compact_elias_fano(), params, 2, short_seq); } TEST_CASE_METHOD(sequence_initialization, "compact_elias_fano_construction") { // test pointers and low-level values - ds2i::compact_elias_fano::offsets of(0, + pisa::compact_elias_fano::offsets of(0, universe, seq.size(), params); uint64_t rank = 0; @@ -80,7 +80,7 @@ TEST_CASE_METHOD(sequence_initialization, "compact_elias_fano_construction") TEST_CASE_METHOD(sequence_initialization, "compact_elias_fano_enumerator") { - ds2i::compact_elias_fano::enumerator r(bv, 0, + pisa::compact_elias_fano::enumerator r(bv, 0, universe, seq.size(), params); test_sequence(r, seq); @@ -91,6 +91,6 @@ TEST_CASE_METHOD(sequence_initialization, "compact_elias_fano_weakly_monotone") n = 100000; universe = n * 3; std::vector seq = random_sequence(universe, n, false); - test_sequence(ds2i::compact_elias_fano(), params, universe, seq); + test_sequence(pisa::compact_elias_fano(), params, universe, seq); } diff --git a/test/test_compact_ranked_bitvector.cpp b/test/test_compact_ranked_bitvector.cpp index a5988e550..8f67694d2 100644 --- a/test/test_compact_ranked_bitvector.cpp +++ b/test/test_compact_ranked_bitvector.cpp @@ -17,28 +17,28 @@ struct sequence_initialization { // high granularity to test more corner cases params.rb_log_rank1_sampling = 6; params.rb_log_sampling1 = 5; - ds2i::bit_vector_builder bvb; - ds2i::compact_ranked_bitvector::write(bvb, + pisa::bit_vector_builder bvb; + pisa::compact_ranked_bitvector::write(bvb, seq.begin(), universe, seq.size(), params); - ds2i::bit_vector(&bvb).swap(bv); + pisa::bit_vector(&bvb).swap(bv); } - ds2i::global_parameters params; + pisa::global_parameters params; size_t n; size_t universe; uint64_t log_rank1_sampling; uint64_t log_sampling1; std::vector seq; - ds2i::bit_vector bv; + pisa::bit_vector bv; }; TEST_CASE_METHOD(sequence_initialization, "compact_ranked_bitvector_construction") { // test pointers and rank samples - ds2i::compact_ranked_bitvector::offsets of(0, + pisa::compact_ranked_bitvector::offsets of(0, universe, seq.size(), params); uint64_t rank = 0; @@ -72,14 +72,14 @@ TEST_CASE_METHOD(sequence_initialization, "compact_ranked_bitvector_singleton") // test singleton sequences std::vector short_seq; short_seq.push_back(0); - test_sequence(ds2i::compact_ranked_bitvector(), params, 1, short_seq); + test_sequence(pisa::compact_ranked_bitvector(), params, 1, short_seq); short_seq[0] = 1; - test_sequence(ds2i::compact_ranked_bitvector(), params, 2, short_seq); + test_sequence(pisa::compact_ranked_bitvector(), params, 2, short_seq); } TEST_CASE_METHOD(sequence_initialization, "compact_ranked_bitvector_enumerator") { - ds2i::compact_ranked_bitvector::enumerator r(bv, 0, + pisa::compact_ranked_bitvector::enumerator r(bv, 0, universe, seq.size(), params); test_sequence(r, seq); diff --git a/test/test_forward_index.cpp b/test/test_forward_index.cpp index 5ea106690..5291d5c10 100644 --- a/test/test_forward_index.cpp +++ b/test/test_forward_index.cpp @@ -9,7 +9,7 @@ TEST_CASE("write_and_read") { // given - using namespace ds2i; + using namespace pisa; std::string invind_input("test_data/test_collection"); std::string fwdind_file("temp_collection"); auto fwd = forward_index::from_inverted_index(invind_input, 0, true); diff --git a/test/test_forward_index_builder.cpp b/test/test_forward_index_builder.cpp index e47601c6e..a49f5b731 100644 --- a/test/test_forward_index_builder.cpp +++ b/test/test_forward_index_builder.cpp @@ -21,9 +21,9 @@ using namespace boost::filesystem; TEST_CASE("Batch file name", "[parsing][forward_index]") { std::string basename = "basename"; - REQUIRE(ds2i::Forward_Index_Builder::batch_file(basename, 0) == + REQUIRE(pisa::Forward_Index_Builder::batch_file(basename, 0) == basename + ".batch.0"); - REQUIRE(ds2i::Forward_Index_Builder::batch_file(basename, 10) == + REQUIRE(pisa::Forward_Index_Builder::batch_file(basename, 10) == basename + ".batch.10"); } @@ -37,7 +37,7 @@ TEST_CASE("Write document to stream", "[parsing][forward_index]") 4, 0, 0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0}}, {{}, {0, 0, 0, 0}}})); WHEN("List of term IDs is written to stream") { - ds2i::Forward_Index_Builder::write_document( + pisa::Forward_Index_Builder::write_document( os, term_ids.begin(), term_ids.end()); THEN("Encoded sequence is " << encoded_sequence) { REQUIRE(os.str() == encoded_sequence); } } @@ -53,7 +53,7 @@ TEST_CASE("Write header", "[parsing][forward_index]") {10, {1, 0, 0, 0, 10, 0, 0, 0}}})); GIVEN("Document count is " << document_count) WHEN("Header is written to stream") { - ds2i::Forward_Index_Builder::write_header(os, document_count); + pisa::Forward_Index_Builder::write_header(os, document_count); THEN("Encoded header is " << encoded_header) { REQUIRE(os.str() == encoded_header); } } } @@ -92,7 +92,7 @@ TEST_CASE("Build forward index batch", "[parsing][forward_index]") auto identity = [](std::string const &term) -> std::string { return term; }; GIVEN("a few test records") { - std::vector records{ + std::vector records{ {"Doc10", "lorem ipsum dolor sit amet consectetur adipiscing elit"}, {"Doc11", "integer rutrum felis et sagittis dapibus"}, {"Doc12", "vivamus ac velit nec purus molestie tincidunt"}, @@ -101,10 +101,10 @@ TEST_CASE("Build forward index batch", "[parsing][forward_index]") WHEN("write a batch to temp directory") { Temporary_Directory tmpdir; auto output_file = tmpdir.path() / "fwd"; - ds2i::Forward_Index_Builder::Batch_Process bp{ - 7, records, ds2i::Document_Id{10}, output_file.string()}; - ds2i::Forward_Index_Builder builder; - builder.run(bp, identity, ds2i::parse_plaintext_content); + pisa::Forward_Index_Builder::Batch_Process bp{ + 7, records, pisa::Document_Id{10}, output_file.string()}; + pisa::Forward_Index_Builder builder; + builder.run(bp, identity, pisa::parse_plaintext_content); THEN("documents are in check") { std::vector expected_documents{ "Doc10", "Doc11", "Doc12", "Doc13", "Doc14"}; @@ -123,7 +123,7 @@ TEST_CASE("Build forward index batch", "[parsing][forward_index]") REQUIRE(terms == expected_terms); } THEN("term IDs") { - ds2i::binary_collection coll((output_file.string() + ".batch.7").c_str()); + pisa::binary_collection coll((output_file.string() + ".batch.7").c_str()); std::vector> documents; for (auto seq_iter = ++coll.begin(); seq_iter != coll.end(); ++seq_iter) { auto seq = *seq_iter; @@ -151,9 +151,9 @@ void write_batch(std::string const & basename, write_lines(document_file, gsl::make_span(documents)); write_lines(term_file, gsl::make_span(terms)); std::ofstream os(basename); - ds2i::Forward_Index_Builder::write_header(os, collection.size()); + pisa::Forward_Index_Builder::write_header(os, collection.size()); for (auto const& seq : collection) { - ds2i::Forward_Index_Builder::write_document( + pisa::Forward_Index_Builder::write_document( os, seq.begin(), seq.end()); } } @@ -215,7 +215,7 @@ TEST_CASE("Merge forward index batches", "[parsing][forward_index]") WHEN("Merging function is called") { auto output_file = (dir / "fwd").string(); - ds2i::Forward_Index_Builder builder; + pisa::Forward_Index_Builder builder; builder.merge(output_file, 5, 3); THEN("documents are in check") { @@ -236,7 +236,7 @@ TEST_CASE("Merge forward index batches", "[parsing][forward_index]") REQUIRE(terms == expected_terms); } THEN("term IDs") { - ds2i::binary_collection coll((output_file).c_str()); + pisa::binary_collection coll((output_file).c_str()); std::vector> documents; for (auto seq_iter = ++coll.begin(); seq_iter != coll.end(); ++seq_iter) { auto seq = *seq_iter; @@ -258,11 +258,11 @@ TEST_CASE("Parse HTML content", "[parsing][forward_index][unit]") std::vector vec; auto map_word = [&](std::string &&word) { vec.push_back(word); }; SECTION("empty") { - ds2i::parse_html_content("", map_word); + pisa::parse_html_content("", map_word); REQUIRE(vec == std::vector{}); } SECTION("non-empty") { - ds2i::parse_html_content("loremipsum", map_word); + pisa::parse_html_content("loremipsum", map_word); REQUIRE(vec == std::vector{"lorem", "ipsum"}); } } @@ -279,8 +279,8 @@ TEST_CASE("Parse HTML content", "[parsing][forward_index][unit]") TEST_CASE("Build forward index", "[parsing][forward_index][integration]") { - auto next_record = [](std::istream &in) -> std::optional { - ds2i::Plaintext_Record record; + auto next_record = [](std::istream &in) -> std::optional { + pisa::Plaintext_Record record; if (in >> record) { return record; } @@ -298,24 +298,24 @@ TEST_CASE("Build forward index", "[parsing][forward_index][integration]") std::string output = (dir / "fwd").string(); std::ifstream is(input); - ds2i::Forward_Index_Builder builder; + pisa::Forward_Index_Builder builder; builder.build( is, output, next_record, [](std::string &&term) -> std::string { return std::forward(term); }, - ds2i::parse_plaintext_content, + pisa::parse_plaintext_content, batch_size, thread_count); THEN("The collection mapped to terms matches input") { auto term_map = load_term_map(output); - ds2i::binary_collection coll((output).c_str()); + pisa::binary_collection coll((output).c_str()); auto seq_iter = coll.begin(); REQUIRE(*seq_iter->begin() == 10000); ++seq_iter; std::ifstream plain_is(input); - std::optional record = std::nullopt; + std::optional record = std::nullopt; while ((record = next_record(plain_is)).has_value()) { std::vector original_body; std::istringstream content_stream(record->content()); @@ -330,7 +330,7 @@ TEST_CASE("Build forward index", "[parsing][forward_index][integration]") REQUIRE(produced_body == original_body); ++seq_iter; } - auto batch_files = ds2i::ls(dir, [](auto const &filename) { + auto batch_files = pisa::ls(dir, [](auto const &filename) { return filename.find("batch") != std::string::npos; }); REQUIRE(batch_files.empty()); @@ -355,8 +355,8 @@ TEST_CASE("Build forward index (WARC)", "[.][parsing][forward_index][integration [](unsigned char c) { return std::tolower(c); }); return stem::Porter2{}.stem(term); }; - auto next_plain_record = [](std::istream &in) -> std::optional { - ds2i::Plaintext_Record record; + auto next_plain_record = [](std::istream &in) -> std::optional { + pisa::Plaintext_Record record; if (in >> record) { return record; } @@ -374,24 +374,24 @@ TEST_CASE("Build forward index (WARC)", "[.][parsing][forward_index][integration std::string output = (dir / "fwd").string(); std::ifstream is(input); - ds2i::Forward_Index_Builder builder; + pisa::Forward_Index_Builder builder; builder.build(is, output, next_record, process_term, - ds2i::parse_html_content, + pisa::parse_html_content, batch_size, thread_count); THEN("The collection mapped to terms matches input") { auto term_map = load_term_map(output); - ds2i::binary_collection coll((output).c_str()); + pisa::binary_collection coll((output).c_str()); auto seq_iter = coll.begin(); CHECK(*seq_iter->begin() == 10000); ++seq_iter; std::ifstream plain_is(DS2I_SOURCE_DIR "/test/test_data/clueweb1k.plaintext"); std::ifstream doc_is(output + ".documents"); - std::optional record = std::nullopt; + std::optional record = std::nullopt; while ((record = next_plain_record(plain_is)).has_value()) { std::string doc; std::getline(doc_is, doc); @@ -410,7 +410,7 @@ TEST_CASE("Build forward index (WARC)", "[.][parsing][forward_index][integration CHECK(produced_body == original_body); ++seq_iter; } - auto batch_files = ds2i::ls(dir, [](auto const &filename) { + auto batch_files = pisa::ls(dir, [](auto const &filename) { return filename.find("batch") != std::string::npos; }); REQUIRE(batch_files.empty()); diff --git a/test/test_freq_index.cpp b/test/test_freq_index.cpp index 3d44bda4e..4da0ba628 100644 --- a/test/test_freq_index.cpp +++ b/test/test_freq_index.cpp @@ -19,9 +19,9 @@ template void test_freq_index() { - ds2i::global_parameters params; + pisa::global_parameters params; uint64_t universe = 20000; - typedef ds2i::freq_index + typedef pisa::freq_index collection_type; typename collection_type::builder b(universe, params); @@ -45,13 +45,13 @@ void test_freq_index() { collection_type coll; b.build(coll); - ds2i::mapper::freeze(coll, "temp.bin"); + pisa::mapper::freeze(coll, "temp.bin"); } { collection_type coll; mio::mmap_source m("temp.bin"); - ds2i::mapper::map(coll, m); + pisa::mapper::map(coll, m); for (size_t i = 0; i < posting_lists.size(); ++i) { auto const& plist = posting_lists[i]; @@ -70,11 +70,11 @@ void test_freq_index() TEST_CASE("freq_index") { - using ds2i::indexed_sequence; - using ds2i::strict_sequence; - using ds2i::positive_sequence; - using ds2i::partitioned_sequence; - using ds2i::uniform_partitioned_sequence; + using pisa::indexed_sequence; + using pisa::strict_sequence; + using pisa::positive_sequence; + using pisa::partitioned_sequence; + using pisa::uniform_partitioned_sequence; test_freq_index>(); diff --git a/test/test_generic_sequence.hpp b/test/test_generic_sequence.hpp index dba777d01..e4916a2f9 100644 --- a/test/test_generic_sequence.hpp +++ b/test/test_generic_sequence.hpp @@ -176,7 +176,7 @@ void test_sequence(SequenceReader r, std::vector const& seq, } template -typename ds2i::if_has_next_geq +typename pisa::if_has_next_geq test_sequence(SequenceReader r, std::vector const& seq, next_geq_tag const&) { @@ -196,9 +196,9 @@ inline void test_sequence(SequenceType, uint64_t universe, std::vector const& seq) { - ds2i::bit_vector_builder bvb; + pisa::bit_vector_builder bvb; SequenceType::write(bvb, seq.begin(), universe, seq.size(), params); - ds2i::bit_vector bv(&bvb); + pisa::bit_vector bv(&bvb); typename SequenceType::enumerator r(bv, 0, universe, seq.size(), params); test_sequence(r, seq); } diff --git a/test/test_html.cpp b/test/test_html.cpp index 0819d8c93..eec32e0ee 100644 --- a/test/test_html.cpp +++ b/test/test_html.cpp @@ -5,7 +5,7 @@ #include "parsing/html.hpp" -using namespace ds2i::parsing::html; +using namespace pisa::parsing::html; TEST_CASE("Parse WARC version", "[warc][unit]") { diff --git a/test/test_indexed_sequence.cpp b/test/test_indexed_sequence.cpp index 8a5aa710f..007910d51 100644 --- a/test/test_indexed_sequence.cpp +++ b/test/test_indexed_sequence.cpp @@ -9,7 +9,7 @@ TEST_CASE("indexed_sequence") { - ds2i::global_parameters params; + pisa::global_parameters params; std::vector avg_gaps = { 1.1, 1.9, 2.5, 3, 4, 5, 10 }; for (auto avg_gap: avg_gaps) { @@ -17,6 +17,6 @@ TEST_CASE("indexed_sequence") uint64_t universe = uint64_t(n * avg_gap); auto seq = random_sequence(universe, n, true); - test_sequence(ds2i::indexed_sequence(), params, universe, seq); + test_sequence(pisa::indexed_sequence(), params, universe, seq); } } diff --git a/test/test_invert.cpp b/test/test_invert.cpp index bbf211e19..2547d8965 100644 --- a/test/test_invert.cpp +++ b/test/test_invert.cpp @@ -16,8 +16,8 @@ #include "temporary_directory.hpp" using namespace boost::filesystem; -using namespace ds2i; -using namespace ds2i::literals; +using namespace pisa; +using namespace pisa::literals; using posting_vector_type = std::vector>; using iterator_type = decltype(std::declval().begin()); @@ -289,7 +289,7 @@ TEST_CASE("Invert collection", "[invert][unit]") reinterpret_cast(mmf.data()) + mmf.size() / sizeof(uint32_t)); REQUIRE(d == document_data); REQUIRE(f == frequency_data); - auto batch_files = ds2i::ls(tmpdir.path().string(), [](auto const &filename) { + auto batch_files = pisa::ls(tmpdir.path().string(), [](auto const &filename) { return filename.find("batch") != std::string::npos; }); REQUIRE(batch_files.empty()); diff --git a/test/test_mapper.cpp b/test/test_mapper.cpp index 4f0755696..fc4e071c5 100644 --- a/test/test_mapper.cpp +++ b/test/test_mapper.cpp @@ -9,7 +9,7 @@ TEST_CASE("basic_map") { - ds2i::mapper::mappable_vector vec; + pisa::mapper::mappable_vector vec; REQUIRE(vec.size() == 0U); int nums[] = {1, 2, 3, 4}; @@ -19,12 +19,12 @@ TEST_CASE("basic_map") REQUIRE(1 == vec[0]); REQUIRE(4 == vec[3]); - ds2i::mapper::freeze(vec, "temp.bin"); + pisa::mapper::freeze(vec, "temp.bin"); { - ds2i::mapper::mappable_vector mapped_vec; + pisa::mapper::mappable_vector mapped_vec; mio::mmap_source m("temp.bin"); - ds2i::mapper::map(mapped_vec, m); + pisa::mapper::map(mapped_vec, m); REQUIRE(vec.size() == mapped_vec.size()); REQUIRE(std::equal(vec.begin(), vec.end(), mapped_vec.begin())); } @@ -53,16 +53,16 @@ class complex_struct { } uint64_t m_a; - ds2i::mapper::mappable_vector m_b; + pisa::mapper::mappable_vector m_b; }; TEST_CASE("complex_struct_map") { complex_struct s; s.init(); - ds2i::mapper::freeze(s, "temp.bin"); + pisa::mapper::freeze(s, "temp.bin"); - REQUIRE(24 == ds2i::mapper::size_of(s)); + REQUIRE(24 == pisa::mapper::size_of(s)); complex_struct mapped_s; REQUIRE(0 == mapped_s.m_a); @@ -70,7 +70,7 @@ TEST_CASE("complex_struct_map") { mio::mmap_source m("temp.bin"); - ds2i::mapper::map(mapped_s, m); + pisa::mapper::map(mapped_s, m); REQUIRE(s.m_a == mapped_s.m_a); REQUIRE(s.m_b.size() == mapped_s.m_b.size()); } diff --git a/test/test_partitioned_sequence.cpp b/test/test_partitioned_sequence.cpp index 15b069bcc..60dc15ed3 100644 --- a/test/test_partitioned_sequence.cpp +++ b/test/test_partitioned_sequence.cpp @@ -11,7 +11,7 @@ #include "sequence/strict_sequence.hpp" -namespace ds2i { +namespace pisa { class partitioned_sequence_test { public: @@ -49,22 +49,22 @@ template void test_partitioned_sequence(uint64_t universe, std::vector const& seq) { - ds2i::global_parameters params; - typedef ds2i::partitioned_sequence sequence_type; + pisa::global_parameters params; + typedef pisa::partitioned_sequence sequence_type; - ds2i::bit_vector_builder bvb; + pisa::bit_vector_builder bvb; sequence_type::write(bvb, seq.begin(), universe, seq.size(), params); - ds2i::bit_vector bv(&bvb); + pisa::bit_vector bv(&bvb); typename sequence_type::enumerator r(bv, 0, universe, seq.size(), params); - ds2i::partitioned_sequence_test::test_construction(r, seq); + pisa::partitioned_sequence_test::test_construction(r, seq); test_sequence(r, seq); } TEST_CASE("partitioned_sequence") { - using ds2i::indexed_sequence; - using ds2i::strict_sequence; + using pisa::indexed_sequence; + using pisa::strict_sequence; // test singleton sequences { diff --git a/test/test_positive_sequence.cpp b/test/test_positive_sequence.cpp index 05cdd619c..493b65e76 100644 --- a/test/test_positive_sequence.cpp +++ b/test/test_positive_sequence.cpp @@ -15,16 +15,16 @@ template void test_positive_sequence() { srand(42); - ds2i::global_parameters params; + pisa::global_parameters params; size_t n = 50000; std::vector values(n); std::generate(values.begin(), values.end(), []() { return (rand() % 256) + 1; }); uint64_t universe = std::accumulate(values.begin(), values.end(), 0) + 1; - typedef ds2i::positive_sequence sequence_type; - ds2i::bit_vector_builder bvb; + typedef pisa::positive_sequence sequence_type; + pisa::bit_vector_builder bvb; sequence_type::write(bvb, values.begin(), universe, values.size(), params); - ds2i::bit_vector bv(&bvb); + pisa::bit_vector bv(&bvb); typename sequence_type::enumerator r(bv, 0, universe, values.size(), params); for (size_t i = 0; i < n; ++i) { @@ -38,7 +38,7 @@ void test_positive_sequence() TEST_CASE("positive_sequence") { - test_positive_sequence(); - test_positive_sequence>(); - test_positive_sequence>(); + test_positive_sequence(); + test_positive_sequence>(); + test_positive_sequence>(); } diff --git a/test/test_ranked_queries.cpp b/test/test_ranked_queries.cpp index bdc45fb2c..4b8ef4a59 100644 --- a/test/test_ranked_queries.cpp +++ b/test/test_ranked_queries.cpp @@ -7,7 +7,7 @@ #include "index_types.hpp" #include "query/queries.hpp" -namespace ds2i { namespace test { +namespace pisa { namespace test { struct index_initialization { @@ -75,26 +75,26 @@ namespace ds2i { namespace test { }} -TEST_CASE_METHOD(ds2i::test::index_initialization, "wand") +TEST_CASE_METHOD(pisa::test::index_initialization, "wand") { - ds2i::wand_query wand_q(wdata, 10); + pisa::wand_query wand_q(wdata, 10); test_against_or(wand_q); } -TEST_CASE_METHOD(ds2i::test::index_initialization, "maxscore") +TEST_CASE_METHOD(pisa::test::index_initialization, "maxscore") { - ds2i::maxscore_query maxscore_q(wdata, 10); + pisa::maxscore_query maxscore_q(wdata, 10); test_against_or(maxscore_q); } -TEST_CASE_METHOD(ds2i::test::index_initialization, "block_max_maxscore") +TEST_CASE_METHOD(pisa::test::index_initialization, "block_max_maxscore") { - ds2i::block_max_maxscore_query bmm_q(wdata, 10); + pisa::block_max_maxscore_query bmm_q(wdata, 10); test_against_or(bmm_q); } /// Issue #26 https://github.com/pisa-engine/pisa/issues/26 -TEST_CASE_METHOD(ds2i::test::index_initialization, "topk_size_ranked_or") +TEST_CASE_METHOD(pisa::test::index_initialization, "topk_size_ranked_or") { test_k_size(); } diff --git a/test/test_sample_index.cpp b/test/test_sample_index.cpp index 10f950788..2fef42289 100644 --- a/test/test_sample_index.cpp +++ b/test/test_sample_index.cpp @@ -30,13 +30,13 @@ TEST_CASE( "sample_index_full") { // given - using ds2i::binary_freq_collection; + using pisa::binary_freq_collection; std::string input("test_data/test_collection"); std::string output("temp_collection"); auto original = binary_freq_collection(input.c_str()); // when - ds2i::sample_inverted_index(input, output, 10000); + pisa::sample_inverted_index(input, output, 10000); auto sampled = binary_freq_collection(output.c_str()); // then @@ -57,14 +57,14 @@ TEST_CASE( "sample_index_full") TEST_CASE( "sample_index") { // given - using ds2i::binary_freq_collection; + using pisa::binary_freq_collection; std::string input("test_data/test_collection"); std::string output("temp_collection"); auto original = binary_freq_collection(input.c_str()); size_t doc_limit = 2000; // when - ds2i::sample_inverted_index(input, output, doc_limit); + pisa::sample_inverted_index(input, output, doc_limit); auto sampled = binary_freq_collection(output.c_str()); // then diff --git a/test/test_sequence_collection.cpp b/test/test_sequence_collection.cpp index d715b2812..1d5be5dad 100644 --- a/test/test_sequence_collection.cpp +++ b/test/test_sequence_collection.cpp @@ -16,9 +16,9 @@ template void test_sequence_collection() { - ds2i::global_parameters params; + pisa::global_parameters params; uint64_t universe = 10000; - typedef ds2i::sequence_collection + typedef pisa::sequence_collection collection_type; typename collection_type::builder b(params); @@ -33,13 +33,13 @@ void test_sequence_collection() { collection_type coll; b.build(coll); - ds2i::mapper::freeze(coll, "temp.bin"); + pisa::mapper::freeze(coll, "temp.bin"); } { collection_type coll; mio::mmap_source m("temp.bin"); - ds2i::mapper::map(coll, m); + pisa::mapper::map(coll, m); for (size_t i = 0; i < sequences.size(); ++i) { test_sequence(coll[i], sequences[i]); @@ -49,7 +49,7 @@ void test_sequence_collection() TEST_CASE( "sequence_collection") { - test_sequence_collection(); - test_sequence_collection>(); - test_sequence_collection>(); + test_sequence_collection(); + test_sequence_collection>(); + test_sequence_collection>(); } diff --git a/test/test_strict_elias_fano.cpp b/test/test_strict_elias_fano.cpp index 56605267b..416452c10 100644 --- a/test/test_strict_elias_fano.cpp +++ b/test/test_strict_elias_fano.cpp @@ -10,11 +10,11 @@ TEST_CASE( "strict_elias_fano") { - ds2i::global_parameters params; + pisa::global_parameters params; uint64_t n = 10000; uint64_t universe = uint64_t(2 * n); auto seq = random_sequence(universe, n, true); - test_sequence(ds2i::strict_elias_fano(), params, universe, seq); + test_sequence(pisa::strict_elias_fano(), params, universe, seq); } diff --git a/test/test_uniform_partitioned_sequence.cpp b/test/test_uniform_partitioned_sequence.cpp index 0e6e8954c..5c9374b0f 100644 --- a/test/test_uniform_partitioned_sequence.cpp +++ b/test/test_uniform_partitioned_sequence.cpp @@ -10,21 +10,21 @@ TEST_CASE( "uniform_partitioned_sequence") { - ds2i::global_parameters params; - using ds2i::indexed_sequence; - using ds2i::strict_sequence; + pisa::global_parameters params; + using pisa::indexed_sequence; + using pisa::strict_sequence; // test singleton sequences std::vector short_seq; short_seq.push_back(0); - test_sequence(ds2i::uniform_partitioned_sequence(), + test_sequence(pisa::uniform_partitioned_sequence(), params, 1, short_seq); - test_sequence(ds2i::uniform_partitioned_sequence(), + test_sequence(pisa::uniform_partitioned_sequence(), params, 1, short_seq); short_seq[0] = 1; - test_sequence(ds2i::uniform_partitioned_sequence(), + test_sequence(pisa::uniform_partitioned_sequence(), params, 2, short_seq); - test_sequence(ds2i::uniform_partitioned_sequence(), + test_sequence(pisa::uniform_partitioned_sequence(), params, 2, short_seq); std::vector avg_gaps = { 1.1, 1.9, 2.5, 3, 4, 5, 10 }; @@ -33,9 +33,9 @@ TEST_CASE( "uniform_partitioned_sequence") uint64_t universe = uint64_t(n * avg_gap); auto seq = random_sequence(universe, n, true); - test_sequence(ds2i::uniform_partitioned_sequence(), + test_sequence(pisa::uniform_partitioned_sequence(), params, universe, seq); - test_sequence(ds2i::uniform_partitioned_sequence(), + test_sequence(pisa::uniform_partitioned_sequence(), params, universe, seq); } }