From 34bc3da70a8ba3b838bf129b1bf2c8f9607a72b8 Mon Sep 17 00:00:00 2001 From: Enrico Seiler Date: Thu, 6 Feb 2025 15:12:45 +0100 Subject: [PATCH] [INFRA] Put SDSL into contrib --- .github/workflows/cron_latest_libraries.yml | 2 +- README.md | 1 - cmake/cpack_install.cmake.in | 1 - cmake/package-lock.cmake | 10 - cmake/seqan3-config.cmake | 34 - cmake/seqan3-install.cmake | 1 - .../alphabet/container/bitpacked_sequence.hpp | 3 +- include/seqan3/contrib/sdsl-lite.hpp | 35074 ++++++++++++++++ .../core/detail/customisation_point.hpp | 2 + include/seqan3/core/platform.hpp | 8 - .../io/detail/ignore_output_iterator.hpp | 2 + include/seqan3/io/detail/in_file_iterator.hpp | 1 + .../dream_index/interleaved_bloom_filter.hpp | 3 +- .../search/fm_index/bi_fm_index_cursor.hpp | 3 +- include/seqan3/search/fm_index/concept.hpp | 3 +- include/seqan3/search/fm_index/fm_index.hpp | 3 +- .../search/fm_index/fm_index_cursor.hpp | 3 +- .../utility/container/aligned_allocator.hpp | 1 + include/seqan3/utility/math.hpp | 1 + test/documentation/seqan3_doxygen_cfg.in | 2 +- .../range/container_push_back_benchmark.cpp | 3 +- .../range/container_seq_read_benchmark.cpp | 3 +- .../range/container_seq_write_benchmark.cpp | 3 +- test/scripts/amalgamate-sdsl.sh | 53 + .../core/detail/template_inspection_usage.cpp | 2 + test/snippet/utility/tuple_utility.cpp | 2 + .../container/container_concept_test.cpp | 3 +- 27 files changed, 35150 insertions(+), 77 deletions(-) create mode 100644 include/seqan3/contrib/sdsl-lite.hpp create mode 100755 test/scripts/amalgamate-sdsl.sh diff --git a/.github/workflows/cron_latest_libraries.yml b/.github/workflows/cron_latest_libraries.yml index 5a3908604e..22a3bb687f 100644 --- a/.github/workflows/cron_latest_libraries.yml +++ b/.github/workflows/cron_latest_libraries.yml @@ -51,7 +51,7 @@ jobs: FILE="cmake/package-lock.cmake" sed -i -E 's@(set \(SEQAN3_\S+_VERSION )[^\)]+\)@\1main)@g' $FILE sed -i -E 's@VERSION( \$\{SEQAN3_\S+_VERSION\})@GIT_TAG\1@g' $FILE - sed -i 
-E 's@SEQAN3_(SDSL|CEREAL)_VERSION main@SEQAN3_\1_VERSION master@g' $FILE + sed -i -E 's@SEQAN3_CEREAL_VERSION main@SEQAN3_CEREAL_VERSION master@g' $FILE cat $FILE - name: Configure tests diff --git a/README.md b/README.md index e0b771bba0..48fc5b24f1 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,6 @@ Please see the [online documentation](https://docs.seqan.de/seqan3/main_user/) f | | [Clang](https://clang.llvm.org) | ≥ 17 | tested with `-stdlib=libc++` | | | [IntelOneAPI]() | ≥ 2024.0 | | |**build system** | [CMake](https://cmake.org) | ≥ 3.20 | optional, but recommended | -|**required libs** | [SDSL](https://github.com/xxsds/sdsl-lite) | ≥ 3.0.3 | | |**optional libs** | [cereal](https://github.com/USCiLab/cereal) | ≥ 1.3.1 | required for serialisation and CTD support | | | [zlib](https://github.com/madler/zlib) | ≥ 1.2 | required for `*.gz` and `.bam` file support | | | [bzip2](https://www.sourceware.org/bzip2) | ≥ 1.0 | required for `*.bz2` file support | diff --git a/cmake/cpack_install.cmake.in b/cmake/cpack_install.cmake.in index 581db45107..97c2635355 100644 --- a/cmake/cpack_install.cmake.in +++ b/cmake/cpack_install.cmake.in @@ -5,7 +5,6 @@ # Only if creating the source package (`make package_source`): # Copy dependency include directories into package's staging folder if (CPACK_SOURCE_INSTALLED_DIRECTORIES) - file (COPY "@SEQAN3_SDSL_INCLUDE_DIR@/" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/include/seqan3/vendor") file (COPY "@SEQAN3_CEREAL_INCLUDE_DIR@/" DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/include/seqan3/vendor") configure_file ("@CPM_DOWNLOAD_LOCATION@" "${CMAKE_CURRENT_BINARY_DIR}/cmake/CPM.cmake" COPYONLY) if ("@use_ccache_ADDED@" STREQUAL "YES") diff --git a/cmake/package-lock.cmake b/cmake/package-lock.cmake index 5919ada997..6d77d578b1 100644 --- a/cmake/package-lock.cmake +++ b/cmake/package-lock.cmake @@ -23,16 +23,6 @@ CPMDeclarePackage (cereal GITHUB_REPOSITORY USCiLab/cereal DOWNLOAD_ONLY TRUE QUIET YES) -# sdsl-lite -# Use URL download 
of the commit archive such that we do not clone submodules -# Package name is still sdsl (name as v2 at xxsds/sdsl), but sdsl-lite is not currently being packaged -# To avoid accidentally using the older sdsl, NAME is set to sdsl-lite -set (SEQAN3_SDSL_VERSION 14cd017027ea742353fc5b500d1cb1d95896b77e CACHE STRING "" FORCE) -CPMDeclarePackage (sdsl-lite - NAME sdsl-lite - URL https://github.com/xxsds/sdsl-lite/archive/${SEQAN3_SDSL_VERSION}.tar.gz # master - DOWNLOAD_ONLY YES - QUIET YES) # benchmark set (SEQAN3_BENCHMARK_VERSION 1.9.1 CACHE STRING "" FORCE) CPMDeclarePackage (benchmark diff --git a/cmake/seqan3-config.cmake b/cmake/seqan3-config.cmake index 451c443a97..0470eea9b5 100644 --- a/cmake/seqan3-config.cmake +++ b/cmake/seqan3-config.cmake @@ -15,10 +15,6 @@ # C++20 # pthread # -# SeqAn requires the following libraries: -# -# SDSL -- the succinct data structure library -# # SeqAn has the following optional dependencies: # # ZLIB -- zlib compression library @@ -137,36 +133,6 @@ else () seqan3_config_error ("SeqAn3 include directory could not be found (SEQAN3_INCLUDE_DIR: '${SEQAN3_INCLUDE_DIR}')") endif () -# ---------------------------------------------------------------------------- -# Require SDSL -# ---------------------------------------------------------------------------- - -find_path (SEQAN3_SDSL_INCLUDE_DIR - NAMES sdsl/version.hpp - HINTS "${SEQAN3_INCLUDE_DIR}/seqan3/vendor") - -# 1) Check the vendor directory of SeqAn3. This directory exists for source packages and installed packages. -if (SEQAN3_SDSL_INCLUDE_DIR) - seqan3_config_print ("Required dependency: SDSL found.") - set (SEQAN3_DEPENDENCY_INCLUDE_DIRS ${SEQAN3_SDSL_INCLUDE_DIR} ${SEQAN3_DEPENDENCY_INCLUDE_DIRS}) - # 2) Get package via CPM. 
-elseif (SEQAN3_HAS_CPM) - CPMGetPackage (sdsl-lite) - - find_path (SEQAN3_SDSL_INCLUDE_DIR - NAMES sdsl/version.hpp - HINTS "${sdsl-lite_SOURCE_DIR}/include") - - if (SEQAN3_SDSL_INCLUDE_DIR) - seqan3_config_print ("Required dependency: SDSL found.") - set (SEQAN3_DEPENDENCY_INCLUDE_DIRS ${SEQAN3_SDSL_INCLUDE_DIR} ${SEQAN3_DEPENDENCY_INCLUDE_DIRS}) - else () - seqan3_config_error ("The SDSL library is required, but wasn't found.") - endif () -else () - seqan3_config_error ("The SDSL library is required, but wasn't found.") -endif () - # ---------------------------------------------------------------------------- # Force-deactivate optional dependencies # ---------------------------------------------------------------------------- diff --git a/cmake/seqan3-install.cmake b/cmake/seqan3-install.cmake index c8e45ecca3..9514a17a5b 100644 --- a/cmake/seqan3-install.cmake +++ b/cmake/seqan3-install.cmake @@ -21,5 +21,4 @@ install (FILES "${SEQAN3_CLONE_DIR}/cmake/seqan3-config.cmake" "${SEQAN3_CLONE_D # install seqan3 header files in /include/seqan3 install (DIRECTORY "${SEQAN3_INCLUDE_DIR}/seqan3" TYPE INCLUDE) -install (DIRECTORY "${SEQAN3_SDSL_INCLUDE_DIR}/sdsl" DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/seqan3/vendor") install (DIRECTORY "${SEQAN3_CEREAL_INCLUDE_DIR}/cereal" DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/seqan3/vendor") diff --git a/include/seqan3/alphabet/container/bitpacked_sequence.hpp b/include/seqan3/alphabet/container/bitpacked_sequence.hpp index 0eca90f7d1..ae3eadff7d 100644 --- a/include/seqan3/alphabet/container/bitpacked_sequence.hpp +++ b/include/seqan3/alphabet/container/bitpacked_sequence.hpp @@ -14,11 +14,10 @@ #include #include -#include - #include #include #include +#include #include #include #include diff --git a/include/seqan3/contrib/sdsl-lite.hpp b/include/seqan3/contrib/sdsl-lite.hpp new file mode 100644 index 0000000000..ec84b9f1d1 --- /dev/null +++ b/include/seqan3/contrib/sdsl-lite.hpp @@ -0,0 +1,35074 @@ +// SPDX-FileCopyrightText: 
2016 SDSL Project Authors +// SPDX-License-Identifier: BSD-3-Clause + +// This file was generated by https://github.com/seqan/seqan3/blob/main/test/scripts/amalgamate-sdsl.sh + +#pragma once + +// clang-format off +#ifndef INCLUDED_SDSL_BITVECTORS +#define INCLUDED_SDSL_BITVECTORS +#ifndef SDSL_BIT_VECTOR_IL +#define SDSL_BIT_VECTOR_IL +#include +#include +#include +#include +#include +#include +#include +#ifndef INCLUDED_SDSL_BITS +#define INCLUDED_SDSL_BITS +#if defined(__x86_64__) +#include +#endif +#if defined(__aarch64__) || defined(_M_ARM64) +#include +#endif +#if defined(__powerpc__) || defined(__powerpc64__) +#include +#endif +#include +#include +#ifdef __SSE4_2__ +# include +#endif +#ifdef __BMI2__ +# include +#endif +#ifdef WIN32 +# include +#endif +namespace sdsl +{ +template +struct bits_impl +{ + bits_impl() = delete; + static constexpr uint64_t all_set{-1ULL}; + static constexpr uint64_t deBruijn64{0x0218A392CD3D5DBFULL}; + static constexpr uint32_t lt_deBruijn_to_idx[64] = {0, 1, 2, 7, 3, 13, 8, 19, 4, 25, 14, 28, 9, 34, 20, 40, + 5, 17, 26, 38, 15, 46, 29, 48, 10, 31, 35, 54, 21, 50, 41, 57, + 63, 6, 12, 18, 24, 27, 33, 39, 16, 37, 45, 47, 30, 53, 49, 56, + 62, 11, 23, 32, 36, 44, 52, 55, 61, 22, 43, 51, 60, 42, 59, 58}; + static constexpr uint64_t lt_fib[92] = {1, + 2, + 3, + 5, + 8, + 13, + 21, + 34, + 55, + 89, + 144, + 233, + 377, + 610, + 987, + 1597, + 2584, + 4181, + 6765, + 10946, + 17711, + 28657, + 46368, + 75025, + 121393, + 196418, + 317811, + 514229, + 832040, + 1346269, + 2178309, + 3524578, + 5702887, + 9227465, + 14930352, + 24157817, + 39088169, + 63245986, + 102334155, + 165580141, + 267914296, + 433494437, + 701408733, + 1134903170, + 1836311903, + 2971215073ULL, + 0x11e8d0a40ULL, + 0x1cfa62f21ULL, + 0x2ee333961ULL, + 0x4bdd96882ULL, + 0x7ac0ca1e3ULL, + 0xc69e60a65ULL, + 0x1415f2ac48ULL, + 0x207fd8b6adULL, + 0x3495cb62f5ULL, + 0x5515a419a2ULL, + 0x89ab6f7c97ULL, + 0xdec1139639ULL, + 0x1686c8312d0ULL, + 0x2472d96a909ULL, + 
0x3af9a19bbd9ULL, + 0x5f6c7b064e2ULL, + 0x9a661ca20bbULL, + 0xf9d297a859dULL, + 0x19438b44a658ULL, + 0x28e0b4bf2bf5ULL, + 0x42244003d24dULL, + 0x6b04f4c2fe42ULL, + 0xad2934c6d08fULL, + 0x1182e2989ced1ULL, + 0x1c5575e509f60ULL, + 0x2dd8587da6e31ULL, + 0x4a2dce62b0d91ULL, + 0x780626e057bc2ULL, + 0xc233f54308953ULL, + 0x13a3a1c2360515ULL, + 0x1fc6e116668e68ULL, + 0x336a82d89c937dULL, + 0x533163ef0321e5ULL, + 0x869be6c79fb562ULL, + 0xd9cd4ab6a2d747ULL, + 0x16069317e428ca9ULL, + 0x23a367c34e563f0ULL, + 0x39a9fadb327f099ULL, + 0x5d4d629e80d5489ULL, + 0x96f75d79b354522ULL, + 0xf444c01834299abULL, + 0x18b3c1d91e77decdULL, + 0x27f80ddaa1ba7878ULL, + 0x40abcfb3c0325745ULL, + 0x68a3dd8e61eccfbdULL, + 0xa94fad42221f2702ULL}; + static constexpr uint8_t lt_cnt[256] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, + 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, + 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, + 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, + 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, + 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, + 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8}; + static constexpr uint32_t lt_hi[256] = { + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}; + static constexpr uint64_t lo_set[65] = { + 0x0000000000000000ULL, 0x0000000000000001ULL, 0x0000000000000003ULL, 0x0000000000000007ULL, + 0x000000000000000FULL, 0x000000000000001FULL, 0x000000000000003FULL, 0x000000000000007FULL, + 0x00000000000000FFULL, 0x00000000000001FFULL, 0x00000000000003FFULL, 0x00000000000007FFULL, + 0x0000000000000FFFULL, 0x0000000000001FFFULL, 0x0000000000003FFFULL, 0x0000000000007FFFULL, + 0x000000000000FFFFULL, 0x000000000001FFFFULL, 0x000000000003FFFFULL, 0x000000000007FFFFULL, + 0x00000000000FFFFFULL, 0x00000000001FFFFFULL, 0x00000000003FFFFFULL, 0x00000000007FFFFFULL, + 0x0000000000FFFFFFULL, 0x0000000001FFFFFFULL, 0x0000000003FFFFFFULL, 0x0000000007FFFFFFULL, + 0x000000000FFFFFFFULL, 0x000000001FFFFFFFULL, 0x000000003FFFFFFFULL, 0x000000007FFFFFFFULL, + 0x00000000FFFFFFFFULL, 0x00000001FFFFFFFFULL, 0x00000003FFFFFFFFULL, 0x00000007FFFFFFFFULL, + 0x0000000FFFFFFFFFULL, 0x0000001FFFFFFFFFULL, 0x0000003FFFFFFFFFULL, 0x0000007FFFFFFFFFULL, + 0x000000FFFFFFFFFFULL, 0x000001FFFFFFFFFFULL, 0x000003FFFFFFFFFFULL, 0x000007FFFFFFFFFFULL, + 0x00000FFFFFFFFFFFULL, 0x00001FFFFFFFFFFFULL, 0x00003FFFFFFFFFFFULL, 0x00007FFFFFFFFFFFULL, + 0x0000FFFFFFFFFFFFULL, 0x0001FFFFFFFFFFFFULL, 0x0003FFFFFFFFFFFFULL, 0x0007FFFFFFFFFFFFULL, + 0x000FFFFFFFFFFFFFULL, 0x001FFFFFFFFFFFFFULL, 0x003FFFFFFFFFFFFFULL, 0x007FFFFFFFFFFFFFULL, + 0x00FFFFFFFFFFFFFFULL, 0x01FFFFFFFFFFFFFFULL, 0x03FFFFFFFFFFFFFFULL, 0x07FFFFFFFFFFFFFFULL, + 0x0FFFFFFFFFFFFFFFULL, 0x1FFFFFFFFFFFFFFFULL, 0x3FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL, + 0xFFFFFFFFFFFFFFFFULL}; + static constexpr uint64_t lo_unset[65] = { + 
0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFEULL, 0xFFFFFFFFFFFFFFFCULL, 0xFFFFFFFFFFFFFFF8ULL, + 0xFFFFFFFFFFFFFFF0ULL, 0xFFFFFFFFFFFFFFE0ULL, 0xFFFFFFFFFFFFFFC0ULL, 0xFFFFFFFFFFFFFF80ULL, + 0xFFFFFFFFFFFFFF00ULL, 0xFFFFFFFFFFFFFE00ULL, 0xFFFFFFFFFFFFFC00ULL, 0xFFFFFFFFFFFFF800ULL, + 0xFFFFFFFFFFFFF000ULL, 0xFFFFFFFFFFFFE000ULL, 0xFFFFFFFFFFFFC000ULL, 0xFFFFFFFFFFFF8000ULL, + 0xFFFFFFFFFFFF0000ULL, 0xFFFFFFFFFFFE0000ULL, 0xFFFFFFFFFFFC0000ULL, 0xFFFFFFFFFFF80000ULL, + 0xFFFFFFFFFFF00000ULL, 0xFFFFFFFFFFE00000ULL, 0xFFFFFFFFFFC00000ULL, 0xFFFFFFFFFF800000ULL, + 0xFFFFFFFFFF000000ULL, 0xFFFFFFFFFE000000ULL, 0xFFFFFFFFFC000000ULL, 0xFFFFFFFFF8000000ULL, + 0xFFFFFFFFF0000000ULL, 0xFFFFFFFFE0000000ULL, 0xFFFFFFFFC0000000ULL, 0xFFFFFFFF80000000ULL, + 0xFFFFFFFF00000000ULL, 0xFFFFFFFE00000000ULL, 0xFFFFFFFC00000000ULL, 0xFFFFFFF800000000ULL, + 0xFFFFFFF000000000ULL, 0xFFFFFFE000000000ULL, 0xFFFFFFC000000000ULL, 0xFFFFFF8000000000ULL, + 0xFFFFFF0000000000ULL, 0xFFFFFE0000000000ULL, 0xFFFFFC0000000000ULL, 0xFFFFF80000000000ULL, + 0xFFFFF00000000000ULL, 0xFFFFE00000000000ULL, 0xFFFFC00000000000ULL, 0xFFFF800000000000ULL, + 0xFFFF000000000000ULL, 0xFFFE000000000000ULL, 0xFFFC000000000000ULL, 0xFFF8000000000000ULL, + 0xFFF0000000000000ULL, 0xFFE0000000000000ULL, 0xFFC0000000000000ULL, 0xFF80000000000000ULL, + 0xFF00000000000000ULL, 0xFE00000000000000ULL, 0xFC00000000000000ULL, 0xF800000000000000ULL, + 0xF000000000000000ULL, 0xE000000000000000ULL, 0xC000000000000000ULL, 0x8000000000000000ULL, + 0x0000000000000000ULL}; + static constexpr uint8_t lt_lo[256] = { + 0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, + 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x05, 0x00, 0x01, 0x00, + 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, + 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 
0x01, 0x00, + 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, + 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, + 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, + 0x01, 0x00, 0x07, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x05, 0x00, + 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, + 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, + 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, + 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x05, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, + 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, + 0x02, 0x00, 0x01, 0x00}; + static constexpr uint8_t lt_sel[256 * 8] = { + 0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, + 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, + 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, + 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, + 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, + 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, + 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 0, 0, 0, 1, 0, 2, 2, 1, 0, 3, 
3, 1, 3, 2, 2, 1, 0, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 0, 5, 5, 1, 5, + 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 0, 6, 6, 1, 6, 2, 2, 1, 6, 3, + 3, 1, 3, 2, 2, 1, 6, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, + 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 0, 7, 7, 1, 7, 2, 2, 1, 7, 3, 3, 1, 3, 2, 2, 1, 7, 4, 4, 1, + 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 7, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, + 3, 3, 1, 3, 2, 2, 1, 7, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1, 6, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, + 2, 1, 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, + 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 3, 0, 3, 3, 2, 0, 0, 0, 4, 0, 4, 4, 2, 0, 4, 4, 3, 4, 3, 3, 2, 0, 0, 0, 5, 0, + 5, 5, 2, 0, 5, 5, 3, 5, 3, 3, 2, 0, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2, 0, 0, 0, 6, 0, 6, 6, 2, 0, 6, + 6, 3, 6, 3, 3, 2, 0, 6, 6, 4, 6, 4, 4, 2, 6, 4, 4, 3, 4, 3, 3, 2, 0, 6, 6, 5, 6, 5, 5, 2, 6, 5, 5, 3, 5, 3, 3, + 2, 6, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2, 0, 0, 0, 7, 0, 7, 7, 2, 0, 7, 7, 3, 7, 3, 3, 2, 0, 7, 7, 4, + 7, 4, 4, 2, 7, 4, 4, 3, 4, 3, 3, 2, 0, 7, 7, 5, 7, 5, 5, 2, 7, 5, 5, 3, 5, 3, 3, 2, 7, 5, 5, 4, 5, 4, 4, 2, 5, + 4, 4, 3, 4, 3, 3, 2, 0, 7, 7, 6, 7, 6, 6, 2, 7, 6, 6, 3, 6, 3, 3, 2, 7, 6, 6, 4, 6, 4, 4, 2, 6, 4, 4, 3, 4, 3, + 3, 2, 7, 6, 6, 5, 6, 5, 5, 2, 6, 5, 5, 3, 5, 3, 3, 2, 6, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 4, 0, 4, 4, 3, 0, 0, 0, 0, 0, + 0, 0, 5, 0, 0, 0, 5, 0, 5, 5, 3, 0, 0, 0, 5, 0, 5, 5, 4, 0, 5, 5, 4, 5, 4, 4, 3, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, + 0, 6, 0, 6, 6, 3, 0, 0, 0, 6, 0, 6, 6, 4, 0, 6, 6, 4, 6, 4, 4, 3, 0, 0, 0, 6, 0, 6, 6, 5, 0, 6, 6, 5, 6, 5, 5, + 3, 0, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 7, 0, 7, 7, 3, 0, 0, 0, 7, + 0, 7, 7, 4, 
0, 7, 7, 4, 7, 4, 4, 3, 0, 0, 0, 7, 0, 7, 7, 5, 0, 7, 7, 5, 7, 5, 5, 3, 0, 7, 7, 5, 7, 5, 5, 4, 7, + 5, 5, 4, 5, 4, 4, 3, 0, 0, 0, 7, 0, 7, 7, 6, 0, 7, 7, 6, 7, 6, 6, 3, 0, 7, 7, 6, 7, 6, 6, 4, 7, 6, 6, 4, 6, 4, + 4, 3, 0, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 3, 7, 6, 6, 5, 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 5, 0, 5, 5, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 6, 0, 6, 6, 4, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 6, 0, 6, 6, + 5, 0, 0, 0, 6, 0, 6, 6, 5, 0, 6, 6, 5, 6, 5, 5, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, + 0, 0, 0, 7, 0, 0, 0, 7, 0, 7, 7, 4, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 7, 0, 7, 7, 5, 0, 0, 0, 7, 0, 7, 7, 5, 0, + 7, 7, 5, 7, 5, 5, 4, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 7, 0, 7, 7, 6, 0, 0, 0, 7, 0, 7, 7, 6, 0, 7, 7, 6, 7, 6, + 6, 4, 0, 0, 0, 7, 0, 7, 7, 6, 0, 7, 7, 6, 7, 6, 6, 5, 0, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 4, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 6, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 6, 0, 6, 6, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 7, 0, + 0, 0, 7, 0, 7, 7, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 7, 0, 7, + 7, 6, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 7, 0, 7, 7, 6, 0, 0, 0, 7, 0, 7, 7, 6, 0, 7, 7, 6, 7, 6, 6, 5, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 7, 0, 7, 7, 6, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7}; + static constexpr uint64_t ps_overflow[65] = { + 0x8080808080808080ULL, 0x7f7f7f7f7f7f7f7fULL, 0x7e7e7e7e7e7e7e7eULL, 0x7d7d7d7d7d7d7d7dULL, + 0x7c7c7c7c7c7c7c7cULL, 0x7b7b7b7b7b7b7b7bULL, 0x7a7a7a7a7a7a7a7aULL, 0x7979797979797979ULL, + 0x7878787878787878ULL, 0x7777777777777777ULL, 0x7676767676767676ULL, 0x7575757575757575ULL, + 0x7474747474747474ULL, 0x7373737373737373ULL, 0x7272727272727272ULL, 0x7171717171717171ULL, + 0x7070707070707070ULL, 0x6f6f6f6f6f6f6f6fULL, 0x6e6e6e6e6e6e6e6eULL, 0x6d6d6d6d6d6d6d6dULL, + 0x6c6c6c6c6c6c6c6cULL, 
0x6b6b6b6b6b6b6b6bULL, 0x6a6a6a6a6a6a6a6aULL, 0x6969696969696969ULL, + 0x6868686868686868ULL, 0x6767676767676767ULL, 0x6666666666666666ULL, 0x6565656565656565ULL, + 0x6464646464646464ULL, 0x6363636363636363ULL, 0x6262626262626262ULL, 0x6161616161616161ULL, + 0x6060606060606060ULL, 0x5f5f5f5f5f5f5f5fULL, 0x5e5e5e5e5e5e5e5eULL, 0x5d5d5d5d5d5d5d5dULL, + 0x5c5c5c5c5c5c5c5cULL, 0x5b5b5b5b5b5b5b5bULL, 0x5a5a5a5a5a5a5a5aULL, 0x5959595959595959ULL, + 0x5858585858585858ULL, 0x5757575757575757ULL, 0x5656565656565656ULL, 0x5555555555555555ULL, + 0x5454545454545454ULL, 0x5353535353535353ULL, 0x5252525252525252ULL, 0x5151515151515151ULL, + 0x5050505050505050ULL, 0x4f4f4f4f4f4f4f4fULL, 0x4e4e4e4e4e4e4e4eULL, 0x4d4d4d4d4d4d4d4dULL, + 0x4c4c4c4c4c4c4c4cULL, 0x4b4b4b4b4b4b4b4bULL, 0x4a4a4a4a4a4a4a4aULL, 0x4949494949494949ULL, + 0x4848484848484848ULL, 0x4747474747474747ULL, 0x4646464646464646ULL, 0x4545454545454545ULL, + 0x4444444444444444ULL, 0x4343434343434343ULL, 0x4242424242424242ULL, 0x4141414141414141ULL, + 0x4040404040404040ULL}; + static constexpr uint64_t cnt(uint64_t x); + static constexpr uint32_t hi(uint64_t x); + static constexpr uint32_t lo(uint64_t x); + static constexpr uint32_t cnt32(uint32_t x); + static constexpr uint32_t cnt11(uint64_t x, uint64_t & c); + static constexpr uint32_t cnt11(uint64_t x); + static constexpr uint32_t cnt10(uint64_t x, uint64_t & c); + static constexpr uint32_t cnt01(uint64_t x, uint64_t & c); + static constexpr uint64_t map10(uint64_t x, uint64_t c = 0); + static constexpr uint64_t map01(uint64_t x, uint64_t c = 1); + static constexpr uint32_t sel(uint64_t x, uint32_t i); + static constexpr uint32_t _sel(uint64_t x, uint32_t i); + static constexpr uint32_t sel11(uint64_t x, uint32_t i, uint32_t c = 0); + static constexpr uint32_t hi11(uint64_t x); + static constexpr void write_int(uint64_t * word, uint64_t x, uint8_t offset = 0, const uint8_t len = 64); + static constexpr void write_int_and_move(uint64_t *& word, uint64_t x, uint8_t & 
offset, const uint8_t len); + static constexpr uint64_t read_int(uint64_t const * word, uint8_t offset = 0, const uint8_t len = 64); + static constexpr uint64_t read_int_bounded(uint64_t const * word, uint8_t offset = 0, const uint8_t len = 64); + static constexpr uint64_t read_int_and_move(uint64_t const *& word, uint8_t & offset, const uint8_t len = 64); + static constexpr uint64_t read_unary(uint64_t const * word, uint8_t offset = 0); + static constexpr uint64_t read_unary_bounded(uint64_t const * word, uint8_t offset = 0); + static constexpr uint64_t read_unary_and_move(uint64_t const *& word, uint8_t & offset); + static constexpr void move_right(uint64_t const *& word, uint8_t & offset, const uint8_t len); + static constexpr void move_left(uint64_t const *& word, uint8_t & offset, const uint8_t len); + static constexpr uint64_t next(uint64_t const * word, uint64_t idx); + static constexpr uint64_t prev(uint64_t const * word, uint64_t idx); + static constexpr uint64_t rev(uint64_t x); +}; +template +constexpr uint64_t bits_impl::cnt(uint64_t x) +{ +#ifdef __SSE4_2__ + return __builtin_popcountll(x); +#else +# ifdef POPCOUNT_TL + return lt_cnt[x & 0xFFULL] + lt_cnt[(x >> 8) & 0xFFULL] + lt_cnt[(x >> 16) & 0xFFULL] + lt_cnt[(x >> 24) & 0xFFULL] + + lt_cnt[(x >> 32) & 0xFFULL] + lt_cnt[(x >> 40) & 0xFFULL] + lt_cnt[(x >> 48) & 0xFFULL] + + lt_cnt[(x >> 56) & 0xFFULL]; +# else + x = x - ((x >> 1) & 0x5555555555555555ull); + x = (x & 0x3333333333333333ull) + ((x >> 2) & 0x3333333333333333ull); + x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0full; + return (0x0101010101010101ull * x >> 56); +# endif +#endif +} +template +constexpr uint32_t bits_impl::cnt32(uint32_t x) +{ +#ifdef __SSE4_2__ + return __builtin_popcount(x); +#else + x = x - ((x >> 1) & 0x55555555); + x = (x & 0x33333333) + ((x >> 2) & 0x33333333); + return (0x10101010 * x >> 28) + (0x01010101 * x >> 28); +#endif +} +template +constexpr uint32_t bits_impl::cnt11(uint64_t x, uint64_t & c) +{ + uint64_t t1 = x ^ 
0x5555555555555555ULL; + uint64_t t2 = t1 + 0x5555555555555555ULL + c; + c = t1 > t2; + return cnt((t2 ^ 0x5555555555555555ULL) & x); +} +template +constexpr uint32_t bits_impl::cnt11(uint64_t x) +{ + return cnt((((x ^ 0x5555555555555555ULL) + 0x5555555555555555ULL) ^ 0x5555555555555555ULL) & x); +} +template +constexpr uint32_t bits_impl::cnt10(uint64_t x, uint64_t & c) +{ + uint32_t res = cnt(((x << 1) | c) & (~x)); + c = (x >> 63); + return res; +} +template +constexpr uint64_t bits_impl::map10(uint64_t x, uint64_t c) +{ + return (((x << 1) | c) & (~x)); +} +template +constexpr uint32_t bits_impl::cnt01(uint64_t x, uint64_t & c) +{ + uint32_t res = cnt((x ^ ((x << 1) | c)) & x); + c = (x >> 63); + return res; +} +template +constexpr uint64_t bits_impl::map01(uint64_t x, uint64_t c) +{ + return ((x ^ ((x << 1) | c)) & x); +} +template +constexpr uint32_t bits_impl::sel(uint64_t x, uint32_t i) +{ +#if defined(__BMI__) && defined(__BMI2__) + return _tzcnt_u64(_pdep_u64(1ULL << (i - 1), x)); +#endif +#ifdef __SSE4_2__ + uint64_t s = x, b{}; + s = s - ((s >> 1) & 0x5555555555555555ULL); + s = (s & 0x3333333333333333ULL) + ((s >> 2) & 0x3333333333333333ULL); + s = (s + (s >> 4)) & 0x0F0F0F0F0F0F0F0FULL; + s = 0x0101010101010101ULL * s; + b = (s + ps_overflow[i]) & 0x8080808080808080ULL; + int byte_nr = __builtin_ctzll(b) >> 3; + s <<= 8; + i -= (s >> (byte_nr << 3)) & 0xFFULL; + return (byte_nr << 3) + lt_sel[((i - 1) << 8) + ((x >> (byte_nr << 3)) & 0xFFULL)]; +#endif + return _sel(x, i); +} +template +constexpr uint32_t bits_impl::_sel(uint64_t x, uint32_t i) +{ + uint64_t s = x, b{}; + s = s - ((s >> 1) & 0x5555555555555555ULL); + s = (s & 0x3333333333333333ULL) + ((s >> 2) & 0x3333333333333333ULL); + s = (s + (s >> 4)) & 0x0F0F0F0F0F0F0F0FULL; + s = 0x0101010101010101ULL * s; + b = (s + ps_overflow[i]); + i = (i - 1) << 8; + if (b & 0x0000000080000000ULL) + if (b & 0x0000000000008000ULL) + if (b & 0x0000000000000080ULL) + return lt_sel[(x & 0xFFULL) + i]; + else + 
return 8 + lt_sel[(((x >> 8) & 0xFFULL) + i - ((s & 0xFFULL) << 8)) & 0x7FFULL]; + else + if (b & 0x0000000000800000ULL) + return 16 + lt_sel[(((x >> 16) & 0xFFULL) + i - (s & 0xFF00ULL)) & 0x7FFULL]; + else + return 24 + lt_sel[(((x >> 24) & 0xFFULL) + i - ((s >> 8) & 0xFF00ULL)) & 0x7FFULL]; + else + if (b & 0x0000800000000000ULL) + if (b & 0x0000008000000000ULL) + return 32 + lt_sel[(((x >> 32) & 0xFFULL) + i - ((s >> 16) & 0xFF00ULL)) & 0x7FFULL]; + else + return 40 + lt_sel[(((x >> 40) & 0xFFULL) + i - ((s >> 24) & 0xFF00ULL)) & 0x7FFULL]; + else + if (b & 0x0080000000000000ULL) + return 48 + lt_sel[(((x >> 48) & 0xFFULL) + i - ((s >> 32) & 0xFF00ULL)) & 0x7FFULL]; + else + return 56 + lt_sel[(((x >> 56) & 0xFFULL) + i - ((s >> 40) & 0xFF00ULL)) & 0x7FFULL]; + return 0; +} +template +constexpr uint32_t bits_impl::hi(uint64_t x) +{ +#ifdef __SSE4_2__ + if (x == 0) + return 0; + return 63 - __builtin_clzll(x); +#else + uint64_t t{}, tt{}; + if ((tt = x >> 32)) + { + if ((t = tt >> 16)) + { + return (tt = t >> 8) ? 56 + lt_hi[tt] : 48 + lt_hi[t]; + } + else + { + return (t = tt >> 8) ? 40 + lt_hi[t] : 32 + lt_hi[tt]; + } + } + else + { + if ((t = x >> 16)) + { + return (tt = t >> 8) ? 24 + lt_hi[tt] : 16 + lt_hi[t]; + } + else + { + return (tt = x >> 8) ? 
8 + lt_hi[tt] : lt_hi[x]; + } + } +#endif +} +template +constexpr uint32_t bits_impl::lo(uint64_t x) +{ +#ifdef __SSE4_2__ + if (x == 0) + return 0; + return __builtin_ctzll(x); +#else + if (x & 1) + return 0; + if (x & 3) + return 1; + if (x & 7) + return 2; + if (x & 0x7FF) + { + return lt_lo[(x & 0x7FF) >> 3] + 3; + } + return lt_deBruijn_to_idx[((x & -x) * deBruijn64) >> 58]; +#endif +} +template +constexpr uint32_t bits_impl::hi11(uint64_t x) +{ + return hi((((x ^ 0x5555555555555555ULL) + 0x5555555555555555ULL) ^ 0x5555555555555555ULL) & x); +} +template +constexpr uint32_t bits_impl::sel11(uint64_t x, uint32_t i, uint32_t c) +{ + return sel((((x ^ 0x5555555555555555ULL) + 0x5555555555555555ULL + c) ^ 0x5555555555555555ULL) & x, i); +} +template +constexpr void bits_impl::write_int(uint64_t * word, uint64_t x, uint8_t offset, const uint8_t len) +{ + x &= bits_impl::lo_set[len]; + if (offset + len < 64) + { + *word &= ((bits_impl::all_set << (offset + len)) | bits_impl::lo_set[offset]); + *word |= (x << offset); + } + else + { + *word &= ((bits_impl::lo_set[offset])); + *word |= (x << offset); + if ((offset = (offset + len) & 0x3F)) + { + *(word + 1) &= (~bits_impl::lo_set[offset]); + *(word + 1) |= (x >> (len - offset)); + } + } +} +template +constexpr void bits_impl::write_int_and_move(uint64_t *& word, uint64_t x, uint8_t & offset, const uint8_t len) +{ + x &= bits_impl::lo_set[len]; + if (offset + len < 64) + { + *word &= ((bits_impl::all_set << (offset + len)) | bits_impl::lo_set[offset]); + *word |= (x << offset); + offset += len; + } + else + { + *word &= ((bits_impl::lo_set[offset])); + *word |= (x << offset); + if ((offset = (offset + len)) > 64) + { + offset &= 0x3F; + *(++word) &= (~bits_impl::lo_set[offset]); + *word |= (x >> (len - offset)); + } + else + { + offset = 0; + ++word; + } + } +} +template +constexpr uint64_t bits_impl::read_int(uint64_t const * word, uint8_t offset, const uint8_t len) +{ + uint64_t w1 = (*word) >> offset; + if ((offset 
+ len) > 64) + { + return w1 | + ((*(word + 1) & bits_impl::lo_set[(offset + len) & 0x3F]) + << (64 - offset)); + } + else + { + return w1 & bits_impl::lo_set[len]; + } +} +template +constexpr uint64_t bits_impl::read_int_bounded(uint64_t const * word, uint8_t offset, const uint8_t len) +{ + return ((*word) >> offset) & bits_impl::lo_set[len]; +} +template +constexpr uint64_t bits_impl::read_int_and_move(uint64_t const *& word, uint8_t & offset, const uint8_t len) +{ + uint64_t w1 = (*word) >> offset; + if ((offset = (offset + len)) >= 64) + { + if (offset == 64) + { + offset &= 0x3F; + ++word; + return w1; + } + else + { + offset &= 0x3F; + return w1 | (((*(++word)) & bits_impl::lo_set[offset]) << (len - offset)); + } + } + else + { + return w1 & bits_impl::lo_set[len]; + } +} +template +constexpr uint64_t bits_impl::read_unary(uint64_t const * word, uint8_t offset) +{ + uint64_t w = *word >> offset; + if (w) + { + return bits_impl::lo(w); + } + else + { + if (0 != (w = *(++word))) + return bits_impl::lo(w) + 64 - offset; + uint64_t cnt = 2; + while (0 == (w = *(++word))) + ++cnt; + return bits_impl::lo(w) + (cnt << 6) - offset; + } + return 0; +} +template +constexpr uint64_t bits_impl::read_unary_bounded(uint64_t const * word, uint8_t offset) +{ + uint64_t w = *word >> offset; + if (w) + { + return bits_impl::lo(w); + } + else + { + return 0; + } +} +template +constexpr uint64_t bits_impl::read_unary_and_move(uint64_t const *& word, uint8_t & offset) +{ + uint64_t w = (*word) >> offset; + if (w) + { + uint8_t r = bits_impl::lo(w); + offset = (offset + r + 1) & 0x3F; + word += (offset == 0); + return r; + } + else + { + uint8_t rr = 0; + if (0 != (w = *(++word))) + { + rr = bits_impl::lo(w) + 64 - offset; + offset = (offset + rr + 1) & 0x3F; + word += (offset == 0); + return rr; + } + else + { + uint64_t cnt_1 = 1; + while (0 == (w = *(++word))) + ++cnt_1; + rr = bits_impl::lo(w) + 64 - offset; + offset = (offset + rr + 1) & 0x3F; + word += (offset == 0); + 
return ((cnt_1) << 6) + rr; + } + } + return 0; +} +template +constexpr void bits_impl::move_right(uint64_t const *& word, uint8_t & offset, const uint8_t len) +{ + if ((offset += len) & 0xC0) + { + offset &= 0x3F; + ++word; + } +} +template +constexpr void bits_impl::move_left(uint64_t const *& word, uint8_t & offset, const uint8_t len) +{ + if ((offset -= len) & 0xC0) + { + offset &= 0x3F; + --word; + } +} +template +constexpr uint64_t bits_impl::next(uint64_t const * word, uint64_t idx) +{ + word += (idx >> 6); + if (*word & ~lo_set[idx & 0x3F]) + { + return (idx & ~((size_t)0x3F)) + lo(*word & ~lo_set[idx & 0x3F]); + } + idx = (idx & ~((size_t)0x3F)) + 64; + ++word; + while (*word == 0) + { + idx += 64; + ++word; + } + return idx + lo(*word); +} +template +constexpr uint64_t bits_impl::prev(uint64_t const * word, uint64_t idx) +{ + word += (idx >> 6); + if (*word & lo_set[(idx & 0x3F) + 1]) + { + return (idx & ~((size_t)0x3F)) + hi(*word & lo_set[(idx & 0x3F) + 1]); + } + idx = (idx & ~((size_t)0x3F)) - 64; + --word; + while (*word == 0) + { + idx -= 64; + --word; + } + return idx + hi(*word); +} +template +constexpr uint64_t bits_impl::rev(uint64_t x) +{ + x = ((x & 0x5555555555555555ULL) << 1) | ((x & 0xAAAAAAAAAAAAAAAAULL) >> 1); + x = ((x & 0x3333333333333333ULL) << 2) | ((x & 0xCCCCCCCCCCCCCCCCULL) >> 2); + x = ((x & 0x0F0F0F0F0F0F0F0FULL) << 4) | ((x & 0xF0F0F0F0F0F0F0F0ULL) >> 4); + x = ((x & 0x00FF00FF00FF00FFULL) << 8) | ((x & 0xFF00FF00FF00FF00ULL) >> 8); + x = ((x & 0x0000FFFF0000FFFFULL) << 16) | ((x & 0xFFFF0000FFFF0000ULL) >> 16); + x = ((x & 0x00000000FFFFFFFFULL) << 32) | ((x & 0xFFFFFFFF00000000ULL) >> 32); + return x; +} +template +constexpr uint8_t bits_impl::lt_cnt[256]; +template +constexpr uint32_t bits_impl::lt_deBruijn_to_idx[64]; +template +constexpr uint32_t bits_impl::lt_hi[256]; +template +constexpr uint64_t bits_impl::lo_set[65]; +template +constexpr uint64_t bits_impl::lo_unset[65]; +template +constexpr uint64_t 
bits_impl::ps_overflow[65]; +template +constexpr uint8_t bits_impl::lt_sel[256 * 8]; +template +constexpr uint64_t bits_impl::lt_fib[92]; +template +constexpr uint8_t bits_impl::lt_lo[256]; +using bits = bits_impl<>; +} +#endif +#ifndef INCLUDED_SDSL_CEREAL +#define INCLUDED_SDSL_CEREAL +#include +#if defined(__has_include) +# if __has_include() +#define SDSL_HAS_CEREAL 1 +# include +# include +# include +# include +# include +# include +# include +# include +# include +# endif +#endif +#ifndef SDSL_HAS_CEREAL +#define SDSL_HAS_CEREAL 0 +#define CEREAL_NVP(X) X +#define CEREAL_SERIALIZE_FUNCTION_NAME serialize +#define CEREAL_LOAD_FUNCTION_NAME load +#define CEREAL_SAVE_FUNCTION_NAME save +#define CEREAL_LOAD_MINIMAL_FUNCTION_NAME load_minimal +#define CEREAL_SAVE_MINIMAL_FUNCTION_NAME save_minimal +namespace cereal +{ +namespace traits +{ +template +struct is_output_serializable +{ + using value = std::false_type; +}; +template +struct is_input_serializable +{ + using value = std::false_type; +}; +} +template +struct BinaryData +{}; +template +void make_nvp(t1 const &, t2 const &) +{} +template +void make_size_tag(t const &) +{} +template +t1 binary_data(t1 const &, t2 const &) +{} +} +#endif +#endif +#ifndef INCLUDED_SDSL_INT_VECTOR +#define INCLUDED_SDSL_INT_VECTOR +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef SDSL_CONFIG +#define SDSL_CONFIG +#include +#include +#include +#ifndef MSVC_COMPILER +#define SDSL_UNUSED __attribute__((unused)) +#else +#define SDSL_UNUSED +#endif +namespace sdsl +{ +namespace util +{ +template +std::string to_string(T const & t, int w = 1); +uint64_t pid(); +uint64_t id(); +} +namespace conf +{ +const uint64_t SDSL_BLOCK_SIZE = (uint64_t)1 << 22; +constexpr char KEY_BWT[] = "bwt"; +constexpr char KEY_BWT_INT[] = "bwt_int"; +constexpr char KEY_SA[] = "sa"; +constexpr char KEY_CSA[] = "csa"; +constexpr char KEY_CST[] = "cst"; +constexpr char 
KEY_ISA[] = "isa"; +constexpr char KEY_TEXT[] = "text"; +constexpr char KEY_TEXT_INT[] = "text_int"; +constexpr char KEY_PSI[] = "psi"; +constexpr char KEY_LCP[] = "lcp"; +constexpr char KEY_SAMPLE_CHAR[] = "sample_char"; +} +typedef uint64_t int_vector_size_type; +typedef std::map tMSS; +enum format_type +{ + JSON_FORMAT, + R_FORMAT, + HTML_FORMAT +}; +enum byte_sa_algo_type +{ + LIBDIVSUFSORT, + SE_SAIS +}; +struct cache_config +{ + bool delete_files; + bool delete_data; + std::string dir; + std::string id; + tMSS file_map; + cache_config(bool f_delete_files = true, + std::string f_dir = "./", + std::string f_id = "", + tMSS f_file_map = tMSS()) : + delete_files(f_delete_files), + delete_data(false), + dir(f_dir), + id(f_id), + file_map(f_file_map) + { + if ("" == id) + { + id = sdsl::util::to_string(sdsl::util::pid()) + "_" + sdsl::util::to_string(sdsl::util::id()); + } + } +}; +template +struct key_text_trait_impl +{ + static char const * KEY_TEXT; +}; +template +struct key_text_trait_impl<0, T> +{ + static char const * KEY_TEXT; +}; +template +struct key_text_trait_impl<8, T> +{ + static char const * KEY_TEXT; +}; +template +struct key_bwt_trait_impl +{ + static char const * KEY_BWT; +}; +template +struct key_bwt_trait_impl<0, T> +{ + static char const * KEY_BWT; +}; +template +struct key_bwt_trait_impl<8, T> +{ + static char const * KEY_BWT; +}; +template +char const * key_text_trait_impl<0, T>::KEY_TEXT = conf::KEY_TEXT_INT; +template +char const * key_text_trait_impl<8, T>::KEY_TEXT = conf::KEY_TEXT; +template +char const * key_bwt_trait_impl<0, T>::KEY_BWT = conf::KEY_BWT_INT; +template +char const * key_bwt_trait_impl<8, T>::KEY_BWT = conf::KEY_BWT; +template +using key_text_trait = key_text_trait_impl; +template +using key_bwt_trait = key_bwt_trait_impl; +} +#endif +#ifndef INCLUDED_SDSL_IO +#define INCLUDED_SDSL_IO +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef 
INCLUDED_SDSL_PLATFORM +#define INCLUDED_SDSL_PLATFORM +namespace sdsl +{ +#if defined(__clang__) +#define COMPILER_CLANG +#endif +#if defined(__GNUC__) && !defined(COMPILER_CLANG) +#define COMPILER_GCC +#endif +#define SDSL_FALLTHROUGH +#if defined(__has_cpp_attribute) +# if __has_cpp_attribute(fallthrough) +#undef SDSL_FALLTHROUGH +# if __cplusplus < 201500 && defined(COMPILER_GCC) +#define SDSL_FALLTHROUGH [[gnu::fallthrough]]; +# elif __cplusplus < 201500 && defined(COMPILER_CLANG) +#define SDSL_FALLTHROUGH [[clang::fallthrough]]; +# else +#define SDSL_FALLTHROUGH [[fallthrough]]; +# endif +# endif +#endif +} +#endif +#ifndef INCLUDED_SDSL_CONCEPTS +#define INCLUDED_SDSL_CONCEPTS +#include +namespace sdsl +{ +struct bv_tag +{}; +struct iv_tag +{}; +struct csa_tag +{}; +struct cst_tag +{}; +struct wt_tag +{}; +struct psi_tag +{}; +struct lf_tag +{}; +struct csa_member_tag +{}; +struct lcp_tag +{}; +struct lcp_plain_tag +{}; +struct lcp_permuted_tag +{}; +struct lcp_tree_compressed_tag +{}; +struct lcp_tree_and_lf_compressed_tag +{}; +struct alphabet_tag +{}; +struct byte_alphabet_tag +{ + static const uint8_t WIDTH = 8; +}; +struct int_alphabet_tag +{ + static const uint8_t WIDTH = 0; +}; +struct sa_sampling_tag +{}; +struct isa_sampling_tag +{}; +template +struct enable_if_type +{ + typedef t_r type; +}; +template +struct index_tag +{ + typedef t_enable type; +}; +template +struct index_tag::type> +{ + using type = typename t_idx::index_category; +}; +template +struct sampling_tag +{ + typedef t_enable type; +}; +template +struct sampling_tag::type> +{ + using type = typename t_sampling::sampling_category; +}; +template +struct is_enc_vec +{ + static constexpr bool value = false; +}; +template +struct is_enc_vec::type> +{ + static constexpr bool value = true; +}; +template +struct is_alphabet +{ + static constexpr bool value = false; +}; +template +struct is_alphabet::type> +{ + static constexpr bool value = true; +}; +} +#endif +#ifndef INCLUDED_SDSL_SFSTREAM 
+#define INCLUDED_SDSL_SFSTREAM +#include +#include +#ifndef INCLUDED_SDSL_RAM_FSTREAMBUF +#define INCLUDED_SDSL_RAM_FSTREAMBUF +#include +#include +#include +#include +#include +#ifndef INCLUDED_SDSL_MEMORY_TRACKING +#define INCLUDED_SDSL_MEMORY_TRACKING +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef _WIN32 +# ifndef NOMINMAX +#define NOMINMAX 1 +# endif +# include +# include +#else +#endif +namespace sdsl +{ +void memory_monitor_record(int64_t); +template +struct track_allocator +{ + using value_type = T; + track_allocator() = default; + template + track_allocator(track_allocator const &) + {} + T * allocate(std::size_t n) + { + if (n <= std::numeric_limits::max() / sizeof(T)) + { + size_t s = n * sizeof(T); + if (auto ptr = std::malloc(s)) + { + memory_monitor_record(s); + return static_cast(ptr); + } + } + throw std::bad_alloc(); + } + void deallocate(T * ptr, std::size_t n) + { + std::size_t s = n * sizeof(T); + memory_monitor_record(-((int64_t)s)); + std::free(ptr); + } +}; +template +inline bool operator==(track_allocator const &, track_allocator const &) +{ + return true; +} +template +inline bool operator!=(track_allocator const & a, track_allocator const & b) +{ + return !(a == b); +} +class spin_lock +{ +private: + std::atomic_flag m_slock; +public: + spin_lock() + { + m_slock.clear(); + } + void lock() + { + while (m_slock.test_and_set(std::memory_order_acquire)) + { + } + }; + void unlock() + { + m_slock.clear(std::memory_order_release); + }; +}; +namespace ram_fs +{ +typedef std::vector> content_type; +} +struct ramfs_storage +{ + typedef std::map mss_type; + typedef std::map mis_type; + std::recursive_mutex m_rlock; + mss_type m_map; + mis_type m_fd_map; + ramfs_storage() + { + m_fd_map[-1] = ""; + } + ~ramfs_storage() + {} +}; +struct mm_alloc +{ + using timer = std::chrono::high_resolution_clock; + timer::time_point timestamp; + int64_t 
usage; + mm_alloc(timer::time_point t, int64_t u) : timestamp(t), usage(u){}; +}; +struct mm_event +{ + using timer = std::chrono::high_resolution_clock; + std::string name; + std::vector allocations; + mm_event(std::string n, int64_t usage) : name(n) + { + allocations.emplace_back(timer::now(), usage); + }; + bool operator<(mm_event const & a) const + { + if (a.allocations.size() && this->allocations.size()) + { + if (this->allocations[0].timestamp == a.allocations[0].timestamp) + { + return this->allocations.back().timestamp < a.allocations.back().timestamp; + } + else + { + return this->allocations[0].timestamp < a.allocations[0].timestamp; + } + } + return true; + } +}; +struct tracker_storage +{ + using timer = std::chrono::high_resolution_clock; + std::chrono::milliseconds log_granularity = std::chrono::milliseconds(20ULL); + int64_t current_usage = 0; + bool track_usage = false; + std::vector completed_events; + std::stack event_stack; + timer::time_point start_log; + timer::time_point last_event; + spin_lock spinlock; + tracker_storage() + {} + ~tracker_storage() + {} +}; +template +void write_mem_log(std::ostream & out, tracker_storage const & m); +class memory_monitor +{ +public: + using timer = std::chrono::high_resolution_clock; + struct mm_event_proxy + { + bool add; + timer::time_point created; + mm_event_proxy(std::string const & name, int64_t usage, bool a) : add(a) + { + if (add) + { + auto & m = *(the_monitor().m_tracker); + std::lock_guard lock(m.spinlock); + m.event_stack.emplace(name, usage); + } + } + ~mm_event_proxy() + { + if (add) + { + auto & m = *(the_monitor().m_tracker); + std::lock_guard lock(m.spinlock); + auto & cur = m.event_stack.top(); + auto cur_time = timer::now(); + cur.allocations.emplace_back(cur_time, m.current_usage); + m.completed_events.emplace_back(std::move(cur)); + m.event_stack.pop(); + if (!m.event_stack.empty()) + { + if (m.event_stack.top().allocations.size()) + { + auto last_usage = 
m.event_stack.top().allocations.back().usage; + m.event_stack.top().allocations.emplace_back(cur_time, last_usage); + } + } + } + } + }; +private: + tracker_storage * m_tracker; + ramfs_storage * m_ram_fs; + memory_monitor() + { + m_tracker = new tracker_storage(); + m_ram_fs = new ramfs_storage(); + }; + ~memory_monitor() + { + if (m_tracker->track_usage) + { + stop(); + } + delete m_ram_fs; + delete m_tracker; + } + memory_monitor(memory_monitor const &) = delete; + memory_monitor & operator=(memory_monitor const &) = delete; + static memory_monitor & the_monitor() + { + static memory_monitor m; + return m; + } +public: + static void granularity(std::chrono::milliseconds ms) + { + auto & m = *(the_monitor().m_tracker); + m.log_granularity = ms; + } + static int64_t peak() + { + auto & m = *(the_monitor().m_tracker); + int64_t max = 0; + for (auto events : m.completed_events) + { + for (auto alloc : events.allocations) + { + if (max < alloc.usage) + { + max = alloc.usage; + } + } + } + return max; + } + static ramfs_storage & ram_fs() + { + return *(the_monitor().m_ram_fs); + } + static void start() + { + auto & m = *(the_monitor().m_tracker); + m.track_usage = true; + if (m.completed_events.size()) + { + m.completed_events.clear(); + } + while (m.event_stack.size()) + { + m.event_stack.pop(); + } + m.start_log = timer::now(); + m.current_usage = 0; + m.last_event = m.start_log; + m.event_stack.emplace("unknown", 0); + } + static void stop() + { + auto & m = *(the_monitor().m_tracker); + while (!m.event_stack.empty()) + { + m.completed_events.emplace_back(std::move(m.event_stack.top())); + m.event_stack.pop(); + } + m.track_usage = false; + } + static void record(int64_t delta) + { + auto & m = *(the_monitor().m_tracker); + if (m.track_usage) + { + std::lock_guard lock(m.spinlock); + auto cur = timer::now(); + if (m.last_event + m.log_granularity < cur) + { + m.event_stack.top().allocations.emplace_back(cur, m.current_usage); + m.current_usage = m.current_usage + 
delta; + m.event_stack.top().allocations.emplace_back(cur, m.current_usage); + m.last_event = cur; + } + else + { + if (m.event_stack.top().allocations.size()) + { + m.current_usage = m.current_usage + delta; + m.event_stack.top().allocations.back().usage = m.current_usage; + m.event_stack.top().allocations.back().timestamp = cur; + } + } + } + } + static mm_event_proxy event(std::string const & name) + { + auto & m = *(the_monitor().m_tracker); + if (m.track_usage) + { + return mm_event_proxy(name, m.current_usage, true); + } + return mm_event_proxy(name, m.current_usage, false); + } + template + static void write_memory_log(std::ostream & out) + { + write_mem_log(out, *(the_monitor().m_tracker)); + } +}; +inline void memory_monitor_record(int64_t delta) +{ + memory_monitor::record(delta); +} +} +#endif +#ifndef INCLUDED_SDSL_RAM_FS +#define INCLUDED_SDSL_RAM_FS +#include +#include +#include +#include +#include +#include +#include +#include +namespace sdsl +{ +namespace ram_fs +{ +inline bool exists(std::string const & name) +{ + auto & rf = memory_monitor::ram_fs(); + std::lock_guard lock(rf.m_rlock); + return rf.m_map.find(name) != rf.m_map.end(); +} +inline void store(std::string const & name, content_type data) +{ + auto & rf = memory_monitor::ram_fs(); + std::lock_guard lock(rf.m_rlock); + if (!exists(name)) + { + std::string cname = name; + rf.m_map.insert(std::make_pair(std::move(cname), std::move(data))); + } + else + { + rf.m_map[name] = std::move(data); + } +} +inline size_t file_size(std::string const & name) +{ + auto & rf = memory_monitor::ram_fs(); + std::lock_guard lock(rf.m_rlock); + if (exists(name)) + { + return rf.m_map[name].size(); + } + else + { + return 0; + } +} +inline content_type & content(std::string const & name) +{ + auto & rf = memory_monitor::ram_fs(); + std::lock_guard lock(rf.m_rlock); + return rf.m_map[name]; +} +inline int remove(std::string const & name) +{ + auto & rf = memory_monitor::ram_fs(); + std::lock_guard 
lock(rf.m_rlock); + if (exists(name)) + { + rf.m_map.erase(name); + } + return 0; +} +inline int rename(const std::string old_filename, const std::string new_filename) +{ + auto & rf = memory_monitor::ram_fs(); + std::lock_guard lock(rf.m_rlock); + rf.m_map[new_filename] = std::move(rf.m_map[old_filename]); + remove(old_filename); + return 0; +} +inline int open(std::string const & name) +{ + auto & rf = memory_monitor::ram_fs(); + std::lock_guard lock(rf.m_rlock); + if (!exists(name)) + { + store(name, content_type{}); + } + int fd = -2; + auto largest_fd = rf.m_fd_map.rbegin()->first; + if (largest_fd < 0) + { + auto smallest_fd = rf.m_fd_map.begin()->first; + fd = smallest_fd - 1; + } + else + { + rf.m_fd_map.erase(largest_fd); + fd = -largest_fd; + } + rf.m_fd_map[fd] = name; + return fd; +} +inline int close(int const fd) +{ + auto & rf = memory_monitor::ram_fs(); + std::lock_guard lock(rf.m_rlock); + if (fd >= -1) + return -1; + if (rf.m_fd_map.count(fd) == 0) + { + return -1; + } + else + { + rf.m_fd_map.erase(fd); + rf.m_fd_map[-fd] = ""; + } + return 0; +} +inline content_type & content(int const fd) +{ + auto & rf = memory_monitor::ram_fs(); + std::lock_guard lock(rf.m_rlock); + auto name = rf.m_fd_map[fd]; + return rf.m_map[name]; +} +inline int truncate(int const fd, size_t new_size) +{ + auto & rf = memory_monitor::ram_fs(); + std::lock_guard lock(rf.m_rlock); + if (rf.m_fd_map.count(fd) == 0) + return -1; + auto name = rf.m_fd_map[fd]; + rf.m_map[name].reserve(new_size); + rf.m_map[name].resize(new_size, 0); + return 0; +} +inline size_t file_size(int const fd) +{ + auto & rf = memory_monitor::ram_fs(); + std::lock_guard lock(rf.m_rlock); + if (rf.m_fd_map.count(fd) == 0) + return 0; + auto name = rf.m_fd_map[fd]; + return rf.m_map[name].size(); +} +} +inline bool is_ram_file(std::string const & file) +{ + if (file.size() > 0) + { + if (file[0] == '@') + { + return true; + } + } + return false; +} +inline bool is_ram_file(int const fd) +{ + return fd 
< -1; +} +inline std::string ram_file_name(std::string const & file) +{ + if (is_ram_file(file)) + { + return file; + } + else + { + return "@" + file; + } +} +inline std::string disk_file_name(std::string const & file) +{ + if (!is_ram_file(file)) + { + return file; + } + else + { + return file.substr(1); + } +} +inline int remove(std::string const & file) +{ + if (is_ram_file(file)) + { + return ram_fs::remove(file); + } + else + { + return std::remove(file.c_str()); + } +} +inline int rename(std::string const & old_filename, std::string const & new_filename) +{ + if (is_ram_file(old_filename)) + { + if (!is_ram_file(new_filename)) + { + return -1; + } + return ram_fs::rename(old_filename, new_filename); + } + else + { + return std::rename(old_filename.c_str(), new_filename.c_str()); + } +} +} +#endif +namespace sdsl +{ +class ram_filebuf : public std::streambuf +{ +private: + ram_fs::content_type * m_ram_file = nullptr; + void pbump64(std::ptrdiff_t x) + { + while (x > std::numeric_limits::max()) + { + pbump(std::numeric_limits::max()); + x -= std::numeric_limits::max(); + } + pbump(x); + } +public: + virtual ~ram_filebuf(){}; + ram_filebuf(){}; + ram_filebuf(ram_fs::content_type & ram_file) : m_ram_file(&ram_file) + { + char * begin = m_ram_file->data(); + char * end = begin + m_ram_file->size(); + setg(begin, begin, end); + } + std::streambuf * open(const std::string name, std::ios_base::openmode mode) + { + if ((mode & std::ios_base::in) and !(mode & std::ios_base::trunc)) + { + if (!ram_fs::exists(name)) + { + m_ram_file = nullptr; + } + else + { + m_ram_file = &ram_fs::content(name); + } + } + else + { + if (!ram_fs::exists(name)) + { + ram_fs::store(name, ram_fs::content_type()); + } + m_ram_file = &ram_fs::content(name); + if ((mode & std::ios_base::out) and !(mode & std::ios_base::app)) + { + m_ram_file->clear(); + } + } + if (m_ram_file and (mode & std::ios_base::trunc)) + { + m_ram_file->clear(); + } + if (m_ram_file) + { + if (mode & 
std::ios_base::ate) + { + } + else + {} + setg(m_ram_file->data(), m_ram_file->data(), m_ram_file->data() + m_ram_file->size()); + setp(m_ram_file->data(), m_ram_file->data() + m_ram_file->size()); + } + return m_ram_file ? this : nullptr; + } + bool is_open() + { + return m_ram_file != nullptr; + } + ram_filebuf * close() + { + if (!this->is_open()) + return nullptr; + m_ram_file = nullptr; + setg(nullptr, nullptr, nullptr); + setp(nullptr, nullptr); + return this; + } + pos_type seekpos(pos_type sp, std::ios_base::openmode mode = std::ios_base::in | std::ios_base::out) override + { + if (sp >= (pos_type)0 and sp <= (pos_type)m_ram_file->size()) + { + setg(m_ram_file->data(), m_ram_file->data() + sp, m_ram_file->data() + m_ram_file->size()); + setp(m_ram_file->data(), m_ram_file->data() + m_ram_file->size()); + pbump64(sp); + } + else + { + if (mode & std::ios_base::out) + { + m_ram_file->reserve(sp); + m_ram_file->resize(sp, 0); + setg(m_ram_file->data(), m_ram_file->data() + sp, m_ram_file->data() + m_ram_file->size()); + setp(m_ram_file->data(), m_ram_file->data() + m_ram_file->size()); + pbump64(sp); + } + else + { + return pos_type(off_type(-1)); + } + } + return sp; + } + pos_type pubseekoff(off_type off, + std::ios_base::seekdir way, + std::ios_base::openmode which = std::ios_base::in | std::ios_base::out) + { + if (std::ios_base::beg == way) + { + if (seekpos(off, which) == pos_type(-1)) + { + return pos_type(-1); + } + } + else if (std::ios_base::cur == way) + { + if (seekpos(gptr() - eback() + off, which) == pos_type(-1)) + { + return pos_type(-1); + } + } + else if (std::ios_base::end == way) + { + if (seekpos(egptr() - eback() + off, which) == pos_type(-1)) + { + return pos_type(-1); + } + } + return gptr() - eback(); + } + pos_type pubseekpos(pos_type sp, std::ios_base::openmode which = std::ios_base::in | std::ios_base::out) + { + if (seekpos(sp, which) == pos_type(-1)) + { + return pos_type(-1); + } + else + { + return gptr() - eback(); + } + } + 
std::streamsize xsputn(char_type const * s, std::streamsize n) override + { + if (!m_ram_file) + { + return 0; + } + if (n < epptr() - pptr()) + { + std::copy(s, s + n, pptr()); + pbump64(n); + return n; + } + else + { + if (epptr() - pbase() == (std::ptrdiff_t)m_ram_file->size() and epptr() == pptr()) + { + m_ram_file->insert(m_ram_file->end(), s, s + n); + setp(m_ram_file->data(), m_ram_file->data() + m_ram_file->size()); + std::ptrdiff_t add = epptr() - pbase(); + pbump64(add); + setg(m_ram_file->data(), gptr(), m_ram_file->data() + m_ram_file->size()); + return n; + } + else + { + for (std::streamsize i = 0; i < n; ++i) + { + if (traits_type::eq_int_type(sputc(s[i]), traits_type::eof())) + { + return i; + } + } + return n; + } + } + } + int sync() override + { + return 0; + } + int_type overflow(int_type c = traits_type::eof()) override + { + if (m_ram_file) + { + m_ram_file->push_back(c); + setp(m_ram_file->data(), m_ram_file->data() + m_ram_file->size()); + std::ptrdiff_t add = epptr() - pbase(); + pbump64(add); + setg(m_ram_file->data(), gptr(), m_ram_file->data() + m_ram_file->size()); + } + return traits_type::to_int_type(c); + } +}; +} +#endif +namespace sdsl +{ +class osfstream : public std::ostream +{ +public: + typedef std::streambuf * buf_ptr_type; +private: + buf_ptr_type m_streambuf = nullptr; + std::string m_file = ""; +public: + typedef void * voidptr; + osfstream() : std::ostream(nullptr) + { + this->init(m_streambuf); + } + osfstream(std::string const & file, std::ios_base::openmode mode = std::ios_base::out) : std::ostream(nullptr) + { + this->init(m_streambuf); + open(file, mode); + } + buf_ptr_type open(std::string const & file, std::ios_base::openmode mode = std::ios_base::out) + { + delete m_streambuf; + m_streambuf = nullptr; + m_file = file; + std::streambuf * success = nullptr; + if (is_ram_file(file)) + { + m_streambuf = new ram_filebuf(); + success = ((ram_filebuf *)m_streambuf)->open(m_file, mode | std::ios_base::out); + } + else + { 
+ m_streambuf = new std::filebuf(); + success = ((std::filebuf *)m_streambuf)->open(m_file, mode | std::ios_base::out); + } + if (success) + { + this->clear(); + } + else + { + this->setstate(std::ios_base::failbit); + delete m_streambuf; + m_streambuf = nullptr; + } + this->rdbuf(m_streambuf); + return m_streambuf; + } + bool is_open() + { + if (nullptr == m_streambuf) + return false; + if (is_ram_file(m_file)) + { + return ((ram_filebuf *)m_streambuf)->is_open(); + } + else + { + return ((std::filebuf *)m_streambuf)->is_open(); + } + } + void close() + { + bool fail = false; + if (nullptr == m_streambuf) + { + fail = true; + } + else + { + if (is_ram_file(m_file)) + { + fail = !((ram_filebuf *)m_streambuf)->close(); + } + else + { + fail = !((std::filebuf *)m_streambuf)->close(); + } + } + if (fail) + this->setstate(std::ios::failbit); + } + ~osfstream() + { + delete m_streambuf; + } + operator voidptr() const + { + return m_streambuf; + } + osfstream & seekp(pos_type pos) + { + ios_base::iostate err = std::ios_base::iostate(std::ios_base::goodbit); + try + { + if (!this->fail()) + { + pos_type p = 0; + if (is_ram_file(m_file)) + { + p = ((ram_filebuf *)m_streambuf)->pubseekpos(pos, std::ios_base::out); + } + else + { + p = ((std::filebuf *)m_streambuf)->pubseekpos(pos, std::ios_base::out); + } + if (p == pos_type(off_type(-1))) + { + err |= ios_base::failbit; + this->setstate(err); + } + } + } + catch (...) 
+ { + if (err) + { + this->setstate(err); + } + } + return *this; + } + osfstream & seekp(off_type off, ios_base::seekdir way) + { + ios_base::iostate err = std::ios_base::iostate(ios_base::goodbit); + try + { + if (!this->fail()) + { + pos_type p = 0; + if (is_ram_file(m_file)) + { + p = ((ram_filebuf *)m_streambuf)->pubseekoff(off, way, std::ios_base::out); + } + else + { + p = ((std::filebuf *)m_streambuf)->pubseekoff(off, way, std::ios_base::out); + } + if (p == pos_type(off_type(-1))) + { + err |= ios_base::failbit; + this->setstate(err); + } + } + } + catch (...) + { + if (err) + { + this->setstate(err); + } + } + return *this; + } + std::streampos tellp(); +}; +class isfstream : public std::istream +{ + typedef std::streambuf * buf_ptr_type; +private: + buf_ptr_type m_streambuf = nullptr; + std::string m_file = ""; +public: + typedef void * voidptr; + isfstream() : std::istream(nullptr) + { + this->init(m_streambuf); + } + isfstream(std::string const & file, std::ios_base::openmode mode = std::ios_base::in) : std::istream(nullptr) + { + this->init(m_streambuf); + open(file, mode); + } + buf_ptr_type open(std::string const & file, std::ios_base::openmode mode = std::ios_base::in) + { + delete m_streambuf; + m_streambuf = nullptr; + m_file = file; + std::streambuf * success = nullptr; + if (is_ram_file(file)) + { + m_streambuf = new ram_filebuf(); + success = ((ram_filebuf *)m_streambuf)->open(m_file, mode | std::ios_base::in); + } + else + { + m_streambuf = new std::filebuf(); + success = ((std::filebuf *)m_streambuf)->open(m_file, mode | std::ios_base::in); + } + if (success) + { + this->clear(); + } + else + { + this->setstate(std::ios_base::failbit); + delete m_streambuf; + m_streambuf = nullptr; + } + this->rdbuf(m_streambuf); + return m_streambuf; + } + bool is_open() + { + if (nullptr == m_streambuf) + return false; + if (is_ram_file(m_file)) + { + return ((ram_filebuf *)m_streambuf)->is_open(); + } + else + { + return ((std::filebuf 
*)m_streambuf)->is_open(); + } + } + void close() + { + bool fail = false; + if (nullptr == m_streambuf) + { + fail = true; + } + else + { + if (is_ram_file(m_file)) + { + fail = !((ram_filebuf *)m_streambuf)->close(); + } + else + { + fail = !((std::filebuf *)m_streambuf)->close(); + } + } + if (fail) + this->setstate(std::ios::failbit); + } + ~isfstream() + { + delete m_streambuf; + } + operator voidptr() const + { + return m_streambuf; + } + isfstream & seekg(pos_type pos) + { + ios_base::iostate err = std::ios_base::iostate(std::ios_base::goodbit); + try + { + if (!this->fail()) + { + pos_type p = 0; + if (is_ram_file(m_file)) + { + p = ((ram_filebuf *)m_streambuf)->pubseekpos(pos, std::ios_base::in); + } + else + { + p = ((std::filebuf *)m_streambuf)->pubseekpos(pos, std::ios_base::in); + } + if (p == pos_type(off_type(-1))) + { + err |= ios_base::failbit; + } + } + } + catch (...) + { + if (err) + { + this->setstate(err); + } + } + return *this; + } + isfstream & seekg(off_type off, ios_base::seekdir way) + { + ios_base::iostate err = std::ios_base::iostate(ios_base::goodbit); + try + { + if (!this->fail()) + { + pos_type p = 0; + if (is_ram_file(m_file)) + { + p = ((ram_filebuf *)m_streambuf)->pubseekoff(off, way, std::ios_base::in); + } + else + { + p = ((std::filebuf *)m_streambuf)->pubseekoff(off, way, std::ios_base::in); + } + if (p == pos_type(off_type(-1))) + { + err |= ios_base::failbit; + } + } + } + catch (...) + { + if (err) + { + this->setstate(err); + } + } + return *this; + } + std::streampos tellg() + { + ios_base::iostate err = std::ios_base::iostate(ios_base::goodbit); + pos_type p = pos_type(off_type(-1)); + try + { + if (!this->fail()) + { + if (is_ram_file(m_file)) + { + p = ((ram_filebuf *)m_streambuf)->pubseekoff(0, std::ios_base::cur); + } + else + { + p = ((std::filebuf *)m_streambuf)->pubseekoff(0, std::ios_base::cur); + } + if (p == pos_type(off_type(-1))) + { + err |= ios_base::failbit; + } + } + } + catch (...) 
+ { + if (err) + { + this->setstate(err); + } + } + return p; + } +}; +} +#endif +#ifndef INCLUDED_SDSL_STRUCTURE_TREE +#define INCLUDED_SDSL_STRUCTURE_TREE +#include +#include +#include +#include +#include +#ifndef INCLUDED_SDSL_UINTX_T +#define INCLUDED_SDSL_UINTX_T +#include +using std::int16_t; +using std::int32_t; +using std::int64_t; +using std::int8_t; +using std::uint16_t; +using std::uint32_t; +using std::uint64_t; +using std::uint8_t; +#endif +namespace sdsl +{ +inline void output_tab(std::ostream & out, size_t level) +{ + for (size_t i = 0; i < level; i++) + out << "\t"; +} +class structure_tree_node +{ +private: + using map_type = std::unordered_map>; + map_type m_children; +public: + map_type const & children = m_children; + size_t size = 0; + std::string name; + std::string type; +public: + structure_tree_node(std::string const & n, std::string const & t) : name(n), type(t) + {} + structure_tree_node * add_child(std::string const & n, std::string const & t) + { + auto hash = n + t; + auto child_itr = m_children.find(hash); + if (child_itr == m_children.end()) + { + structure_tree_node * new_node = new structure_tree_node(n, t); + m_children[hash] = std::unique_ptr(new_node); + return new_node; + } + else + { + return (*child_itr).second.get(); + } + } + void add_size(size_t s) + { + size += s; + } +}; +class structure_tree +{ +public: + static structure_tree_node * add_child(structure_tree_node * v, std::string const & name, std::string const & type) + { + if (v) + return v->add_child(name, type); + return nullptr; + }; + static void add_size(structure_tree_node * v, uint64_t value) + { + if (v) + v->add_size(value); + }; +}; +template +void write_structure_tree(structure_tree_node const * v, std::ostream & out, size_t level = 0); +template <> +inline void write_structure_tree(structure_tree_node const * v, std::ostream & out, size_t level) +{ + if (v) + { + output_tab(out, level); + out << "{" << std::endl; + output_tab(out, level + 1); + out << 
"\"class_name\":" + << "\"" << v->type << "\"," << std::endl; + output_tab(out, level + 1); + out << "\"name\":" + << "\"" << v->name << "\"," << std::endl; + output_tab(out, level + 1); + out << "\"size\":" + << "\"" << v->size << "\""; + if (v->children.size()) + { + out << "," << std::endl; + output_tab(out, level + 1); + out << "\"children\":[" << std::endl; + size_t written_child_elements = 0; + for (auto const & child : v->children) + { + if (written_child_elements++ > 0) + { + out << "," << std::endl; + } + write_structure_tree(child.second.get(), out, level + 2); + } + out << std::endl; + output_tab(out, level + 1); + out << "]" << std::endl; + } + else + { + out << std::endl; + } + output_tab(out, level); + out << "}"; + } +} +inline std::string create_html_header(char const * file_name) +{ + std::stringstream jsonheader; + jsonheader << "\n" + << " \n" + << " \n" + << " " << file_name << "\n" + << " \n" + << " \n" + << " \n" + << " \n" + << "\n" + << "\n" + << "
" << std::endl; + return jsonheader.str(); +} +inline std::string create_js_body(std::string const & jsonsize) +{ + std::stringstream jsonbody; + jsonbody << "" << std::endl + << "" << std::endl + << "" << std::endl; + return jsonbody.str(); +} +template <> +inline void +write_structure_tree(structure_tree_node const * v, std::ostream & out, SDSL_UNUSED size_t level) +{ + std::stringstream json_data; + write_structure_tree(v, json_data); + out << create_html_header("sdsl data structure visualization"); + out << create_js_body(json_data.str()); +} +} +#endif +#ifndef INCLUDED_SDSL_UTIL +#define INCLUDED_SDSL_UTIL +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define SDSL_STR(x) #x +#define SDSL_XSTR(s) SDSL_STR(s) +#include +#ifndef MSVC_COMPILER +# include +#endif +#ifndef _WIN32 +# include +# include +#else +# include +# include +#endif +namespace sdsl +{ +namespace util +{ +SDSL_UNUSED static bool verbose = false; +inline void set_verbose() +{ + verbose = true; +} +template +void set_random_bits(t_int_vec & v, int seed = 0); +template +void _set_zero_bits(t_int_vec & v); +template +void _set_one_bits(t_int_vec & v); +template +void bit_compress(t_int_vec & v); +template +void expand_width(t_int_vec & v, uint8_t new_width); +template +void mod(t_int_vec & v, typename t_int_vec::size_type m); +inline void cyclic_shifts(uint64_t * vec, uint8_t & n, uint64_t k, uint8_t int_width); +template +void set_to_value(t_int_vec & v, uint64_t k); +template +void set_to_value(t_int_vec & v, uint64_t k, t_int_vec_iterator it); +template +void set_to_id(t_int_vec & v); +template +typename t_int_vec::size_type cnt_one_bits(t_int_vec const & v); +template +typename t_int_vec::size_type cnt_onezero_bits(t_int_vec const & v); +template +typename t_int_vec::size_type cnt_zeroone_bits(t_int_vec const & v); +template +typename 
t_int_vec::size_type next_bit(t_int_vec const & v, uint64_t idx); +template +typename t_int_vec::size_type prev_bit(t_int_vec const & v, uint64_t idx); +/* Returns the size in bytes of `file`: RAM files are looked up in ram_fs, disk files via stat(); a disk file that cannot be stat'ed is reported as 0 bytes. */ +inline size_t file_size(std::string const & file) +{ + if (is_ram_file(file)) + { + return ram_fs::file_size(file); + } + else + { + struct stat fs; + /* stat() does not fill fs on failure, so never read st_size unless it returned 0. */ + if (stat(file.c_str(), &fs) != 0) + { + return 0; + } + return fs.st_size; + } +} +inline std::string basename(std::string file) +{ + file = disk_file_name(file); +#ifdef _WIN32 + char * c = _strdup((char const *)file.c_str()); + char file_name[_MAX_FNAME] = {0}; +# ifdef MSVC_COMPILER + ::_splitpath_s(c, NULL, 0, NULL, NULL, file_name, _MAX_FNAME, NULL, 0); +# else + ::_splitpath(c, NULL, NULL, file_name, NULL); +# endif + std::string res(file_name); +#else + char * c = strdup((char const *)file.c_str()); + std::string res = std::string(::basename(c)); +#endif + free(c); + return res; +} +inline std::string dirname(std::string file) +{ + bool ram_file = is_ram_file(file); + file = disk_file_name(file); +#ifdef _WIN32 + char * c = _strdup((char const *)file.c_str()); + char dir_name[_MAX_DIR] = {0}; + char drive[_MAX_DRIVE] = {0}; +# ifdef MSVC_COMPILER + ::_splitpath_s(c, drive, _MAX_DRIVE, dir_name, _MAX_DIR, NULL, 0, NULL, 0); +# else + ::_splitpath(c, drive, dir_name, NULL, NULL); +# endif + std::string res = std::string(drive) + std::string(dir_name); +#else + char * c = strdup((char const *)file.c_str()); + std::string res = std::string(::dirname(c)); + auto it = res.begin(); + auto next_it = res.begin() + 1; + while (it != res.end() and next_it != res.end()) + { + if (*next_it != '/' or *it != '/') + { + *(++it) = *next_it; + } + ++next_it; + } + res.resize(it - res.begin() + 1); +#endif + free(c); + if (ram_file) + { + if ("." 
== res) + { + res = ram_file_name(""); + } + else if ("/" == res) + { + res = ram_file_name(res); + } + } + return res; +} +/* Returns the demangled form of the mangled type name `name`, or `name` itself when demangling fails (always `name` on _WIN32). */ +inline std::string demangle(std::string const & name) +{ +#ifndef _WIN32 + /* __cxa_demangle needs a malloc()-allocated (or null) output buffer because it may realloc() it; let it allocate and release the result here. */ + int status = 0; + char * demangled = abi::__cxa_demangle(name.c_str(), nullptr, nullptr, &status); + if (status == 0 and demangled != nullptr) + { + std::string result(demangled); + free(demangled); + return result; + } + free(demangled); + return name; +#else + return name; +#endif +} +inline std::string demangle2(std::string const & name) +{ + std::string result = demangle(name); + std::vector words_to_delete; + words_to_delete.push_back("sdsl::"); + words_to_delete.push_back("(unsigned char)"); + words_to_delete.push_back(", unsigned long"); + for (size_t k = 0; k < words_to_delete.size(); ++k) + { + std::string w = words_to_delete[k]; + for (size_t i = result.find(w); i != std::string::npos; i = result.find(w, i)) + { + result.erase(i, w.length()); + ++i; + } + } + size_t index = 0; + std::string to_replace = "int_vector<1>"; + while ((index = result.find(to_replace, index)) != std::string::npos) + { + result.replace(index, to_replace.size(), "bit_vector"); + } + return result; +} +template +std::string to_string(T const & t, int w); +template +uint64_t hashvalue_of_classname(T const &) +{ + std::hash str_hash; + return str_hash(sdsl::util::demangle2(typeid(T).name())); +} +template +std::string class_to_hash(T const & t) +{ + return to_string(hashvalue_of_classname(t)); +} +template +std::string class_name(T const & t) +{ + std::string result = demangle2(typeid(t).name()); + size_t template_pos = result.find("<"); + if (template_pos != std::string::npos) + { + result = result.erase(template_pos); + } + return result; +} +inline char * str_from_errno() +{ +#ifdef MSVC_COMPILER +#pragma warning(disable : 4996) + return strerror(errno); +#pragma warning(default : 4996) +#else + return strerror(errno); +#endif +} +inline uint64_t _id_helper() +{ + static std::atomic id{0u}; + return id++; +} +inline uint64_t pid() +{ +#ifdef MSVC_COMPILER + 
return _getpid(); +#else + return getpid(); +#endif +} +inline uint64_t id() +{ + return _id_helper(); +} +template +std::string to_latex_string(T const & t); +inline std::string to_latex_string(unsigned char c) +{ + if (c == '_') + return "\\_"; + else if (c == '\0') + return "\\$"; + else + return to_string(c); +} +inline void delete_all_files(tMSS & file_map) +{ + for (auto file_pair : file_map) + { + sdsl::remove(file_pair.second); + } + file_map.clear(); +} +template +void clear(T & x) +{ + T y; + x = std::move(y); +} +template +void swap_support(S & s1, S & s2, P const * p1, P const * p2) +{ + std::swap(s1, s2); + s1.set_vector(p1); + s2.set_vector(p2); +} +template +void init_support(S & s, X const * x) +{ + S temp(x); + s = std::move(temp); + s.set_vector(x); +} +template +t_int_vec rnd_positions(uint8_t log_s, uint64_t & mask, uint64_t mod = 0, uint64_t seed = 17) +{ + mask = (1 << log_s) - 1; + t_int_vec rands(1 << log_s, 0); + set_random_bits(rands, seed); + if (mod > 0) + { + util::mod(rands, mod); + } + return rands; +} +template +struct is_regular : + std::integral_constant::value && std::is_copy_constructible::value + && std::is_move_constructible::value && std::is_copy_assignable::value + && std::is_move_assignable::value> +{}; +} +template +void util::set_random_bits(t_int_vec & v, int seed) +{ + std::mt19937_64 rng; + if (0 == seed) + { + rng.seed(std::chrono::system_clock::now().time_since_epoch().count() + util::id()); + } + else + rng.seed(seed); + uint64_t * data = v.data(); + if (v.empty()) + return; + *data = rng(); + for (typename t_int_vec::size_type i = 1; i < ((v.bit_size() + 63) >> 6); ++i) + { + *(++data) = rng(); + } +} +template +void util::mod(t_int_vec & v, typename t_int_vec::size_type m) +{ + for (typename t_int_vec::size_type i = 0; i < v.size(); ++i) + { + v[i] = v[i] % m; + } +} +template +void util::bit_compress(t_int_vec & v) +{ + auto max_elem = std::max_element(v.begin(), v.end()); + uint64_t max = 0; + if (max_elem != 
v.end()) + { + max = *max_elem; + } + uint8_t min_width = bits::hi(max) + 1; + uint8_t old_width = v.width(); + if (old_width > min_width) + { + uint64_t const * read_data = v.data(); + uint64_t * write_data = v.data(); + uint8_t read_offset = 0; + uint8_t write_offset = 0; + for (typename t_int_vec::size_type i = 0; i < v.size(); ++i) + { + uint64_t x = bits::read_int_and_move(read_data, read_offset, old_width); + bits::write_int_and_move(write_data, x, write_offset, min_width); + } + v.bit_resize(v.size() * min_width); + v.width(min_width); + } +} +template +void util::expand_width(t_int_vec & v, uint8_t new_width) +{ + uint8_t old_width = v.width(); + typename t_int_vec::size_type n = v.size(); + if (new_width > old_width) + { + if (n > 0) + { + typename t_int_vec::size_type i, old_pos, new_pos; + new_pos = (n - 1) * new_width; + old_pos = (n - 1) * old_width; + v.bit_resize(v.size() * new_width); + for (i = 0; i < n; ++i, new_pos -= new_width, old_pos -= old_width) + { + v.set_int(new_pos, v.get_int(old_pos, old_width), new_width); + } + } + v.width(new_width); + } +} +template +void util::_set_zero_bits(t_int_vec & v) +{ + std::for_each(v.data(), + v.data() + ((v.bit_size() + 63) >> 6), + [](uint64_t & value) + { + value = 0ULL; + }); +} +template +void util::_set_one_bits(t_int_vec & v) +{ + std::for_each(v.data(), + v.data() + ((v.bit_size() + 63) >> 6), + [](uint64_t & value) + { + value = -1ULL; + }); +} +inline void util::cyclic_shifts(uint64_t * vec, uint8_t & n, uint64_t k, uint8_t int_width) +{ + n = 0; + vec[0] = 0; + uint8_t offset = 0; + k &= 0xFFFFFFFFFFFFFFFFULL >> (64 - int_width); + do + { + vec[n] |= k << offset; + offset += int_width; + if (offset >= 64) + { + ++n; + if (int_width == 64) + return; + assert(int_width - (offset - 64) < 64); + vec[n] = k >> (int_width - (offset - 64)); + offset -= 64; + } + } + while (offset != 0); +} +template +void util::set_to_value(t_int_vec & v, uint64_t k) +{ + uint64_t * data = v.data(); + if (v.empty()) + 
return; + uint8_t int_width = v.width(); + if (int_width == 0) + { + throw std::logic_error("util::set_to_value can not be performed with int_width=0!"); + } + if (0 == k) + { + _set_zero_bits(v); + return; + } + if (bits::lo_set[int_width] == k) + { + _set_one_bits(v); + return; + } + uint8_t n; + uint64_t vec[65]; + util::cyclic_shifts(vec, n, k, int_width); + typename t_int_vec::size_type n64 = (v.bit_size() + 63) >> 6; + for (typename t_int_vec::size_type i = 0; i < n64;) + { + for (uint64_t ii = 0; ii < n and i < n64; ++ii, ++i) + { + *(data++) = vec[ii]; + } + } +} +template +void util::set_to_value(t_int_vec & v, uint64_t k, t_int_vec_iterator it) +{ + typedef typename t_int_vec::size_type size_type; + if (v.empty()) + return; + uint8_t int_width = v.width(); + if (int_width == 0) + { + throw std::logic_error("util::set_to_value can not be performed with int_width=0!"); + } + uint8_t n; + uint64_t vec[65]; + util::cyclic_shifts(vec, n, k, int_width); + size_type words = (v.bit_size() + 63) >> 6; + size_type word_pos = ((it - v.begin()) * int_width) >> 6; + uint8_t pos_in_word = ((it - v.begin()) * int_width) - (word_pos << 6); + uint8_t cyclic_shift = word_pos % n; + uint64_t * data = v.data() + word_pos; + *(data) &= bits::lo_set[pos_in_word]; + *(data) |= bits::lo_unset[pos_in_word] & vec[cyclic_shift++]; + ++word_pos; + while (word_pos < words) + { + for (; cyclic_shift < n && word_pos < words; ++cyclic_shift, ++word_pos) + { + *(++data) = vec[cyclic_shift]; + } + cyclic_shift = 0; + } +} +template +void util::set_to_id(t_int_vec & v) +{ + std::iota(v.begin(), v.end(), 0ULL); +} +template +typename t_int_vec::size_type util::cnt_one_bits(t_int_vec const & v) +{ + uint64_t const * data = v.data(); + if (v.empty()) + return 0; + typename t_int_vec::size_type result = bits::cnt(*data); + for (typename t_int_vec::size_type i = 1; i < ((v.bit_size() + 63) >> 6); ++i) + { + result += bits::cnt(*(++data)); + } + if (v.bit_size() & 0x3F) + { + result -= 
bits::cnt((*data) & (~bits::lo_set[v.bit_size() & 0x3F])); + } + return result; +} +template +typename t_int_vec::size_type util::cnt_onezero_bits(t_int_vec const & v) +{ + uint64_t const * data = v.data(); + if (v.empty()) + return 0; + uint64_t carry = 0, oldcarry = 0; + typename t_int_vec::size_type result = bits::cnt10(*data, carry); + for (typename t_int_vec::size_type i = 1; i < ((v.bit_size() + 63) >> 6); ++i) + { + oldcarry = carry; + result += bits::cnt10(*(++data), carry); + } + if (v.bit_size() & 0x3F) + { + result -= bits::cnt(bits::map10(*data, oldcarry) & bits::lo_unset[v.bit_size() & 0x3F]); + } + return result; +} +template +typename t_int_vec::size_type util::cnt_zeroone_bits(t_int_vec const & v) +{ + uint64_t const * data = v.data(); + if (v.empty()) + return 0; + uint64_t carry = 1, oldcarry = 1; + typename t_int_vec::size_type result = bits::cnt01(*data, carry); + for (typename t_int_vec::size_type i = 1; i < ((v.bit_size() + 63) >> 6); ++i) + { + oldcarry = carry; + result += bits::cnt01(*(++data), carry); + } + if (v.bit_size() & 0x3F) + { + result -= bits::cnt(bits::map01(*data, oldcarry) & bits::lo_unset[v.bit_size() & 0x3F]); + } + return result; +} +template +typename t_int_vec::size_type util::next_bit(t_int_vec const & v, uint64_t idx) +{ + uint64_t pos = idx >> 6; + uint64_t node = v.data()[pos]; + node >>= (idx & 0x3F); + if (node) + { + return idx + bits::lo(node); + } + else + { + ++pos; + while ((pos << 6) < v.bit_size()) + { + if (v.data()[pos]) + { + return (pos << 6) | bits::lo(v.data()[pos]); + } + ++pos; + } + return v.bit_size(); + } +} +template +typename t_int_vec::size_type util::prev_bit(t_int_vec const & v, uint64_t idx) +{ + uint64_t pos = idx >> 6; + uint64_t node = v.data()[pos]; + node <<= 63 - (idx & 0x3F); + if (node) + { + return bits::hi(node) + (pos << 6) - (63 - (idx & 0x3F)); + } + else + { + --pos; + while ((pos << 6) < v.bit_size()) + { + if (v.data()[pos]) + { + return (pos << 6) | bits::hi(v.data()[pos]); + 
} + --pos; + } + return v.bit_size(); + } +} +template +std::string util::to_string(T const & t, int w) +{ + std::stringstream ss; + ss << std::setw(w) << t; + return ss.str(); +} +template +std::string util::to_latex_string(T const & t) +{ + return to_string(t); +} +} +#endif +namespace sdsl +{ +template +class int_vector; +int remove(std::string const &); +template +void load_vector(std::vector &, std::istream &); +template +uint64_t +serialize_vector(std::vector const &, std::ostream &, sdsl::structure_tree_node * v = nullptr, std::string = ""); +template +struct has_serialize +{ + template + static constexpr auto check(T *) -> + typename std::is_same().serialize(std::declval(), + std::declval(), + std::declval())), + typename T::size_type>::type + { + return std::true_type(); + } + template + static constexpr std::false_type check(...) + { + return std::false_type(); + } + typedef decltype(check(nullptr)) type; + static constexpr bool value = type::value; +}; +template +struct has_load +{ + template + static constexpr auto check(T *) -> + typename std::is_same().load(std::declval())), void>::type + { + return std::true_type(); + } + template + static constexpr std::false_type check(...) 
+ { + return std::false_type(); + } + typedef decltype(check(nullptr)) type; + static constexpr bool value = type::value; +}; +template +size_t write_member(T const & t, std::ostream & out, sdsl::structure_tree_node * v = nullptr, std::string name = "") +{ + sdsl::structure_tree_node * child = sdsl::structure_tree::add_child(v, name, util::class_name(t)); + out.write((char *)&t, sizeof(t)); + size_t written_bytes = sizeof(t); + sdsl::structure_tree::add_size(child, written_bytes); + return written_bytes; +} +/* std::string specialisation: writes the length followed by the raw characters and returns the number of bytes written. */ +template <> +inline size_t +write_member(std::string const & t, std::ostream & out, sdsl::structure_tree_node * v, std::string name) +{ + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(t)); + size_t written_bytes = 0; + written_bytes += write_member(t.size(), out, child, "length"); + out.write(t.c_str(), t.size()); + written_bytes += t.size(); + /* Account the bytes on this string's own node, as the generic write_member does; adding them to the parent v left the child at size 0. */ + structure_tree::add_size(child, written_bytes); + return written_bytes; +} +template +void read_member(T & t, std::istream & in) +{ + in.read((char *)&t, sizeof(t)); +} +/* std::string specialisation: reads a length prefix and then that many characters into t. */ +template <> +inline void read_member(std::string & t, std::istream & in) +{ + /* size is initialised so a failed length read cannot leave it indeterminate. */ + std::string::size_type size = 0; + read_member(size, in); + /* Read straight into the string: no raw new[]/delete[] that would leak if read() throws. */ + std::string temp(size, '\0'); + if (size > 0) + { + in.read(&temp[0], size); + } + t = std::move(temp); +} +template +typename std::enable_if::value, typename X::size_type>::type +serialize(X const & x, std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") +{ + return x.serialize(out, v, name); +} +template +typename std::enable_if::value && std::is_trivial::value, uint64_t>::type +serialize(X const & x, std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") +{ + return write_member(x, out, v, name); +} +template +uint64_t +serialize(std::vector const & x, std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") +{ + return serialize(x.size(), out, v, name) + serialize_vector(x, out, v, name); +} +template 
+typename std::enable_if::value, void>::type load(X & x, std::istream & in) +{ + x.load(in); +} +template +typename std::enable_if::value && std::is_trivial::value, void>::type +load(X & x, std::istream & in) +{ + read_member(x, in); +} +template +void load(std::vector & x, std::istream & in) +{ + typename std::vector::size_type size; + load(size, in); + x.resize(size); + load_vector(x, in); +} +template +bool load_from_file(T & v, std::string const & file); +template +bool load_vector_from_file(t_int_vec & v, std::string const & file, uint8_t num_bytes = 1, uint8_t max_int_width = 64) +{ + if ((uint8_t)0 == num_bytes) + { + return load_from_file(v, file); + } + else if (num_bytes == 'd') + { + uint64_t x = 0, max_x = 0; + isfstream in(file, std::ios::in | std::ios::binary); + if (!in) + { + return false; + } + else + { + std::vector tmp; + while (in >> x) + { + tmp.push_back(x); + max_x = std::max(x, max_x); + } + v.width(bits::hi(max_x) + 1); + v.resize(tmp.size()); + for (size_t i = 0; i < tmp.size(); ++i) + { + v[i] = tmp[i]; + } + return true; + } + } + else + { + off_t file_size = util::file_size(file); + if (file_size == 0) + { + v.resize(0); + return true; + } + if (file_size % num_bytes != 0) + { + throw std::logic_error("file size " + util::to_string(file_size) + " of \"" + file + + "\" is not a multiple of " + util::to_string(num_bytes)); + return false; + } + isfstream in(file, std::ios::in | std::ios::binary); + if (in) + { + v.width(std::min((int)8 * num_bytes, (int)max_int_width)); + v.resize(file_size / num_bytes); + if (8 == t_int_vec::fixed_int_width and 1 == num_bytes) + { + in.read((char *)v.data(), file_size); + } + else + { + size_t idx = 0; + const size_t block_size = conf::SDSL_BLOCK_SIZE * num_bytes; + std::vector buf(block_size); + uint64_t x = 0; + uint8_t cur_byte = 0; + do + { + in.read((char *)buf.data(), block_size); + size_t read = in.gcount(); + uint8_t * begin = buf.data(); + uint8_t * end = begin + read; + while (begin < end) + { 
+ x |= ((uint64_t)(*begin)) << (cur_byte * 8); + ++cur_byte; + if (cur_byte == num_bytes) + { + v[idx++] = x; + cur_byte = 0; + x = 0ULL; + } + ++begin; + } + } + while (idx < v.size()); + in.close(); + } + return true; + } + else + { + return false; + } + } +} +template +bool store_to_file(T const & v, std::string const & file); +inline bool store_to_file(char const * v, std::string const & file) +{ + osfstream out(file, std::ios::binary | std::ios::trunc | std::ios::out); + if (!out) + { + if (util::verbose) + { + std::cerr << "ERROR: store_to_file(const char *v, const std::string&)" << std::endl; + return false; + } + } + uint64_t n = strlen((char const *)v); + out.write(v, n); + out.close(); + return true; +} +template +bool store_to_file(int_vector const & v, std::string const & file); +template +bool store_to_plain_array(t_int_vec & v, std::string const & file) +{ + osfstream out(file, std::ios::out | std::ios::binary); + if (out) + { + for (typename t_int_vec::size_type i = 0; i < v.size(); ++i) + { + int_type x = v[i]; + out.write((char *)&x, sizeof(int_type)); + } + return true; + } + else + { + return false; + } +} +template +size_t +serialize_empty_object(std::ostream &, structure_tree_node * v = nullptr, std::string name = "", T const * t = nullptr) +{ + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*t)); + size_t written_bytes = 0; + structure_tree::add_size(child, written_bytes); + return written_bytes; +} +template +typename T::size_type size_in_bytes(T const & t); +template +double size_in_mega_bytes(T const & t); +struct nullstream : std::ostream +{ + struct nullbuf : std::streambuf + { + int overflow(int c) + { + return traits_type::not_eof(c); + } + int xputc(int) + { + return 0; + } + std::streamsize xsputn(char const *, std::streamsize n) + { + return n; + } + int sync() + { + return 0; + } + } m_sbuf; + nullstream() : std::ios(&m_sbuf), std::ostream(&m_sbuf), m_sbuf() + {} +}; +template +uint64_t 
+serialize_vector(std::vector const & vec, std::ostream & out, sdsl::structure_tree_node * v, std::string name) +{ + if (vec.size() > 0) + { + sdsl::structure_tree_node * child = + sdsl::structure_tree::add_child(v, name, "std::vector<" + util::class_name(vec[0]) + ">"); + size_t written_bytes = 0; + for (auto const & x : vec) + { + written_bytes += serialize(x, out, child, "[]"); + } + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + else + { + return 0; + } +} +template +void load_vector(std::vector & vec, std::istream & in) +{ + for (typename std::vector::size_type i = 0; i < vec.size(); ++i) + { + load(vec[i], in); + } +} +template +void write_structure(X const & x, std::ostream & out) +{ + std::unique_ptr st_node(new structure_tree_node("name", "type")); + nullstream ns; + serialize(x, ns, st_node.get(), ""); + if (st_node.get()->children.size() > 0) + { + for (auto const & child : st_node.get()->children) + { + sdsl::write_structure_tree(child.second.get(), out); + } + } +} +template +void write_structure(X const & x, std::string file) +{ + std::ofstream out(file); + write_structure(x, out); +} +template +void write_structure(std::ostream & out, Xs... xs) +{ + typedef std::unique_ptr up_stn_type; + up_stn_type st_node(new structure_tree_node("name", "type")); + _write_structure(st_node, xs...); + sdsl::write_structure_tree(st_node.get(), out); +} +template +void _write_structure(std::unique_ptr & st_node, X x, Xs... 
xs) +{ + nullstream ns; + serialize(x, ns, st_node.get(), ""); + _write_structure(st_node, xs...); +} +inline void _write_structure(std::unique_ptr &) +{} +inline uint64_t _parse_number(std::string::const_iterator & c, std::string::const_iterator const & end) +{ + std::string::const_iterator s = c; + while (c != end and isdigit(*c)) + ++c; + if (c > s) + { + return std::stoull(std::string(s, c)); + } + else + { + return 0; + } +} +template +t_csa const & _idx_csa(t_csa const & t, csa_tag) +{ + return t; +} +template +const typename t_cst::csa_type & _idx_csa(t_cst const & t, cst_tag) +{ + return t.csa; +} +template +std::string _idx_lcp_val(t_csa const &, uint64_t, uint64_t, csa_tag) +{ + return ""; +} +template +std::string _idx_lcp_val(t_cst const & t, uint64_t i, uint64_t w, cst_tag) +{ + return util::to_string(t.lcp[i], w); +} +template +struct default_sentinel +{ + static char const value = '$'; +}; +template +struct default_sentinel +{ + static char const value = '$'; +}; +template +struct default_sentinel +{ + static char const value = '0'; +}; +template +void csXprintf(std::ostream & out, + std::string const & format, + t_idx const & idx, + char sentinel = default_sentinel::value) +{ + typename t_idx::index_category cat; + const typename t_idx::csa_type & csa = _idx_csa(idx, cat); + std::vector res(csa.size()); + bool truncate = false; + for (std::string::const_iterator c = format.begin(), s = c; c != format.end(); s = c) + { + while (c != format.end() and *c != '%') + ++c; + if (c > s) + { + std::vector to_copy(csa.size(), std::string(s, c)); + transform(res.begin(), res.end(), to_copy.begin(), res.begin(), std::plus()); + } + if (c == format.end()) + break; + ++c; + uint64_t w = _parse_number(c, format.end()); + if (c == format.end()) + break; + uint64_t W = 0; + if (':' == *c) + { + ++c; + W = _parse_number(c, format.end()); + } + if (c == format.end()) + break; + for (uint64_t i = 0; i < csa.size(); ++i) + { + switch (*c) + { + case 'I': + res[i] += 
util::to_string(i, w); + break; + case 'S': + res[i] += util::to_string(csa[i], w); + break; + case 's': + res[i] += util::to_string(csa.isa[i], w); + break; + case 'P': + res[i] += util::to_string(csa.psi[i], w); + break; + case 'p': + res[i] += util::to_string(csa.lf[i], w); + break; + case 'L': + res[i] += _idx_lcp_val(idx, i, w, cat); + break; + case 'B': + if (0 == csa.bwt[i]) + { + res[i] += util::to_string(sentinel, w); + } + else + { + res[i] += util::to_string(csa.bwt[i], w); + } + break; + case 'U': + truncate = true; + SDSL_FALLTHROUGH + case 'T': + for (uint64_t k = 0; (w > 0 and k < w) or (0 == w and k < csa.size()); ++k) + { + if (0 == csa.text[(csa[i] + k) % csa.size()]) + { + res[i] += util::to_string(sentinel, W); + if (truncate) + { + truncate = false; + break; + } + } + else + { + res[i] += util::to_string(csa.text[(csa[i] + k) % csa.size()], W); + } + } + break; + case 'u': + truncate = true; + SDSL_FALLTHROUGH + case 't': + for (uint64_t k = 0; (w > 0 and k < w) or (0 == w and k < csa.size()); ++k) + { + if (0 == csa.text[(i + k) % csa.size()]) + { + res[i] += util::to_string(sentinel, W); + if (truncate) + { + truncate = false; + break; + } + } + else + { + res[i] += util::to_string(csa.text[(i + k) % csa.size()], W); + } + } + break; + case '%': + res[i] += "%"; + break; + } + } + ++c; + } + for (size_t i = 0; i < res.size(); ++i) + out << res[i] << std::endl; +} +inline std::string cache_file_name(std::string const & key, cache_config const & config) +{ + if (config.file_map.count(key) != 0) + { + return config.file_map.at(key); + } + return config.dir + "/" + key + "_" + config.id + ".sdsl"; +} +template +std::string cache_file_name(std::string const & key, cache_config const & config) +{ + return cache_file_name(key + "_" + util::class_to_hash(T()), config); +} +inline void register_cache_file(std::string const & key, cache_config & config) +{ + std::string file_name = cache_file_name(key, config); + isfstream in(file_name); + if (in) + { 
+ config.file_map[key] = file_name; + } +} +inline bool cache_file_exists(std::string const & key, cache_config const & config) +{ + std::string file_name = cache_file_name(key, config); + isfstream in(file_name); + if (in) + { + in.close(); + return true; + } + return false; +} +template +bool cache_file_exists(std::string const & key, cache_config const & config) +{ + return cache_file_exists(key + "_" + util::class_to_hash(T()), config); +} +inline std::string tmp_file(cache_config const & config, std::string name_part = "") +{ + return config.dir + "/" + util::to_string(util::pid()) + "_" + util::to_string(util::id()) + name_part + ".sdsl"; +} +inline std::string tmp_file(std::string const & filename, std::string name_part = "") +{ + return util::dirname(filename) + "/" + util::to_string(util::pid()) + "_" + util::to_string(util::id()) + name_part + + ".sdsl"; +} +template +bool load_from_cache(T & v, std::string const & key, cache_config const & config, bool add_type_hash = false) +{ + std::string file; + if (add_type_hash) + { + file = cache_file_name(key, config); + } + else + { + file = cache_file_name(key, config); + } + if (load_from_file(v, file)) + { + if (util::verbose) + { + std::cerr << "Load `" << file << std::endl; + } + return true; + } + else + { + std::cerr << "WARNING: Could not load file '"; + std::cerr << file << "'" << std::endl; + return false; + } +} +template +bool store_to_cache(T const & v, std::string const & key, cache_config & config, bool add_type_hash = false) +{ + std::string file; + if (add_type_hash) + { + file = cache_file_name(key, config); + } + else + { + file = cache_file_name(key, config); + } + if (store_to_file(v, file)) + { + config.file_map[std::string(key)] = file; + return true; + } + else + { + std::cerr << "WARNING: store_to_cache: could not store file `" << file << "`" << std::endl; + return false; + } +} +template +bool remove_from_cache(std::string const & key, cache_config & config, bool add_type_hash = false) 
+{ + std::string file; + if (add_type_hash) + { + file = cache_file_name(key, config); + } + else + { + file = cache_file_name(key, config); + } + config.file_map.erase(key); + if (sdsl::remove(file) == 0) + { + return true; + } + else + { + std::cerr << "WARNING: delete_from_cache: could not delete file `" << file << "`" << std::endl; + return false; + } +} +template +typename T::size_type size_in_bytes(T const & t) +{ + nullstream ns; + return serialize(t, ns); +} +template +double size_in_mega_bytes(T const & t) +{ + return size_in_bytes(t) / (1024.0 * 1024.0); +} +template +void add_hash(T const & t, std::ostream & out) +{ + uint64_t hash_value = util::hashvalue_of_classname(t); + write_member(hash_value, out); +} +template +bool store_to_file(T const & t, std::string const & file) +{ + osfstream out(file, std::ios::binary | std::ios::trunc | std::ios::out); + if (!out) + { + if (util::verbose) + { + std::cerr << "ERROR: store_to_file not successful for: `" << file << "`" << std::endl; + } + return false; + } + serialize(t, out); + out.close(); + if (util::verbose) + { + std::cerr << "INFO: store_to_file: `" << file << "`" << std::endl; + } + return true; +} +template +bool store_to_checked_file(T const & t, std::string const & file) +{ + std::string checkfile = file + "_check"; + osfstream out(checkfile, std::ios::binary | std::ios::trunc | std::ios::out); + if (!out) + { + if (util::verbose) + { + std::cerr << "ERROR: store_to_checked_file not successful for: `" << checkfile << "`" << std::endl; + } + return false; + } + add_hash(t, out); + out.close(); + return store_to_file(t, file); +} +inline bool store_to_checked_file(char const * v, std::string const & file) +{ + std::string checkfile = file + "_check"; + osfstream out(checkfile, std::ios::binary | std::ios::trunc | std::ios::out); + if (!out) + { + if (util::verbose) + { + std::cerr << "ERROR: store_to_checked_file(const char *v, const std::string&)" << std::endl; + return false; + } + } + add_hash(v, 
out); + out.close(); + return store_to_file(v, file); +} +inline bool store_to_file(std::string const & v, std::string const & file) +{ + osfstream out(file, std::ios::binary | std::ios::trunc | std::ios::out); + if (!out) + { + if (util::verbose) + { + std::cerr << "ERROR: store_to_file(const std::string& v, const std::string&)" << std::endl; + return false; + } + } + out.write(v.data(), v.size()); + out.close(); + return true; +} +template +bool store_to_file(int_vector const & v, std::string const & file) +{ + osfstream out(file, std::ios::binary | std::ios::trunc | std::ios::out); + if (!out) + { + std::cerr << "ERROR: util::store_to_file:: Could not open file `" << file << "`" << std::endl; + return false; + } + else + { + if (util::verbose) + { + std::cerr << "INFO: store_to_file: `" << file << "`" << std::endl; + } + } + v.serialize(out, nullptr, ""); + out.close(); + return true; +} +template +bool store_to_checked_file(int_vector const & v, std::string const & file) +{ + std::string checkfile = file + "_check"; + osfstream out(checkfile, std::ios::binary | std::ios::trunc | std::ios::out); + if (!out) + { + std::cerr << "ERROR: util::store_to_checked_file: Could not open check file `" << checkfile << "`" << std::endl; + return false; + } + else + { + if (util::verbose) + { + std::cerr << "INFO: store_to_checked_file: `" << checkfile << "`" << std::endl; + } + } + add_hash(v, out); + out.close(); + return store_to_file(v, file); +} +template +bool load_from_file(T & v, std::string const & file) +{ + isfstream in(file, std::ios::binary | std::ios::in); + if (!in) + { + if (util::verbose) + { + std::cerr << "Could not load file `" << file << "`" << std::endl; + } + return false; + } + load(v, in); + in.close(); + if (util::verbose) + { + std::cerr << "Load file `" << file << "`" << std::endl; + } + return true; +} +template +bool load_from_checked_file(T & v, std::string const & file) +{ + isfstream in(file + "_check", std::ios::binary | std::ios::in); + if 
(!in) + { + if (util::verbose) + { + std::cerr << "Could not load check file `" << file << "_check`" << std::endl; + } + return false; + } + uint64_t hash_value; + read_member(hash_value, in); + if (hash_value != util::hashvalue_of_classname(v)) + { + if (util::verbose) + { + std::cerr << "File `" << file << "` is not an instance of the class `" + << sdsl::util::demangle2(typeid(T).name()) << "`" << std::endl; + } + return false; + } + return load_from_file(v, file); +} +template +inline typename std::enable_if::value + or std::is_same::value + or std::is_same::value, + std::ostream &>::type +operator<<(std::ostream & os, t_iv const & v) +{ + for (auto it = v.begin(), end = v.end(); it != end; ++it) + { + os << *it; + if (it + 1 != end) + os << " "; + } + return os; +} +template +inline typename std::enable_if::value, std::ostream &>::type +operator<<(std::ostream & os, t_iv const & v) +{ + for (auto it = v.begin(), end = v.end(); it != end; ++it) + { + os << *it; + if (it + 1 != end and std::is_same::value) + os << " "; + } + return os; +} +template +inline typename std::enable_if::value, std::ostream &>::type +operator<<(std::ostream & os, std::vector const & v) +{ + for (auto it = v.begin(), end = v.end(); it != end; ++it) + { + os << *it; + if (it + 1 != end) + os << " "; + } + return os; +} +template +inline typename std::enable_if::value, std::ostream &>::type +operator<<(std::ostream & os, t_iv const & v) +{ + for (auto it = v.begin(), end = v.end(); it != end; ++it) + { + os << *it; + if (it + 1 != end and std::is_same::value) + os << " "; + } + return os; +} +} +#endif +#ifndef INCLUDED_SDSL_MEMORY_MANAGEMENT +#define INCLUDED_SDSL_MEMORY_MANAGEMENT +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace sdsl +{ +inline void output_event_json(std::ostream & out, mm_event const & ev, 
tracker_storage const & m) +{ + using namespace std::chrono; + out << "\t\t" + << "\"name\" : " + << "\"" << ev.name << "\",\n"; + out << "\t\t" + << "\"usage\" : [" + << "\n"; + for (size_t j = 0; j < ev.allocations.size(); j++) + { + out << "\t\t\t[" << duration_cast(ev.allocations[j].timestamp - m.start_log).count() << "," + << ev.allocations[j].usage << "]"; + if (j + 1 < ev.allocations.size()) + { + out << ",\n"; + } + else + { + out << "\n"; + } + } + out << "\t\t" + << "]\n"; +} +template <> +inline void write_mem_log(std::ostream & out, tracker_storage const & m) +{ + auto events = m.completed_events; + std::sort(events.begin(), events.end()); + out << "[\n"; + for (size_t i = 0; i < events.size(); i++) + { + out << "\t{\n"; + output_event_json(out, events[i], m); + if (i < events.size() - 1) + { + out << "\t},\n"; + } + else + { + out << "\t}\n"; + } + } + out << "]\n"; +} +inline std::string create_mem_html_header() +{ + std::stringstream jsonheader; + jsonheader << "\n" + << "\n" + << "\n" + << "\n" + << "sdsl memory usage visualization\n" + << "\n" + << "\n" + << "\n" + << "\n" + << "
"; + return jsonbody.str(); +} +template <> +inline void write_mem_log(std::ostream & out, tracker_storage const & m) +{ + std::stringstream json_data; + write_mem_log(json_data, m); + out << create_mem_html_header(); + out << create_mem_js_body(json_data.str()); +} +#pragma pack(push, 1) +typedef struct mm_block +{ + size_t size; + struct mm_block * next; + struct mm_block * prev; +} mm_block_t; +typedef struct bfoot +{ + size_t size; +} mm_block_foot_t; +#pragma pack(pop) +#define ALIGNMENT sizeof(uint64_t) +#define ALIGNSPLIT(size) (((size)) & ~0x7) +#define ALIGN(size) (((size) + (ALIGNMENT - 1)) & ~0x7) +#define MM_BLOCK_OVERHEAD (sizeof(size_t) + sizeof(size_t)) +#define MIN_BLOCKSIZE (ALIGN(sizeof(mm_block_t) + sizeof(mm_block_foot_t))) +#define UNMASK_SIZE(size) ((size) & ~1) +#define ISFREE(size) ((size)&1) +#define SETFREE(size) ((size) | 1) +#define SPLIT_THRESHOLD (MIN_BLOCKSIZE) +inline mm_block_t * block_cur(void * ptr) +{ + mm_block_t * bptr = (mm_block_t *)((uint8_t *)ptr - sizeof(size_t)); + return bptr; +} +inline mm_block_t * block_prev(mm_block_t * cur_bptr, mm_block_t * first) +{ + if (cur_bptr == first) + return nullptr; + mm_block_foot_t * prev_foot = (mm_block_foot_t *)((uint8_t *)cur_bptr - sizeof(mm_block_foot_t)); + mm_block_t * prev_bptr = (mm_block_t *)((uint8_t *)cur_bptr - UNMASK_SIZE(prev_foot->size)); + return prev_bptr; +} +inline mm_block_t * block_next(mm_block_t * cur_bptr, uint8_t * top) +{ + if ((uint8_t *)((uint8_t *)cur_bptr + UNMASK_SIZE(cur_bptr->size)) >= top) + return nullptr; + mm_block_t * next_bptr = (mm_block_t *)((uint8_t *)cur_bptr + UNMASK_SIZE(cur_bptr->size)); + return next_bptr; +} +inline size_t block_size(void * ptr) +{ + mm_block_t * bptr = block_cur(ptr); + return UNMASK_SIZE(bptr->size); +} +inline bool block_isfree(mm_block_t * ptr) +{ + return ((ptr->size) & 1ULL); +} +inline bool block_nextfree(mm_block_t * ptr, uint8_t * top) +{ + mm_block_t * next = block_next(ptr, top); + if (next && 
block_isfree(next)) + return true; + return false; +} +inline bool block_prevfree(mm_block_t * ptr, mm_block_t * begin) +{ + mm_block_t * prev = block_prev(ptr, begin); + if (prev && block_isfree(prev)) + return 1; + return 0; +} +inline void foot_update(mm_block_t * ptr, size_t size) +{ + mm_block_foot_t * fptr = (mm_block_foot_t *)((uint8_t *)ptr + UNMASK_SIZE(size) - sizeof(mm_block_foot_t)); + fptr->size = size; +} +inline void block_update(mm_block_t * ptr, size_t size) +{ + ptr->size = size; + foot_update(ptr, size); +} +inline void * block_data(mm_block_t * ptr) +{ + return (void *)((uint8_t *)ptr + sizeof(size_t)); +} +inline size_t block_getdatasize(mm_block_t * ptr) +{ + size_t blocksize = UNMASK_SIZE(ptr->size); + return blocksize - sizeof(size_t) - sizeof(mm_block_foot_t); +} +inline void block_markfree(mm_block_t * ptr) +{ + block_update(ptr, SETFREE(ptr->size)); +} +inline void block_markused(mm_block_t * ptr) +{ + block_update(ptr, UNMASK_SIZE(ptr->size)); +} +#ifndef _WIN32 +class hugepage_allocator +{ +private: + uint8_t * m_base = nullptr; + mm_block_t * m_first_block = nullptr; + uint8_t * m_top = nullptr; + size_t m_total_size = 0; + std::multimap m_free_large; +private: + inline void block_print(int id, mm_block_t * bptr) + { + fprintf(stdout, + "%d addr=%p size=%lu (%lu) free=%d\n", + id, + ((void *)bptr), + UNMASK_SIZE(bptr->size), + bptr->size, + block_isfree(bptr)); + fflush(stdout); + } + inline uint64_t extract_number(std::string & line) + { + std::string num_str; + for (size_t i = line.size() - 1; i + 1 >= 1; i--) + { + if (isdigit(line[i])) + { + num_str.insert(num_str.begin(), line[i]); + } + else + { + if (num_str.size() > 0) + { + break; + } + } + } + return std::strtoull(num_str.c_str(), nullptr, 10); + } + inline uint64_t extract_multiplier(std::string & line) + { + uint64_t num = 1; + if (line[line.size() - 2] == 'k' || line[line.size() - 2] == 'K') + { + num = 1024; + } + if (line[line.size() - 2] == 'm' || line[line.size() - 2] 
== 'M') + { + num = 1024 * 1024; + } + if (line[line.size() - 2] == 'g' || line[line.size() - 2] == 'G') + { + num = 1024 * 1024 * 1024; + } + return num; + } + size_t determine_available_hugepage_memory() + { + size_t size_in_bytes = 0; + size_t page_size_in_bytes = 0; + size_t num_free_pages = 0; + const std::string meminfo_file = "/proc/meminfo"; + const std::string ps_str = "Hugepagesize:"; + const std::string pf_str = "HugePages_Free:"; + std::ifstream mifs(meminfo_file); + if (mifs.is_open()) + { + std::string line; + while (std::getline(mifs, line)) + { + auto ps = std::mismatch(ps_str.begin(), ps_str.end(), line.begin()); + if (ps.first == ps_str.end()) + { + page_size_in_bytes = extract_number(line) * extract_multiplier(line); + } + auto pf = std::mismatch(pf_str.begin(), pf_str.end(), line.begin()); + if (pf.first == pf_str.end()) + { + num_free_pages = extract_number(line); + } + } + size_in_bytes = page_size_in_bytes * num_free_pages; + } + else + { + throw std::system_error(ENOMEM, + std::system_category(), + "hugepage_allocator could not automatically determine available hugepages"); + } + return size_in_bytes; + } + void coalesce_block(mm_block_t * block) + { + mm_block_t * newblock = block; + if (block_nextfree(block, m_top)) + { + mm_block_t * next = block_next(block, m_top); + remove_from_free_set(next); + block_update(block, UNMASK_SIZE(block->size) + UNMASK_SIZE(next->size)); + } + if (block_prevfree(block, m_first_block)) + { + mm_block_t * prev = block_prev(block, m_first_block); + remove_from_free_set(prev); + newblock = prev; + block_update(prev, UNMASK_SIZE(prev->size) + UNMASK_SIZE(block->size)); + } + if (newblock) + { + block_markfree(newblock); + insert_into_free_set(newblock); + } + } + void split_block(mm_block_t * bptr, size_t size) + { + size_t blocksize = UNMASK_SIZE(bptr->size); + int64_t newblocksize = ALIGNSPLIT(blocksize - ALIGN(size + MM_BLOCK_OVERHEAD)); + if (newblocksize >= (int64_t)SPLIT_THRESHOLD) + { + block_update(bptr, 
blocksize - newblocksize); + mm_block_t * newblock = (mm_block_t *)((char *)bptr + (blocksize - newblocksize)); + block_update(newblock, newblocksize); + coalesce_block(newblock); + } + } + uint8_t * hsbrk(size_t size) + { + ptrdiff_t left = (ptrdiff_t)m_total_size - (m_top - m_base); + if (left < (ptrdiff_t)size) + { + throw std::system_error(ENOMEM, + std::system_category(), + "hugepage_allocator: not enough hugepage memory available"); + } + uint8_t * new_mem = m_top; + m_top += size; + return new_mem; + } + mm_block_t * new_block(size_t size) + { + size = ALIGN(size + MM_BLOCK_OVERHEAD); + if (size < MIN_BLOCKSIZE) + size = MIN_BLOCKSIZE; + mm_block_t * ptr = (mm_block_t *)hsbrk(size); + block_update(ptr, size); + return ptr; + } + void remove_from_free_set(mm_block_t * block) + { + auto eq_range = m_free_large.equal_range(block->size); + auto itr = eq_range.first; + auto last = eq_range.second; + auto found = m_free_large.end(); + while (itr != last) + { + if (itr->second == block) + { + found = itr; + } + ++itr; + } + if (found == m_free_large.end()) + { + found = last; + } + m_free_large.erase(found); + } + void insert_into_free_set(mm_block_t * block) + { + m_free_large.insert({block->size, block}); + } + mm_block_t * find_free_block(size_t size_in_bytes) + { + mm_block_t * bptr = nullptr; + auto free_block = m_free_large.lower_bound(size_in_bytes); + if (free_block != m_free_large.end()) + { + bptr = free_block->second; + m_free_large.erase(free_block); + } + return bptr; + } + mm_block_t * last_block() + { + mm_block_t * last = nullptr; + if (m_top != m_base) + { + mm_block_foot_t * fptr = (mm_block_foot_t *)(m_top - sizeof(size_t)); + last = (mm_block_t *)(((uint8_t *)fptr) - UNMASK_SIZE(fptr->size) + sizeof(size_t)); + } + return last; + } + void print_heap() + { + mm_block_t * bptr = m_first_block; + size_t id = 0; + while (bptr) + { + block_print(id, bptr); + id++; + bptr = block_next(bptr, m_top); + } + } +public: + void init(SDSL_UNUSED size_t 
size_in_bytes = 0) + { +# ifdef MAP_HUGETLB + if (size_in_bytes == 0) + { + size_in_bytes = determine_available_hugepage_memory(); + } + m_total_size = size_in_bytes; + m_base = (uint8_t *) + mmap(nullptr, m_total_size, (PROT_READ | PROT_WRITE), (MAP_HUGETLB | MAP_ANONYMOUS | MAP_PRIVATE), 0, 0); + if (m_base == MAP_FAILED) + { + throw std::system_error(ENOMEM, std::system_category(), "hugepage_allocator could not allocate hugepages"); + } + else + { + m_top = m_base; + m_first_block = (mm_block_t *)m_base; + } +# else + throw std::system_error(ENOMEM, + std::system_category(), + "hugepage_allocator: MAP_HUGETLB / hugepage support not available"); +# endif + } + void * mm_realloc(void * ptr, size_t size) + { + if (nullptr == ptr) + return mm_alloc(size); + if (size == 0) + { + mm_free(ptr); + return nullptr; + } + mm_block_t * bptr = block_cur(ptr); + bool need_malloc = 0; + size_t blockdatasize = block_getdatasize(bptr); + if (size == blockdatasize) + { + return ptr; + } + if (size < blockdatasize) + { + split_block(bptr, size); + } + else + { + mm_block_t * next = block_next(bptr, m_top); + if (!next) + { + blockdatasize = block_getdatasize(bptr); + size_t needed = ALIGN(size - blockdatasize); + hsbrk(needed); + block_update(bptr, UNMASK_SIZE(bptr->size) + needed); + return block_data(bptr); + } + else + { + if (next && block_isfree(next)) + { + if (blockdatasize + UNMASK_SIZE(next->size) >= size) + { + remove_from_free_set(next); + block_update(bptr, UNMASK_SIZE(bptr->size) + UNMASK_SIZE(next->size)); + } + else + { + need_malloc = true; + } + } + else + { + mm_block_t * prev = block_prev(bptr, m_first_block); + if (prev && block_isfree(prev)) + { + if (blockdatasize + UNMASK_SIZE(prev->size) >= size) + { + remove_from_free_set(prev); + size_t newsize = UNMASK_SIZE(prev->size) + UNMASK_SIZE(bptr->size); + block_update(prev, newsize); + block_markused(prev); + ptr = memmove(block_data(prev), ptr, blockdatasize); + } + else + { + need_malloc = true; + } + } + else 
+ { + need_malloc = true; + } + } + } + } + if (need_malloc) + { + /* need_malloc is only set on the growing path (size > blockdatasize), so just the old payload of blockdatasize bytes is copied. Copying size bytes read past the end of the old block and could overlap the freshly allocated one. */ + void * newptr = mm_alloc(size); + memcpy(newptr, ptr, blockdatasize); + mm_free(ptr); + ptr = newptr; + } + return ptr; + } + /* Returns storage for size_in_bytes: reuses a block from the free set when find_free_block() yields one (splitting off the remainder), otherwise extends a free last block via hsbrk(), otherwise creates a new block. */ + void * mm_alloc(size_t size_in_bytes) + { + mm_block_t * bptr = nullptr; + if ((bptr = find_free_block(size_in_bytes + MM_BLOCK_OVERHEAD)) != nullptr) + { + block_markused(bptr); + split_block(bptr, size_in_bytes); + } + else + { + bptr = last_block(); + if (bptr && block_isfree(bptr)) + { + size_t blockdatasize = block_getdatasize(bptr); + size_t needed = ALIGN(size_in_bytes - blockdatasize); + hsbrk(needed); + remove_from_free_set(bptr); + block_update(bptr, blockdatasize + needed + sizeof(size_t) + sizeof(mm_block_foot_t)); + block_markused(bptr); + } + else + { + bptr = new_block(size_in_bytes); + } + } + return block_data(bptr); + } + /* Marks the block behind ptr as free and coalesces it with free neighbours; nullptr is ignored. */ + void mm_free(void * ptr) + { + if (ptr) + { + mm_block_t * bptr = block_cur(ptr); + block_markfree(bptr); + coalesce_block(bptr); + } + } + /* True for nullptr and for any pointer in [m_base, m_top). */ + bool in_address_space(void * ptr) + { + if (ptr == nullptr) + { + return true; + } + if (ptr >= m_base && ptr < m_top) + { + return true; + } + return false; + } + /* Accessor for the function-local static allocator instance. */ + static hugepage_allocator & the_allocator() + { + static hugepage_allocator a; + return a; + } +}; +#endif +class memory_manager +{ +private: + bool hugepages = false; +private: + static memory_manager & the_manager() + { + static memory_manager m; + return m; + } +public: + /* Allocates size_in_bytes: from the hugepage allocator when hugepages are enabled (non-Windows builds only), otherwise zero-initialised via calloc. */ + static uint64_t * alloc_mem(size_t size_in_bytes) + { +#ifndef _WIN32 + auto & m = the_manager(); + if (m.hugepages) + { + return (uint64_t *)hugepage_allocator::the_allocator().mm_alloc(size_in_bytes); + } +#endif + return (uint64_t *)calloc(size_in_bytes, 1); + } + /* Releases ptr through the hugepage allocator when hugepages are enabled and ptr lies in its address space, otherwise through std::free. */ + static void free_mem(uint64_t * ptr) + { +#ifndef _WIN32 + auto & m = the_manager(); + if (m.hugepages and hugepage_allocator::the_allocator().in_address_space(ptr)) + { + hugepage_allocator::the_allocator().mm_free(ptr); + return; + } +#endif + std::free(ptr); + } + static uint64_t * realloc_mem(uint64_t * ptr, 
size_t size) + { +#ifndef _WIN32 + auto & m = the_manager(); + if (m.hugepages and hugepage_allocator::the_allocator().in_address_space(ptr)) + { + return (uint64_t *)hugepage_allocator::the_allocator().mm_realloc(ptr, size); + } +#endif + return (uint64_t *)realloc(ptr, size); + } +public: + static void use_hugepages(size_t bytes = 0) + { +#ifndef _WIN32 + auto & m = the_manager(); + hugepage_allocator::the_allocator().init(bytes); + m.hugepages = true; +#else + throw std::runtime_error(std::string("hugepages not supported on Windows")); + (void)bytes; +#endif + } + template + static void resize(t_vec & v, const typename t_vec::size_type capacity) + { + uint64_t old_capacity_in_bytes = ((v.m_capacity + 63) >> 6) << 3; + uint64_t new_capacity_in_bytes = ((capacity + 63) >> 6) << 3; + bool do_realloc = old_capacity_in_bytes != new_capacity_in_bytes; + v.m_capacity = ((capacity + 63) >> 6) << 6; + if (do_realloc || v.m_data == nullptr) + { + size_t allocated_bytes = (size_t)(((v.m_capacity + 64) >> 6) << 3); + v.m_data = memory_manager::realloc_mem(v.m_data, allocated_bytes); + if (allocated_bytes != 0 && v.m_data == nullptr) + { + throw std::bad_alloc(); + } + if (do_realloc) + { + memory_monitor::record((int64_t)new_capacity_in_bytes - (int64_t)old_capacity_in_bytes); + } + } + } + template + static void clear(t_vec & v) + { + int64_t size_in_bytes = ((v.m_size + 63) >> 6) << 3; + memory_manager::free_mem(v.m_data); + v.m_data = nullptr; + if (size_in_bytes) + { + memory_monitor::record(size_in_bytes * -1); + } + } + static int open_file_for_mmap(std::string & filename, std::ios_base::openmode mode) + { + if (is_ram_file(filename)) + { + return ram_fs::open(filename); + } +#ifdef MSVC_COMPILER + int fd = -1; + if (!(mode & std::ios_base::out)) + _sopen_s(&fd, filename.c_str(), _O_BINARY | _O_RDONLY, _SH_DENYNO, _S_IREAD); + else + _sopen_s(&fd, filename.c_str(), _O_BINARY | _O_RDWR, _SH_DENYNO, _S_IREAD | _S_IWRITE); + return fd; +#else + if (!(mode & 
std::ios_base::out)) + return open(filename.c_str(), O_RDONLY); + else + return open(filename.c_str(), O_RDWR); +#endif + return -1; + } + static void * mmap_file(int fd, uint64_t file_size, std::ios_base::openmode mode) + { + if (file_size == 0) + { + std::cout << "file_size=0" << std::endl; + return nullptr; + } + if (is_ram_file(fd)) + { + if (ram_fs::file_size(fd) < file_size) + return nullptr; + auto & file_content = ram_fs::content(fd); + return file_content.data(); + } + memory_monitor::record(file_size); +#ifdef _WIN32 + HANDLE fh = (HANDLE)_get_osfhandle(fd); + if (fh == INVALID_HANDLE_VALUE) + { + return nullptr; + } + HANDLE fm; + if (!(mode & std::ios_base::out)) + { + fm = CreateFileMapping(fh, NULL, PAGE_READONLY, 0, 0, NULL); + } + else + fm = CreateFileMapping(fh, NULL, PAGE_READWRITE, 0, 0, NULL); + if (fm == NULL) + { + return nullptr; + } + void * map = nullptr; + if (!(mode & std::ios_base::out)) + { + map = MapViewOfFile(fm, FILE_MAP_READ, 0, 0, file_size); + } + else + map = MapViewOfFile(fm, FILE_MAP_WRITE | FILE_MAP_READ, 0, 0, file_size); + CloseHandle(fm); + return map; +#else + void * map = nullptr; + if (!(mode & std::ios_base::out)) + map = mmap(NULL, file_size, PROT_READ, MAP_SHARED, fd, 0); + else + map = mmap(NULL, file_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (map == MAP_FAILED) + map = nullptr; + return map; +#endif + return nullptr; + } + static int mem_unmap(int fd, void * addr, const uint64_t size) + { + if (addr == nullptr) + { + return 0; + } + if (is_ram_file(fd)) + { + return 0; + } + memory_monitor::record(-((int64_t)size)); +#ifdef _WIN32 + if (UnmapViewOfFile(addr)) + return 0; + return -1; +#else + return munmap(addr, size); +#endif + return -1; + } + static int close_file_for_mmap(int fd) + { + if (is_ram_file(fd)) + { + return ram_fs::close(fd); + } +#ifdef MSVC_COMPILER + return _close(fd); +#else + return close(fd); +#endif + return -1; + } + static int truncate_file_mmap(int fd, const uint64_t 
new_size) + { + if (is_ram_file(fd)) + { + return ram_fs::truncate(fd, new_size); + } +#ifdef _WIN32 + auto ret = _chsize_s(fd, new_size); + if (ret != 0) + ret = -1; + return ret; +#else + return ftruncate(fd, new_size); +#endif + return -1; + } +}; +#undef ALIGNMENT +#undef ALIGNSPLIT +#undef ALIGN +#undef MM_BLOCK_OVERHEAD +#undef MIN_BLOCKSIZE +#undef UNMASK_SIZE +#undef ISFREE +#undef SETFREE +#undef SPLIT_THRESHOLD +} +#endif +namespace sdsl +{ +typedef uint64_t std_size_type_for_int_vector; +template +class int_vector; +typedef int_vector<1> bit_vector; +template +class int_vector_const_iterator; +template +class int_vector_iterator; +template +class int_vector_iterator_base; +template +class int_vector_reference; +template +class rank_support_v; +template +class select_support_mcl; +namespace coder +{ +template +class elias_delta; +template +class elias_gamma; +template +class fibonacci; +template +class comma; +} +template +struct int_vec_category_trait +{ + typedef iv_tag type; +}; +template <> +struct int_vec_category_trait<1> +{ + typedef bv_tag type; +}; +template +struct int_vector_trait +{ + typedef uint64_t value_type; + typedef int_vector int_vector_type; + typedef int_vector_reference reference; + typedef uint64_t const_reference; + typedef uint8_t int_width_type; + typedef int_vector_iterator iterator; + typedef int_vector_const_iterator const_iterator; + static iterator begin(int_vector_type * v) noexcept + { + return iterator(v, 0); + } + static iterator end(int_vector_type * v) noexcept + { + return iterator(v, v->size() * v->width()); + } + static const_iterator begin(int_vector_type const * v) noexcept + { + return const_iterator(v, 0); + } + static const_iterator end(int_vector_type const * v) noexcept + { + return const_iterator(v, v->size() * v->width()); + } + static void set_width(uint8_t new_width, int_width_type & width) noexcept + { + if constexpr (t_width == 0) + width = new_width ? 
std::min(new_width, uint8_t{64u}) : 64u; + } +}; +template <> +struct int_vector_trait<64> +{ + typedef uint64_t value_type; + typedef int_vector<64> int_vector_type; + typedef uint64_t & reference; + typedef uint64_t const_reference; + typedef uint8_t int_width_type; + typedef uint64_t * iterator; + typedef uint64_t const * const_iterator; + static iterator begin(int_vector_type * v) noexcept; + static iterator end(int_vector_type * v) noexcept; + static const_iterator begin(int_vector_type const * v) noexcept; + static const_iterator end(int_vector_type const * v) noexcept; + static void set_width(uint8_t, int_width_type) noexcept + {} +}; +template <> +struct int_vector_trait<32> +{ + typedef uint32_t value_type; + typedef int_vector<32> int_vector_type; + typedef uint32_t & reference; + typedef uint32_t const_reference; + typedef uint8_t int_width_type; + typedef uint32_t * iterator; + typedef uint32_t const * const_iterator; + static iterator begin(int_vector_type * v) noexcept; + static iterator end(int_vector_type * v) noexcept; + static const_iterator begin(int_vector_type const * v) noexcept; + static const_iterator end(int_vector_type const * v) noexcept; + static void set_width(uint8_t, int_width_type) noexcept + {} +}; +template <> +struct int_vector_trait<16> +{ + typedef uint16_t value_type; + typedef int_vector<16> int_vector_type; + typedef uint16_t & reference; + typedef uint16_t const_reference; + typedef uint8_t int_width_type; + typedef uint16_t * iterator; + typedef uint16_t const * const_iterator; + static iterator begin(int_vector_type * v) noexcept; + static iterator end(int_vector_type * v) noexcept; + static const_iterator begin(int_vector_type const * v) noexcept; + static const_iterator end(int_vector_type const * v) noexcept; + static void set_width(uint8_t, int_width_type) noexcept + {} +}; +template <> +struct int_vector_trait<8> +{ + typedef uint8_t value_type; + typedef int_vector<8> int_vector_type; + typedef uint8_t & reference; + 
typedef uint8_t const_reference; + typedef uint8_t int_width_type; + typedef uint8_t * iterator; + typedef uint8_t const * const_iterator; + static iterator begin(int_vector_type * v) noexcept; + static iterator end(int_vector_type * v) noexcept; + static const_iterator begin(int_vector_type const * v) noexcept; + static const_iterator end(int_vector_type const * v) noexcept; + static void set_width(uint8_t, int_width_type) noexcept + {} +}; +template +class int_vector +{ +private: + static_assert(t_width <= 64, "int_vector: width of must be at most 64bits."); +public: + typedef typename int_vector_trait::value_type value_type; + typedef typename int_vector_trait::iterator iterator; + typedef typename int_vector_trait::const_iterator const_iterator; + typedef typename int_vector_trait::reference reference; + typedef typename int_vector_trait::const_reference const_reference; + typedef int_vector_reference * pointer; + typedef value_type const * const_pointer; + typedef ptrdiff_t difference_type; + typedef int_vector_size_type size_type; + typedef typename int_vector_trait::int_width_type int_width_type; + typedef rank_support_v<1, 1> rank_1_type; + typedef rank_support_v<0, 1> rank_0_type; + typedef select_support_mcl<1, 1> select_1_type; + typedef select_support_mcl<0, 1> select_0_type; + typedef typename int_vec_category_trait::type index_category; + friend struct int_vector_trait; + friend class int_vector_iterator_base; + friend class int_vector_iterator; + friend class int_vector_const_iterator; + template + friend class int_vector_mapper; + template + friend class coder::elias_delta; + template + friend class coder::elias_gamma; + template + friend class coder::fibonacci; + template + friend class coder::comma; + friend class memory_manager; + static constexpr uint8_t fixed_int_width = t_width; + float growth_factor = 1.5; +private: + size_type m_size; + size_type m_capacity; + uint64_t * m_data; + int_width_type m_width; + void bit_resize(const size_type 
size, const value_type value); + void amortized_resize(const size_type size) + { + assert(growth_factor > 1.0); + if constexpr (t_width != 0) + { + size_type const bit_size{size * t_width}; + if (bit_size > m_capacity || m_data == nullptr) + { + size_type new_capacity = std::max(m_capacity, 64u); + while (new_capacity < bit_size) + new_capacity *= growth_factor; + memory_manager::resize(*this, new_capacity); + } + m_size = bit_size; + } + else + { + size_type const bit_size{size * m_width}; + if (bit_size > m_capacity || m_data == nullptr) + { + size_type new_capacity = std::max(m_capacity, 64u); + while (new_capacity < bit_size) + new_capacity *= growth_factor; + memory_manager::resize(*this, new_capacity); + } + m_size = bit_size; + } + } + size_type bit_data_size() const + { + return (m_size + 63) >> 6; + } +public: + int_vector(size_type size, value_type default_value, uint8_t int_width = t_width); + explicit int_vector(size_type size = 0) : int_vector(size, static_cast(0), t_width) + {} + int_vector(std::initializer_list il) : int_vector(0, 0) + { + assign(il); + } + template + int_vector(typename std::enable_if< + std::is_base_of::iterator_category>::value, + input_iterator_t>::type first, + input_iterator_t last) : + int_vector(0, 0) + { + assign(first, last); + } + void clear() noexcept + { + m_size = 0; + } + iterator erase(const_iterator it) + { + iterator it_nonconst = begin() + (it - cbegin()); + std::copy(it_nonconst + 1, end(), it_nonconst); + resize(size() - 1); + return it_nonconst; + } + iterator erase(const_iterator first, const_iterator last) + { + iterator first_nonconst = begin() + (first - cbegin()); + iterator last_nonconst = begin() + (last - cbegin()); + std::copy(last_nonconst, end(), first_nonconst); + resize(size() - (last - first)); + return first_nonconst; + } + template + iterator emplace(const_iterator it, Args &&... 
args) + { + return insert(it, 1, value_type(std::forward(args)...)); + } + iterator insert(const_iterator it, value_type value) + { + return insert(it, 1, value); + } + iterator insert(const_iterator it, size_type n, value_type value) + { + size_type pos = it - cbegin(); + amortized_resize(size() + n); + iterator it_new = begin() + pos; + std::copy_backward(it_new, end() - n, end()); + std::fill_n(it_new, n, value); + return it_new; + } + iterator insert(const_iterator it, std::initializer_list il) + { + return insert(it, il.begin(), il.end()); + } + template + typename std::enable_if::iterator_category>::value, + iterator>::type + insert(const_iterator it, input_iterator_t first, input_iterator_t last) + { + size_type pos = it - cbegin(); + amortized_resize(size() + last - first); + iterator it_new = begin() + pos; + std::copy_backward(it_new, end() - (last - first), end()); + std::copy(first, last, it_new); + return it_new; + } + reference front() noexcept + { + return *begin(); + } + const_reference front() const noexcept + { + return *cbegin(); + } + reference back() noexcept + { + return *(end() - 1); + } + const_reference back() const noexcept + { + return *(cend() - 1); + } + template + void emplace_back(Args &&... 
args) + { + push_back(value_type(std::forward(args)...)); + } + void push_back(value_type value) + { + amortized_resize(size() + 1); + *(end() - 1) = value; + } + void pop_back() + { + resize(size() - 1); + } + int_vector(int_vector && v); + int_vector(int_vector const & v); + ~int_vector(); + void assign(size_type size, value_type default_value) + { + bit_resize(size * m_width); + util::set_to_value(*this, default_value); + } + void assign(std::initializer_list il) + { + bit_resize(il.size() * m_width); + size_type idx = 0; + for (auto x : il) + { + (*this)[idx++] = x; + } + } + template + void assign(input_iterator_t first, input_iterator_t last) + { + assert(first <= last); + bit_resize((last - first) * m_width); + size_type idx = 0; + while (first < last) + { + (*this)[idx++] = *(first++); + } + } + bool empty() const noexcept + { + return 0 == m_size; + } + void swap(int_vector & v) noexcept + { + std::swap(v, *this); + } + void shrink_to_fit() + { + memory_manager::resize(*this, m_size); + } + void reserve(size_type capacity) + { + if (capacity * m_width > m_capacity || m_data == nullptr) + { + memory_manager::resize(*this, capacity * m_width); + } + } + void resize(const size_type size) + { + resize(size, 0); + } + void resize(const size_type size, const value_type value) + { + bit_resize(size * m_width, value); + } + void bit_resize(const size_type size); + inline size_type size() const noexcept; + static size_type max_size() noexcept + { + return ((size_type)1) << (sizeof(size_type) * 8 - 6); + } + size_type bit_size() const noexcept + { + return m_size; + } + inline size_type capacity() const noexcept; + size_type bit_capacity() const noexcept + { + return m_capacity; + } + uint64_t const * data() const noexcept + { + return m_data; + } + uint64_t * data() noexcept + { + return m_data; + } + value_type get_int(size_type idx, const uint8_t len = 64) const; + void set_int(size_type idx, value_type x, const uint8_t len = 64); + uint8_t width() const noexcept 
+ { + return m_width; + } + void width(uint8_t new_width) noexcept + { + int_vector_trait::set_width(new_width, m_width); + } + size_type write_data(std::ostream & out) const; + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const; + void load(std::istream & in); + template + inline typename std::enable_if< + !cereal::traits::is_output_serializable>, archive_t>::value, + void>::type + CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const; + template + inline typename std::enable_if< + cereal::traits::is_output_serializable>, archive_t>::value, + void>::type + CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const; + template + inline typename std::enable_if< + !cereal::traits::is_input_serializable>, archive_t>::value, + void>::type + CEREAL_LOAD_FUNCTION_NAME(archive_t & ar); + template + inline typename std::enable_if< + cereal::traits::is_input_serializable>, archive_t>::value, + void>::type + CEREAL_LOAD_FUNCTION_NAME(archive_t & ar); + inline reference operator[](size_type const & i) noexcept; + inline const_reference operator[](size_type const & i) const noexcept; + reference at(size_type const & i) + { + return (*this)[i]; + } + const_reference at(size_type const & i) const + { + return (*this)[i]; + } + int_vector & operator=(int_vector const & v); + int_vector & operator=(int_vector && v); + bool operator==(int_vector const & v) const noexcept + { + if (bit_size() != v.bit_size()) + return false; + if (empty()) + return true; + uint64_t const * data1 = v.data(); + uint64_t const * data2 = data(); + for (size_type i = 0; i < bit_data_size() - 1; ++i) + { + if (*(data1++) != *(data2++)) + return false; + } + uint8_t l = 64 - ((bit_data_size() << 6) - m_size); + return ((*data1) & bits::lo_set[l]) == ((*data2) & bits::lo_set[l]); + } + template + bool operator==(int_vector const & v) const noexcept + { + return (this->size() == v.size()) && std::equal(this->begin(), this->end(), v.begin()); + } + template + bool 
operator!=(int_vector const & v) const noexcept + { + return !(*this == v); + } + bool operator<(int_vector const & v) const noexcept; + bool operator>(int_vector const & v) const noexcept; + bool operator<=(int_vector const & v) const noexcept; + bool operator>=(int_vector const & v) const noexcept; + int_vector & operator&=(int_vector const & v); + int_vector & operator|=(int_vector const & v); + int_vector & operator^=(int_vector const & v); + iterator begin() noexcept + { + return int_vector_trait::begin(this); + } + iterator end() noexcept + { + return int_vector_trait::end(this); + } + const_iterator begin() const noexcept + { + return int_vector_trait::begin(this); + } + const_iterator end() const noexcept + { + return int_vector_trait::end(this); + } + const_iterator cbegin() const noexcept + { + return int_vector_trait::begin(this); + } + const_iterator cend() const noexcept + { + return int_vector_trait::end(this); + } + void flip() + { + static_assert(1 == t_width, "int_vector: flip() is available only for bit_vector."); + if (!empty()) + { + for (uint64_t i = 0; i < bit_data_size(); ++i) + { + m_data[i] = ~m_data[i]; + } + } + } + static size_t read_header(int_vector_size_type & size, int_width_type & int_width, std::istream & in) + { + uint64_t width_and_size = 0; + read_member(width_and_size, in); + size = width_and_size & bits::lo_set[56]; + uint8_t read_int_width = (uint8_t)(width_and_size >> 56); + if (t_width == 0) + { + int_width = read_int_width; + } + if (t_width > 0 and t_width != read_int_width) + { + std::cerr << "Warning: Width of int_vector<" << (size_t)t_width << ">"; + std::cerr << " was specified as " << (size_type)read_int_width << std::endl; + std::cerr << "Length is " << size << " bits" << std::endl; + } + return sizeof(width_and_size); + } + static uint64_t write_header(uint64_t size, uint8_t int_width, std::ostream & out) + { + if (t_width > 0) + { + if (t_width != int_width) + { + std::cout << "Warning: writing width=" << 
(size_type)int_width << " != fixed " << (size_type)t_width + << std::endl; + } + } + uint64_t width_and_size = (((uint64_t)int_width) << 56) | size; + return write_member(width_and_size, out); + } + struct raw_wrapper + { + int_vector const & vec; + raw_wrapper() = delete; + raw_wrapper(int_vector const & _vec) : vec(_vec) + {} + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + auto written_bytes = vec.write_data(out); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + }; + const raw_wrapper raw = raw_wrapper(*this); +}; +template +class int_vector_reference +{ +public: + typedef typename t_int_vector::value_type value_type; +private: + typename t_int_vector::value_type * const m_word; + const uint8_t m_offset; + const uint8_t m_len; +public: + int_vector_reference() = delete; + constexpr int_vector_reference(int_vector_reference const &) noexcept = default; + constexpr int_vector_reference(int_vector_reference &&) noexcept = default; + int_vector_reference(value_type * word, uint8_t offset, uint8_t len) noexcept : + m_word(word), + m_offset(offset), + m_len(len){}; + int_vector_reference & operator=(value_type x) noexcept + { + bits::write_int(m_word, x, m_offset, m_len); + return *this; + }; + int_vector_reference & operator=(int_vector_reference const & x) noexcept + { + return *this = value_type(x); + }; + int_vector_reference & operator=(int_vector_reference && x) noexcept + { + return *this = value_type(std::move(x)); + }; + operator value_type() const noexcept + { + return bits::read_int(m_word, m_offset, m_len); + } + int_vector_reference & operator++() noexcept + { + value_type x = bits::read_int(m_word, m_offset, m_len); + bits::write_int(m_word, x + 1, m_offset, m_len); + return *this; + } + value_type operator++(int) noexcept + { + value_type val = (typename 
t_int_vector::value_type) * this; + ++(*this); + return val; + } + int_vector_reference & operator--() noexcept + { + value_type x = bits::read_int(m_word, m_offset, m_len); + bits::write_int(m_word, x - 1, m_offset, m_len); + return *this; + } + value_type operator--(int) noexcept + { + value_type val = (value_type) * this; + --(*this); + return val; + } + int_vector_reference & operator+=(const value_type x) noexcept + { + value_type w = bits::read_int(m_word, m_offset, m_len); + bits::write_int(m_word, w + x, m_offset, m_len); + return *this; + } + int_vector_reference & operator-=(const value_type x) noexcept + { + value_type w = bits::read_int(m_word, m_offset, m_len); + bits::write_int(m_word, w - x, m_offset, m_len); + return *this; + } + bool operator==(int_vector_reference const & x) const noexcept + { + return value_type(*this) == value_type(x); + } + bool operator<(int_vector_reference const & x) const noexcept + { + return value_type(*this) < value_type(x); + } +}; +template +inline void swap(int_vector_reference x, int_vector_reference y) noexcept +{ + typename int_vector_reference::value_type tmp = x; + x = y; + y = tmp; +} +template +inline void swap(typename int_vector_reference::value_type & x, + int_vector_reference y) noexcept +{ + typename int_vector_reference::value_type tmp = x; + x = y; + y = tmp; +} +template +inline void swap(int_vector_reference x, + typename int_vector_reference::value_type & y) noexcept +{ + typename int_vector_reference::value_type tmp = x; + x = y; + y = tmp; +} +template <> +class int_vector_reference +{ +public: + typedef bool value_type; +private: + uint64_t * const m_word; + uint64_t m_mask; +public: + int_vector_reference() = delete; + constexpr int_vector_reference(int_vector_reference const &) noexcept = default; + constexpr int_vector_reference(int_vector_reference &&) noexcept = default; + int_vector_reference(uint64_t * word, uint8_t offset, uint8_t) noexcept : m_word(word), m_mask(1ULL << offset){}; + 
int_vector_reference & operator=(bool x) noexcept + { + if (x) + *m_word |= m_mask; + else + *m_word &= ~m_mask; + return *this; + }; + int_vector_reference & operator=(int_vector_reference const & x) noexcept + { + return *this = bool(x); + }; + int_vector_reference & operator=(int_vector_reference && x) noexcept + { + return *this = bool(x); + }; + operator bool() const noexcept + { + return !!(*m_word & m_mask); + } + bool operator==(int_vector_reference const & x) const noexcept + { + return bool(*this) == bool(x); + } + bool operator<(int_vector_reference const & x) const noexcept + { + return !bool(*this) && bool(x); + } +}; +template <> +inline void swap(int_vector_reference x, int_vector_reference y) noexcept +{ + bool tmp = x; + x = y; + y = tmp; +} +template <> +inline void swap(bool & x, int_vector_reference y) noexcept +{ + bool tmp = x; + x = y; + y = tmp; +} +template <> +inline void swap(int_vector_reference x, bool & y) noexcept +{ + bool tmp = x; + x = y; + y = tmp; +} +template +class int_vector_iterator_base +{ +public: + using iterator_category = std::random_access_iterator_tag; + using value_type = typename t_int_vector::value_type; + using difference_type = typename t_int_vector::difference_type; + using pointer = value_type *; + using reference = value_type &; + typedef uint64_t size_type; +protected: + uint8_t m_offset; + uint8_t m_len; +public: + int_vector_iterator_base(uint8_t offset, uint8_t len) : m_offset(offset), m_len(len) + {} + int_vector_iterator_base(t_int_vector const * v = nullptr, size_type idx = 0) : + m_offset(idx & 0x3F), + m_len(v == nullptr ? 
0 : v->m_width) + {} +}; +template +class int_vector_iterator : public int_vector_iterator_base +{ +public: + typedef int_vector_reference reference; + typedef uint64_t value_type; + typedef int_vector_iterator iterator; + typedef reference * pointer; + typedef typename t_int_vector::size_type size_type; + typedef typename t_int_vector::difference_type difference_type; + friend class int_vector_const_iterator; +private: + using int_vector_iterator_base::m_offset; + using int_vector_iterator_base::m_len; + typename t_int_vector::value_type * m_word; +public: + int_vector_iterator(t_int_vector * v = nullptr, size_type idx = 0) : + int_vector_iterator_base(v, idx), + m_word((v != nullptr) ? v->m_data + (idx >> 6) : nullptr) + {} + int_vector_iterator(int_vector_iterator const & it) : + int_vector_iterator_base(it), + m_word(it.m_word) + { + m_offset = it.m_offset; + m_len = it.m_len; + } + reference operator*() const + { + return reference(m_word, m_offset, m_len); + } + iterator & operator++() + { + m_offset += m_len; + if (m_offset >= 64) + { + m_offset &= 0x3F; + ++m_word; + } + return *this; + } + iterator operator++(int) + { + int_vector_iterator it = *this; + ++(*this); + return it; + } + iterator & operator--() + { + m_offset -= m_len; + if (m_offset >= 64) + { + m_offset &= 0x3F; + --m_word; + } + return *this; + } + iterator operator--(int) + { + int_vector_iterator it = *this; + --(*this); + return it; + } + iterator & operator+=(difference_type i) + { + if (i < 0) + return *this -= (-i); + difference_type t = i * m_len; + m_word += (t >> 6); + if ((m_offset += (t & 0x3F)) & ~0x3F) + { + ++m_word; + m_offset &= 0x3F; + } + return *this; + } + iterator & operator-=(difference_type i) + { + if (i < 0) + return *this += (-i); + difference_type t = i * m_len; + m_word -= (t >> 6); + if ((m_offset -= (t & 0x3F)) & ~0x3F) + { + --m_word; + m_offset &= 0x3F; + } + return *this; + } + iterator & operator=(int_vector_iterator const & it) + { + if (this != &it) + { + 
m_word = it.m_word; + m_offset = it.m_offset; + m_len = it.m_len; + } + return *this; + } + iterator operator+(difference_type i) const + { + iterator it = *this; + return it += i; + } + iterator operator-(difference_type i) const + { + iterator it = *this; + return it -= i; + } + reference operator[](difference_type i) const + { + return *(*this + i); + } + bool operator==(int_vector_iterator const & it) const noexcept + { + return it.m_word == m_word && it.m_offset == m_offset; + } + bool operator!=(int_vector_iterator const & it) const noexcept + { + return !(*this == it); + } + bool operator<(int_vector_iterator const & it) const noexcept + { + if (m_word == it.m_word) + return m_offset < it.m_offset; + return m_word < it.m_word; + } + bool operator>(int_vector_iterator const & it) const noexcept + { + if (m_word == it.m_word) + return m_offset > it.m_offset; + return m_word > it.m_word; + } + bool operator>=(int_vector_iterator const & it) const noexcept + { + return !(*this < it); + } + bool operator<=(int_vector_iterator const & it) const noexcept + { + return !(*this > it); + } + inline difference_type operator-(int_vector_iterator const & it) const noexcept + { + return (((m_word - it.m_word) << 6) + m_offset - it.m_offset) / m_len; + } +}; +template +inline int_vector_iterator operator+(typename int_vector_iterator::difference_type n, + int_vector_iterator const & it) +{ + return it + n; +} +template +class int_vector_const_iterator : public int_vector_iterator_base +{ +public: + typedef typename t_int_vector::value_type const_reference; + typedef const typename t_int_vector::value_type * pointer; + typedef int_vector_const_iterator const_iterator; + typedef typename t_int_vector::size_type size_type; + typedef typename t_int_vector::difference_type difference_type; + template + friend typename int_vector_const_iterator::difference_type operator-(int_vector_const_iterator const & x, + int_vector_const_iterator const & y); + friend class 
int_vector_iterator; + friend class int_vector_iterator_base; +private: + using int_vector_iterator_base::m_offset; + using int_vector_iterator_base::m_len; + const typename t_int_vector::value_type * m_word; +public: + int_vector_const_iterator(t_int_vector const * v = nullptr, size_type idx = 0) : + int_vector_iterator_base(v, idx), + m_word((v != nullptr) ? v->m_data + (idx >> 6) : nullptr) + {} + int_vector_const_iterator(int_vector_iterator const & it) : m_word(it.m_word) + { + m_offset = it.m_offset; + m_len = it.m_len; + } + int_vector_const_iterator(int_vector_const_iterator const &) = default; + int_vector_const_iterator & operator=(int_vector_const_iterator const &) = default; + const_reference operator*() const + { + if (m_offset + m_len <= 64) + { + return ((*m_word) >> m_offset) & bits::lo_set[m_len]; + } + return ((*m_word) >> m_offset) | ((*(m_word + 1) & bits::lo_set[(m_offset + m_len) & 0x3F]) << (64 - m_offset)); + } + const_iterator & operator++() + { + m_offset += m_len; + if (m_offset >= 64) + { + m_offset &= 0x3F; + ++m_word; + } + return *this; + } + const_iterator operator++(int) + { + int_vector_const_iterator it = *this; + ++(*this); + return it; + } + const_iterator & operator--() + { + m_offset -= m_len; + if (m_offset >= 64) + { + m_offset &= 0x3F; + --m_word; + } + return *this; + } + const_iterator operator--(int) + { + int_vector_const_iterator it = *this; + --(*this); + return it; + } + const_iterator & operator+=(difference_type i) + { + if (i < 0) + return *this -= (-i); + difference_type t = i * m_len; + m_word += (t >> 6); + if ((m_offset += (t & 0x3F)) & ~0x3F) + { + ++m_word; + m_offset &= 0x3F; + } + return *this; + } + const_iterator & operator-=(difference_type i) + { + if (i < 0) + return *this += (-i); + difference_type t = i * m_len; + m_word -= (t >> 6); + if ((m_offset -= (t & 0x3F)) & ~0x3F) + { + --m_word; + m_offset &= 0x3F; + } + return *this; + } + const_iterator operator+(difference_type i) const + { + 
const_iterator it = *this; + return it += i; + } + const_iterator operator-(difference_type i) const + { + const_iterator it = *this; + return it -= i; + } + const_reference operator[](difference_type i) const + { + return *(*this + i); + } + bool operator==(int_vector_const_iterator const & it) const noexcept + { + return it.m_word == m_word && it.m_offset == m_offset; + } + bool operator!=(int_vector_const_iterator const & it) const noexcept + { + return !(*this == it); + } + bool operator<(int_vector_const_iterator const & it) const noexcept + { + if (m_word == it.m_word) + return m_offset < it.m_offset; + return m_word < it.m_word; + } + bool operator>(int_vector_const_iterator const & it) const noexcept + { + if (m_word == it.m_word) + return m_offset > it.m_offset; + return m_word > it.m_word; + } + bool operator>=(int_vector_const_iterator const & it) const noexcept + { + return !(*this < it); + } + bool operator<=(int_vector_const_iterator const & it) const noexcept + { + return !(*this > it); + } +}; +template +inline typename int_vector_const_iterator::difference_type +operator-(int_vector_const_iterator const & x, int_vector_const_iterator const & y) +{ + return (((x.m_word - y.m_word) << 6) + x.m_offset - y.m_offset) / x.m_len; +} +template +inline int_vector_const_iterator +operator+(typename int_vector_const_iterator::difference_type n, + int_vector_const_iterator const & it) +{ + return it + n; +} +template +inline typename std::enable_if::value, std::ostream &>::type +operator<<(std::ostream & os, t_bv const & bv) +{ + for (auto b : bv) + { + os << b; + } + return os; +} +template +inline int_vector::int_vector(size_type size, value_type default_value, uint8_t int_width) : + m_size(0), + m_capacity(0), + m_data(nullptr), + m_width(t_width) +{ + width(int_width); + assign(size, default_value); +} +template +inline int_vector::int_vector(int_vector && v) : + m_size(v.m_size), + m_capacity(v.m_capacity), + m_data(v.m_data), + m_width(v.m_width) +{ + 
v.m_data = nullptr; + v.m_size = 0; + v.m_capacity = 0; +} +template +inline int_vector::int_vector(int_vector const & v) : + m_size(0), + m_capacity(0), + m_data(nullptr), + m_width(v.m_width) +{ + width(v.m_width); + resize(v.size()); + if (v.m_size > 0) + { + if (memcpy(m_data, v.data(), bit_data_size() << 3) == nullptr) + { + throw std::bad_alloc(); + } + } +} +template +int_vector & int_vector::operator=(int_vector const & v) +{ + if (this != &v) + { + int_vector tmp(v); + *this = std::move(tmp); + } + return *this; +} +template +int_vector & int_vector::operator=(int_vector && v) +{ + if (this != &v) + { + memory_manager::clear(*this); + m_size = v.m_size; + m_data = v.m_data; + m_width = v.m_width; + m_capacity = v.m_capacity; + v.m_data = nullptr; + v.m_size = 0; + v.m_capacity = 0; + } + return *this; +} +template +int_vector::~int_vector() +{ + memory_manager::clear(*this); +} +template +void swap(int_vector & v1, int_vector & v2) noexcept +{ + std::swap(v1, v2); +} +template +void int_vector::bit_resize(const size_type size) +{ + if (size > m_capacity || m_data == nullptr) + { + memory_manager::resize(*this, size); + } + m_size = size; +} +template +void int_vector::bit_resize(const size_type size, const value_type value) +{ + size_type old_size = m_size; + bit_resize(size); + auto it = begin() + old_size / m_width; + util::set_to_value(*this, value, it); +} +template +auto int_vector::get_int(size_type idx, const uint8_t len) const -> value_type +{ +#ifdef SDSL_DEBUG + if (idx + len > m_size) + { + throw std::out_of_range("OUT_OF_RANGE_ERROR: int_vector::get_int(size_type, uint8_t); idx+len > size()!"); + } + if (len > 64) + { + throw std::out_of_range("OUT_OF_RANGE_ERROR: int_vector::get_int(size_type, uint8_t); len>64!"); + } +#endif + return bits::read_int(m_data + (idx >> 6), idx & 0x3F, len); +} +template +inline void int_vector::set_int(size_type idx, value_type x, const uint8_t len) +{ +#ifdef SDSL_DEBUG + if (idx + len > m_size) + { + throw 
std::out_of_range("OUT_OF_RANGE_ERROR: int_vector::set_int(size_type, uint8_t); idx+len > size()!"); + } + if (len > 64) + { + throw std::out_of_range("OUT_OF_RANGE_ERROR: int_vector::set_int(size_type, uint8_t); len>64!"); + } +#endif + bits::write_int(m_data + (idx >> 6), x, idx & 0x3F, len); +} +template +inline typename int_vector::size_type int_vector::size() const noexcept +{ + return m_size / t_width; +} +template <> +inline typename int_vector<64>::size_type int_vector<64>::size() const noexcept +{ + return m_size >> 6; +} +template <> +inline typename int_vector<32>::size_type int_vector<32>::size() const noexcept +{ + return m_size >> 5; +} +template <> +inline typename int_vector<16>::size_type int_vector<16>::size() const noexcept +{ + return m_size >> 4; +} +template <> +inline typename int_vector<8>::size_type int_vector<8>::size() const noexcept +{ + return m_size >> 3; +} +template <> +inline typename int_vector<1>::size_type int_vector<1>::size() const noexcept +{ + return m_size; +} +template <> +inline typename int_vector<0>::size_type int_vector<0>::size() const noexcept +{ + return m_size / m_width; +} +template +inline typename int_vector::size_type int_vector::capacity() const noexcept +{ + return m_capacity / t_width; +} +template <> +inline typename int_vector<64>::size_type int_vector<64>::capacity() const noexcept +{ + return m_capacity >> 6; +} +template <> +inline typename int_vector<32>::size_type int_vector<32>::capacity() const noexcept +{ + return m_capacity >> 5; +} +template <> +inline typename int_vector<16>::size_type int_vector<16>::capacity() const noexcept +{ + return m_capacity >> 4; +} +template <> +inline typename int_vector<8>::size_type int_vector<8>::capacity() const noexcept +{ + return m_capacity >> 3; +} +template <> +inline typename int_vector<1>::size_type int_vector<1>::capacity() const noexcept +{ + return m_capacity; +} +template <> +inline typename int_vector<0>::size_type int_vector<0>::capacity() const 
noexcept +{ + return m_capacity / m_width; +} +template +inline auto int_vector::operator[](size_type const & idx) noexcept -> reference +{ + assert(idx < this->size()); + size_type i = idx * m_width; + return reference(this->m_data + (i >> 6), i & 0x3F, m_width); +} +template <> +inline auto int_vector<64>::operator[](size_type const & idx) noexcept -> reference +{ + assert(idx < this->size()); + return *(this->m_data + idx); +} +template <> +inline auto int_vector<32>::operator[](size_type const & idx) noexcept -> reference +{ + assert(idx < this->size()); + return *(((uint32_t *)(this->m_data)) + idx); +} +template <> +inline auto int_vector<16>::operator[](size_type const & idx) noexcept -> reference +{ + assert(idx < this->size()); + return *(((uint16_t *)(this->m_data)) + idx); +} +template <> +inline auto int_vector<8>::operator[](size_type const & idx) noexcept -> reference +{ + assert(idx < this->size()); + return *(((uint8_t *)(this->m_data)) + idx); +} +template +inline auto int_vector::operator[](size_type const & idx) const noexcept -> const_reference +{ + assert(idx < this->size()); + return get_int(idx * t_width, t_width); +} +template <> +inline auto int_vector<0>::operator[](size_type const & idx) const noexcept -> const_reference +{ + assert(idx < this->size()); + return get_int(idx * m_width, m_width); +} +template <> +inline auto int_vector<64>::operator[](size_type const & idx) const noexcept -> const_reference +{ + assert(idx < this->size()); + return *(this->m_data + idx); +} +template <> +inline auto int_vector<32>::operator[](size_type const & idx) const noexcept -> const_reference +{ + assert(idx < this->size()); + return *(((uint32_t *)this->m_data) + idx); +} +template <> +inline auto int_vector<16>::operator[](size_type const & idx) const noexcept -> const_reference +{ + assert(idx < this->size()); + return *(((uint16_t *)this->m_data) + idx); +} +template <> +inline auto int_vector<8>::operator[](size_type const & idx) const noexcept 
-> const_reference +{ + assert(idx < this->size()); + return *(((uint8_t *)this->m_data) + idx); +} +template <> +inline auto int_vector<1>::operator[](size_type const & idx) const noexcept -> const_reference +{ + assert(idx < this->size()); + return ((*(m_data + (idx >> 6))) >> (idx & 0x3F)) & 1; +} +template +bool int_vector::operator<(int_vector const & v) const noexcept +{ + size_type min_size = size(); + if (min_size > v.size()) + min_size = v.size(); + for (auto it = begin(), end = begin() + min_size, it_v = v.begin(); it != end; ++it, ++it_v) + { + if (*it == *it_v) + continue; + else + return *it < *it_v; + } + return size() < v.size(); +} +template +bool int_vector::operator>(int_vector const & v) const noexcept +{ + size_type min_size = size(); + if (min_size > v.size()) + min_size = v.size(); + for (auto it = begin(), end = begin() + min_size, it_v = v.begin(); it != end; ++it, ++it_v) + { + if (*it == *it_v) + continue; + else + return *it > *it_v; + } + return size() > v.size(); +} +template +bool int_vector::operator<=(int_vector const & v) const noexcept +{ + return *this == v or *this < v; +} +template +bool int_vector::operator>=(int_vector const & v) const noexcept +{ + return *this == v or *this > v; +} +template +int_vector & int_vector::operator&=(int_vector const & v) +{ + assert(v.bit_size() == bit_size()); + for (uint64_t i = 0; i < bit_data_size(); ++i) + m_data[i] &= v.m_data[i]; + return *this; +} +template +int_vector & int_vector::operator|=(int_vector const & v) +{ + assert(bit_size() == v.bit_size()); + for (uint64_t i = 0; i < bit_data_size(); ++i) + m_data[i] |= v.m_data[i]; + return *this; +} +template +int_vector & int_vector::operator^=(int_vector const & v) +{ + assert(bit_size() == v.bit_size()); + for (uint64_t i = 0; i < bit_data_size(); ++i) + m_data[i] ^= v.m_data[i]; + return *this; +} +template +typename int_vector::size_type int_vector::write_data(std::ostream & out) const +{ + size_type written_bytes = 0; + uint64_t * p 
= m_data; + size_type idx = 0; + while (idx + conf::SDSL_BLOCK_SIZE < bit_data_size()) + { + out.write((char *)p, conf::SDSL_BLOCK_SIZE * sizeof(uint64_t)); + written_bytes += conf::SDSL_BLOCK_SIZE * sizeof(uint64_t); + p += conf::SDSL_BLOCK_SIZE; + idx += conf::SDSL_BLOCK_SIZE; + } + out.write((char *)p, (bit_data_size() - idx) * sizeof(uint64_t)); + written_bytes += (bit_data_size() - idx) * sizeof(uint64_t); + return written_bytes; +} +template +typename int_vector::size_type +int_vector::serialize(std::ostream & out, structure_tree_node * v, std::string name) const +{ + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = int_vector::write_header(m_size, m_width, out); + written_bytes += write_data(out); + structure_tree::add_size(child, written_bytes); + return written_bytes; +} +template +void int_vector::load(std::istream & in) +{ + size_type size; + int_vector::read_header(size, m_width, in); + bit_resize(size); + uint64_t * p = m_data; + size_type idx = 0; + while (idx + conf::SDSL_BLOCK_SIZE < bit_data_size()) + { + in.read((char *)p, conf::SDSL_BLOCK_SIZE * sizeof(uint64_t)); + p += conf::SDSL_BLOCK_SIZE; + idx += conf::SDSL_BLOCK_SIZE; + } + in.read((char *)p, (bit_data_size() - idx) * sizeof(uint64_t)); +} +template +template +inline typename std::enable_if< + cereal::traits::is_output_serializable>, archive_t>::value, + void>::type +int_vector::CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const +{ + ar(CEREAL_NVP(cereal::make_size_tag(static_cast(m_width)))); + ar(CEREAL_NVP(growth_factor)); + ar(CEREAL_NVP(cereal::make_size_tag(static_cast(m_size)))); + ar(cereal::make_nvp("data", cereal::binary_data(m_data, bit_data_size() * sizeof(uint64_t)))); +} +template +template +inline typename std::enable_if< + !cereal::traits::is_output_serializable>, archive_t>::value, + void>::type +int_vector::CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const +{ + ar(CEREAL_NVP(m_width)); + 
ar(CEREAL_NVP(growth_factor)); + ar(CEREAL_NVP(m_size)); + for (value_type const & v : *this) + ar(v); +} +template +template +inline typename std::enable_if< + cereal::traits::is_input_serializable>, archive_t>::value, + void>::type +int_vector::CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) +{ + ar(CEREAL_NVP(cereal::make_size_tag(m_width))); + ar(CEREAL_NVP(growth_factor)); + ar(CEREAL_NVP(cereal::make_size_tag(m_size))); + resize(size()); + ar(cereal::make_nvp("data", cereal::binary_data(m_data, bit_data_size() * sizeof(uint64_t)))); +} +template +template +inline typename std::enable_if< + !cereal::traits::is_input_serializable>, archive_t>::value, + void>::type +int_vector::CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) +{ + ar(CEREAL_NVP(m_width)); + width(width()); + ar(CEREAL_NVP(growth_factor)); + ar(CEREAL_NVP(m_size)); + resize(size()); + for (size_t i = 0; i < size(); ++i) + { + value_type tmp; + ar(tmp); + operator[](i) = tmp; + } +} +inline typename int_vector_trait<64>::iterator +int_vector_trait<64>::begin(typename int_vector_trait<64>::int_vector_type * v) noexcept +{ + return v->data(); +} +inline typename int_vector_trait<64>::iterator +int_vector_trait<64>::end(typename int_vector_trait<64>::int_vector_type * v) noexcept +{ + return v->data() + v->size(); +} +inline typename int_vector_trait<64>::const_iterator +int_vector_trait<64>::begin(const typename int_vector_trait<64>::int_vector_type * v) noexcept +{ + return v->data(); +} +inline typename int_vector_trait<64>::const_iterator +int_vector_trait<64>::end(const typename int_vector_trait<64>::int_vector_type * v) noexcept +{ + return v->data() + v->size(); +} +inline typename int_vector_trait<32>::iterator +int_vector_trait<32>::begin(typename int_vector_trait<32>::int_vector_type * v) noexcept +{ + return (uint32_t *)v->data(); +} +inline typename int_vector_trait<32>::iterator +int_vector_trait<32>::end(typename int_vector_trait<32>::int_vector_type * v) noexcept +{ + return ((uint32_t *)v->data()) 
+ v->size(); +} +inline typename int_vector_trait<32>::const_iterator +int_vector_trait<32>::begin(const typename int_vector_trait<32>::int_vector_type * v) noexcept +{ + return (uint32_t *)v->data(); +} +inline typename int_vector_trait<32>::const_iterator +int_vector_trait<32>::end(const typename int_vector_trait<32>::int_vector_type * v) noexcept +{ + return ((uint32_t *)v->data()) + v->size(); +} +inline typename int_vector_trait<16>::iterator +int_vector_trait<16>::begin(typename int_vector_trait<16>::int_vector_type * v) noexcept +{ + return (uint16_t *)v->data(); +} +inline typename int_vector_trait<16>::iterator +int_vector_trait<16>::end(typename int_vector_trait<16>::int_vector_type * v) noexcept +{ + return ((uint16_t *)v->data()) + v->size(); +} +inline typename int_vector_trait<16>::const_iterator +int_vector_trait<16>::begin(const typename int_vector_trait<16>::int_vector_type * v) noexcept +{ + return (uint16_t *)v->data(); +} +inline typename int_vector_trait<16>::const_iterator +int_vector_trait<16>::end(const typename int_vector_trait<16>::int_vector_type * v) noexcept +{ + return ((uint16_t *)v->data()) + v->size(); +} +inline typename int_vector_trait<8>::iterator +int_vector_trait<8>::begin(typename int_vector_trait<8>::int_vector_type * v) noexcept +{ + return (uint8_t *)v->data(); +} +inline typename int_vector_trait<8>::iterator +int_vector_trait<8>::end(typename int_vector_trait<8>::int_vector_type * v) noexcept +{ + return ((uint8_t *)v->data()) + v->size(); +} +inline typename int_vector_trait<8>::const_iterator +int_vector_trait<8>::begin(const typename int_vector_trait<8>::int_vector_type * v) noexcept +{ + return (uint8_t *)v->data(); +} +inline typename int_vector_trait<8>::const_iterator +int_vector_trait<8>::end(const typename int_vector_trait<8>::int_vector_type * v) noexcept +{ + return ((uint8_t *)v->data()) + v->size(); +} +} +#ifndef INCLUDED_INT_VECTOR_BUFFER +#define INCLUDED_INT_VECTOR_BUFFER +#include +#include +#include 
+#include
+#include
+namespace sdsl
+{
+// File-backed integer vector that keeps one block of elements in memory.
+// Elements are read and written through an in-memory buffer; a dirty buffer is
+// flushed to the file whenever another block is needed and on close().
+template
+class int_vector_buffer
+{
+public:
+    class iterator;
+    typedef typename int_vector::difference_type difference_type;
+    typedef typename int_vector::value_type value_type;
+    typedef typename int_vector::size_type size_type;
+private:
+    static_assert(t_width <= 64, "int_vector_buffer: width must be at most 64 bits.");
+    sdsl::isfstream m_ifile;      // stream used for reading blocks
+    sdsl::osfstream m_ofile;      // stream used for writing blocks and the header
+    std::string m_filename;
+    int_vector m_buffer;          // the block currently held in memory
+    bool m_need_to_write = false; // true if m_buffer has unwritten modifications
+    uint64_t m_offset = 0;        // byte offset of the data in the file: 0 for plain files, 8 otherwise
+    uint64_t m_buffersize = 8;    // number of elements in one block
+    uint64_t m_size = 0;          // number of elements stored so far
+    uint64_t m_begin = 0;         // index of the first element held in m_buffer
+    // Loads the block that contains element `idx` into m_buffer.
+    // A block that starts at or beyond m_size is zero-filled instead of read.
+    void read_block(const uint64_t idx)
+    {
+        m_begin = (idx / m_buffersize) * m_buffersize;
+        if (m_begin >= m_size)
+        {
+            util::set_to_value(m_buffer, 0);
+        }
+        else
+        {
+            m_ifile.seekg(m_offset + (m_begin * width()) / 8);
+            assert(m_ifile.good());
+            m_ifile.read((char *)m_buffer.data(), (m_buffersize * width()) / 8);
+            // A short read (last block of the file) sets failure bits; clear them.
+            if ((uint64_t)m_ifile.gcount() < (m_buffersize * width()) / 8)
+            {
+                m_ifile.clear();
+            }
+            assert(m_ifile.good());
+            // Zero the part of the buffer that lies behind the last stored element.
+            for (uint64_t i = m_size - m_begin; i < m_buffersize; ++i)
+            {
+                m_buffer[i] = 0;
+            }
+        }
+    }
+    // Writes m_buffer back to the file if it was modified since it was loaded.
+    void write_block()
+    {
+        if (m_need_to_write)
+        {
+            m_ofile.seekp(m_offset + (m_begin * width()) / 8);
+            assert(m_ofile.good());
+            if (m_begin + m_buffersize >= m_size)
+            {
+                // Last block: write only the bytes that hold stored elements.
+                uint64_t wb = ((m_size - m_begin) * width() + 7) / 8;
+                m_ofile.write((char *)m_buffer.data(), wb);
+            }
+            else
+            {
+                m_ofile.write((char *)m_buffer.data(), (m_buffersize * width()) / 8);
+            }
+            m_ofile.flush();
+            assert(m_ofile.good());
+            m_need_to_write = false;
+        }
+    }
+    // Returns element `idx`, swapping the buffered block if necessary.
+    uint64_t read(const uint64_t idx)
+    {
+        assert(is_open());
+        assert(idx < m_size);
+        if (idx < m_begin or m_begin + m_buffersize <= idx)
+        {
+            write_block();
+            read_block(idx);
+        }
+        return m_buffer[idx - m_begin];
+    }
+    // Stores `value` at `idx`; writing at or past m_size grows the vector to idx + 1.
+    void write(const uint64_t idx, const uint64_t value)
+    {
+        assert(is_open());
+        if (idx < m_begin or m_begin + m_buffersize <= idx)
+        {
+            write_block();
+            read_block(idx);
+        }
+        if (m_size <= idx)
+        {
+            m_size = idx + 1;
+        }
+        m_need_to_write = true;
+        m_buffer[idx - m_begin] = value;
+    }
+public:
+    // Creates a buffer that is not attached to any file.
+    int_vector_buffer()
+    {
+        m_buffer = int_vector();
+    }
+    // Opens `filename`.
+    //   mode        std::ios::in reads the existing size (and, for non-plain files, the width);
+    //               std::ios::app must not be set.
+    //   buffer_size requested block size in bytes (see buffersize(uint64_t)).
+    //   int_width   element width in bits; replaced by the header value when an
+    //               existing non-plain file is opened with std::ios::in.
+    //   is_plain    true if the file has no header; the width must then be 8, 16, 32 or 64.
+    int_vector_buffer(const std::string filename,
+                      std::ios::openmode mode = std::ios::in,
+                      const uint64_t buffer_size = 1024 * 1024,
+                      const uint8_t int_width = t_width,
+                      bool const is_plain = false)
+    {
+        m_filename = filename;
+        assert(!(mode & std::ios::app));
+        mode &= ~std::ios::app;
+        m_buffer.width(int_width);
+        if (is_plain)
+        {
+            m_offset = 0;
+            assert(8 == width() or 16 == width() or 32 == width() or 64 == width());
+        }
+        else
+        {
+            m_offset = 8;
+        }
+        m_ofile.open(m_filename, mode | std::ios::out | std::ios::binary);
+        assert(m_ofile.good());
+        m_ifile.open(m_filename, std::ios::in | std::ios::binary);
+        assert(m_ifile.good());
+        if (mode & std::ios::in)
+        {
+            uint64_t size = 0; // size of the stored data in bits
+            if (is_plain)
+            {
+                m_ifile.seekg(0, std::ios_base::end);
+                size = m_ifile.tellg() * 8;
+            }
+            else
+            {
+                uint8_t width = 0;
+                int_vector<0>::read_header(size, width, m_ifile);
+                m_buffer.width(width);
+            }
+            assert(m_ifile.good());
+            m_size = size / width();
+        }
+        buffersize(buffer_size);
+    }
+    // Takes over the state of `ivb`: closes its streams, reopens the same file for
+    // this object and leaves `ivb` in the default, unattached state.
+    int_vector_buffer(int_vector_buffer && ivb) :
+        m_filename(std::move(ivb.m_filename)),
+        m_buffer(std::move(ivb.m_buffer)),
+        m_need_to_write(ivb.m_need_to_write),
+        m_offset(ivb.m_offset),
+        m_buffersize(ivb.m_buffersize),
+        m_size(ivb.m_size),
+        m_begin(ivb.m_begin)
+    {
+        ivb.m_ifile.close();
+        ivb.m_ofile.close();
+        m_ifile.open(m_filename, std::ios::in | std::ios::binary);
+        m_ofile.open(m_filename, std::ios::in | std::ios::out | std::ios::binary);
+        assert(m_ifile.good());
+        assert(m_ofile.good());
+        ivb.m_filename = "";
+        ivb.m_buffer = int_vector();
+        ivb.m_need_to_write = false;
+        ivb.m_offset = 0;
+        ivb.m_buffersize = 8;
+        ivb.m_size = 0;
+        ivb.m_begin = 0;
+    }
+    // Flushes pending data and closes the file.
+    ~int_vector_buffer()
+    {
+        close();
+    }
+    // Closes the current file, then takes over the state of `ivb` as the move
+    // constructor does. Self-move-assignment is a no-op; without the guard the
+    // object would close its file and then reset itself to the empty state.
+    int_vector_buffer & operator=(int_vector_buffer && ivb)
+    {
+        if (this == &ivb)
+        {
+            return *this;
+        }
+        close();
+        ivb.m_ifile.close();
+        ivb.m_ofile.close();
+        m_filename = std::move(ivb.m_filename);
+        m_ifile.open(m_filename, std::ios::in | std::ios::binary);
+        m_ofile.open(m_filename, std::ios::in | std::ios::out | std::ios::binary);
+        assert(m_ifile.good());
+        assert(m_ofile.good());
+        m_buffer = std::move(ivb.m_buffer);
+        m_need_to_write = ivb.m_need_to_write;
+        m_offset = ivb.m_offset;
+        m_buffersize = ivb.m_buffersize;
+        m_size = ivb.m_size;
+        m_begin = ivb.m_begin;
+        ivb.m_filename = "";
+        ivb.m_buffer = int_vector();
+        ivb.m_need_to_write = false;
+        ivb.m_offset = 0;
+        ivb.m_buffersize = 8;
+        ivb.m_size = 0;
+        ivb.m_begin = 0;
+        return *this;
+    }
+    // Element width in bits.
+    uint8_t width() const
+    {
+        return m_buffer.width();
+    }
+    // Number of elements stored so far.
+    uint64_t size() const
+    {
+        return m_size;
+    }
+    // Name of the underlying file.
+    std::string filename() const
+    {
+        return m_filename;
+    }
+    // Size of the in-memory block in bytes.
+    uint64_t buffersize() const
+    {
+        assert(m_buffersize * width() % 8 == 0);
+        return (m_buffersize * width()) / 8;
+    }
+    // Flushes the current block and re-allocates the buffer for about `buffersize` bytes
+    // (0 is treated as 8). If the byte count is not a whole number of elements, the
+    // element count is rounded up to a multiple of 8 so a block ends on a byte boundary.
+    void buffersize(uint64_t buffersize)
+    {
+        if (0ULL == buffersize)
+            buffersize = 8;
+        write_block();
+        if (0 == (buffersize * 8) % width())
+        {
+            m_buffersize = buffersize * 8 / width();
+        }
+        else
+        {
+            uint64_t element_buffersize = (buffersize * 8) / width() + 1;
+            m_buffersize = element_buffersize + 7 - (element_buffersize + 7) % 8;
+        }
+        m_buffer = int_vector(m_buffersize, 0, width());
+        if (0 != m_buffersize)
+            read_block(0);
+    }
+    // True if both underlying streams are in a good state.
+    bool good()
+    {
+        return m_ifile.good() and m_ofile.good();
+    }
+    // True if both underlying streams are open.
+    bool is_open()
+    {
+        return m_ifile.is_open() and m_ofile.is_open();
+    }
+    // Discards all content: reopens the file for output only (no std::ios::in)
+    // and resets the size to 0.
+    void reset()
+    {
+        assert(m_ifile.good());
+        assert(m_ofile.good());
+        m_ifile.close();
+        m_ofile.close();
+        m_ofile.open(m_filename, std::ios::out | std::ios::binary);
+        assert(m_ofile.good());
+        m_ifile.open(m_filename, std::ios::in | std::ios::binary);
+        assert(m_ifile.good());
+        assert(m_ofile.good());
+        m_need_to_write = false;
+        m_size = 0;
+        read_block(0);
+    }
+    class reference;
+    // Proxy access to element `idx`; reads and writes go through the block buffer.
+    reference operator[](uint64_t idx)
+    {
+        return reference(this, idx);
+    }
+    // Appends `value` behind the last element.
+    void push_back(const uint64_t value)
+    {
+        write(m_size, value);
+    }
+    // Closes the file. Unless `remove_file` is set, pending data is flushed first and,
+    // for non-plain files, the header is rewritten and the data is zero-padded to a
+    // multiple of 8 bytes. With `remove_file` the file is deleted instead.
+    void close(bool remove_file = false)
+    {
+        if (is_open())
+        {
+            if (!remove_file)
+            {
+                write_block();
+                if (0 < m_offset)
+                {
+                    uint64_t size = m_size * width();
+                    m_ofile.seekp(0, std::ios::beg);
+                    int_vector::write_header(size, width(), m_ofile);
+                    assert(m_ofile.good());
+                    uint64_t wb = (size + 7) / 8;
+                    if (wb % 8)
+                    {
+                        m_ofile.seekp(m_offset + wb);
+                        assert(m_ofile.good());
+                        m_ofile.write("\0\0\0\0\0\0\0\0", 8 - wb % 8);
+                        assert(m_ofile.good());
+                    }
+                }
+            }
+            m_ifile.close();
+            assert(m_ifile.good());
+            m_ofile.close();
+            assert(m_ofile.good());
+            if (remove_file)
+            {
+                sdsl::remove(m_filename);
+            }
+        }
+    }
+    iterator begin()
+    {
+        return iterator(*this, 0);
+    }
+    iterator end()
+    {
+        return iterator(*this, size());
+    }
+    // Proxy for one element; every read or write is forwarded to the owning buffer.
+    class reference
+    {
+        friend class int_vector_buffer;
+    private:
+        int_vector_buffer * const m_int_vector_buffer = nullptr;
+        uint64_t m_idx = 0;
+        reference()
+        {}
+        reference(int_vector_buffer * _int_vector_buffer, uint64_t _idx) :
+            m_int_vector_buffer(_int_vector_buffer),
+            m_idx(_idx)
+        {}
+    public:
+        // Reads the referenced element.
+        operator uint64_t() const
+        {
+            return m_int_vector_buffer->read(m_idx);
+        }
+        // Writes `val` to the referenced element.
+        reference & operator=(uint64_t const & val)
+        {
+            m_int_vector_buffer->write(m_idx, val);
+            return *this;
+        }
+        // Copies the value (not the binding) of `x` into the referenced element.
+        reference & operator=(reference const & x)
+        {
+            return *this = (uint64_t)(x);
+        }
+        reference(reference const &) = default;
+        reference & operator++()
+        {
+            uint64_t x = m_int_vector_buffer->read(m_idx);
+            m_int_vector_buffer->write(m_idx, x + 1);
+            return *this;
+        }
+        uint64_t operator++(int)
+        {
+            uint64_t val = (uint64_t) * this;
+            ++(*this);
+            return val;
+        }
+        reference & operator--()
+        {
+            uint64_t x = m_int_vector_buffer->read(m_idx);
+            m_int_vector_buffer->write(m_idx, x - 1);
+            return *this;
+        }
+        uint64_t operator--(int)
+        {
+            uint64_t val = (uint64_t) * this;
+            --(*this);
+            return val;
+        }
+        reference & operator+=(const uint64_t x)
+        {
+            uint64_t w = m_int_vector_buffer->read(m_idx);
+            m_int_vector_buffer->write(m_idx, w + x);
+            return *this;
+        }
+        reference & operator-=(const uint64_t x)
+        {
+            uint64_t w = m_int_vector_buffer->read(m_idx);
+            m_int_vector_buffer->write(m_idx, w - x);
+            return *this;
+        }
+        // Comparisons act on the referenced values.
+        bool operator==(reference const & x) const
+        {
+            return (uint64_t) * this == (uint64_t)x;
+        }
+        bool operator<(reference const & x) const
+        {
+            return (uint64_t) * this < (uint64_t)x;
+        }
+    };
+    // Iterator that stores the owning buffer and an element index.
+    class iterator
+    {
+    private:
+        int_vector_buffer * m_ivb{nullptr};
+        uint64_t m_idx = 0;
+    public:
+        using iterator_category = std::random_access_iterator_tag;
+        using value_type = sdsl::int_vector_buffer::value_type;
+        using difference_type = sdsl::int_vector_buffer::difference_type;
+        using pointer = value_type *;
+        using reference = sdsl::int_vector_buffer::reference;
+        iterator() = delete;
+        iterator(int_vector_buffer & ivb, uint64_t idx = 0) : m_ivb(&ivb), m_idx(idx)
+        {}
+        iterator & operator++()
+        {
+            ++m_idx;
+            return *this;
+        }
+        iterator operator++(int)
+        {
+            iterator it = *this;
+            ++(*this);
+            return it;
+        }
+        iterator & operator--()
+        {
+            --m_idx;
+            return *this;
+        }
+        iterator operator--(int)
+        {
+            iterator it = *this;
+            --(*this);
+            return it;
+        }
+        reference operator*() const
+        {
+            assert(m_ivb != nullptr);
+            return (*m_ivb)[m_idx];
+        }
+        // The index is unsigned, so negative steps are delegated to the opposite operator.
+        iterator & operator+=(difference_type i)
+        {
+            if (i < 0)
+                return *this -= (-i);
+            m_idx += i;
+            return *this;
+        }
+        iterator & operator-=(difference_type i)
+        {
+            if (i < 0)
+                return *this += (-i);
+            m_idx -= i;
+            return *this;
+        }
+        iterator operator+(difference_type i) const
+        {
+            iterator it = *this;
+            return it += i;
+        }
+        iterator operator-(difference_type i) const
+        {
+            iterator it = *this;
+            return it -= i;
+        }
+        bool operator==(iterator const & it) const
+        {
+            return m_ivb == it.m_ivb and m_idx == it.m_idx;
+        }
+        bool operator!=(iterator const & it) const
+        {
+            return !(*this == it);
+        }
+        // Distance between two iterators; const so that const iterators can be subtracted.
+        inline difference_type operator-(iterator const & it) const
+        {
+            return (m_idx - it.m_idx);
+        }
+    };
+};
+}
+#endif +#ifndef SDSL_INT_VECTOR_MAPPER +#define SDSL_INT_VECTOR_MAPPER +#include +#include +#include +#include +#include +#include +#include +#include +namespace sdsl +{ +template +class int_vector_mapper +{ + static_assert(t_width <= 64, "int_vector_mapper: width must be at most 64 bits."); +public: + typedef typename int_vector::difference_type difference_type; + typedef typename int_vector::value_type value_type; + typedef typename int_vector::size_type size_type; + typedef typename int_vector::int_width_type width_type; + static constexpr uint8_t fixed_int_width = t_width; +public: + const size_type append_block_size = 1000000; +private: + uint8_t * m_mapped_data = nullptr; + uint64_t m_file_size_bytes = 0; + off_t m_data_offset = 0; + int m_fd = -1; + int_vector m_wrapper; + std::string m_file_name; + bool m_delete_on_close; +public: + int_vector_mapper() = delete; + int_vector_mapper(int_vector_mapper const &) = delete; + int_vector_mapper & operator=(int_vector_mapper const &) = delete; +public: + ~int_vector_mapper() + { + if (m_mapped_data) + { + auto ret = memory_manager::mem_unmap(m_fd, m_mapped_data, m_file_size_bytes); + if (ret != 0) + { + std::cerr << "int_vector_mapper: error unmapping file mapping'" << m_file_name << "': " << ret + << std::endl; + } + if (t_mode & std::ios_base::out) + { + if (m_data_offset) + { + osfstream out(m_file_name, std::ios::in); + if (out) + { + out.seekp(0, std::ios::beg); + int_vector::write_header(m_wrapper.m_size, m_wrapper.m_width, out); + } + else + { + std::cerr << "int_vector_mapper: could not open file for header update" << std::endl; + } + } + } + if (t_mode & std::ios_base::out) + { + size_type current_bit_size = m_wrapper.m_size; + size_type data_size_in_bytes = ((current_bit_size + 63) >> 6) << 3; + if (m_file_size_bytes != data_size_in_bytes + m_data_offset) + { + int tret = memory_manager::truncate_file_mmap(m_fd, data_size_in_bytes + m_data_offset); + if (tret == -1) + { + std::string truncate_error = + 
std::string("int_vector_mapper: truncate error. ") + std::string(util::str_from_errno()); + std::cerr << truncate_error; + } + } + } + } + if (m_fd != -1) + { + auto ret = memory_manager::close_file_for_mmap(m_fd); + if (ret != 0) + { + std::cerr << "int_vector_mapper: error closing file mapping'" << m_file_name << "': " << ret + << std::endl; + } + if (m_delete_on_close) + { + int ret_code = sdsl::remove(m_file_name); + if (ret_code != 0) + { + std::cerr << "int_vector_mapper: error deleting file '" << m_file_name << "': " << ret_code + << std::endl; + } + } + } + m_wrapper.m_data = nullptr; + m_wrapper.m_size = 0; + } + int_vector_mapper(int_vector_mapper && ivm) + { + m_wrapper.m_data = ivm.m_wrapper.m_data; + m_wrapper.m_size = ivm.m_wrapper.m_size; + m_wrapper.width(ivm.m_wrapper.width()); + m_file_name = ivm.m_file_name; + m_delete_on_close = ivm.m_delete_on_close; + ivm.m_wrapper.m_data = nullptr; + ivm.m_wrapper.m_size = 0; + ivm.m_mapped_data = nullptr; + ivm.m_fd = -1; + } + int_vector_mapper & operator=(int_vector_mapper && ivm) + { + m_wrapper.m_data = ivm.m_wrapper.m_data; + m_wrapper.m_size = ivm.m_wrapper.m_size; + m_wrapper.width(ivm.m_wrapper.width()); + m_file_name = ivm.m_file_name; + m_delete_on_close = ivm.m_delete_on_close; + ivm.m_wrapper.m_data = nullptr; + ivm.m_wrapper.m_size = 0; + ivm.m_mapped_data = nullptr; + ivm.m_fd = -1; + return (*this); + } + int_vector_mapper(std::string const & key, cache_config const & config) : + int_vector_mapper(cache_file_name(key, config)) + {} + int_vector_mapper(const std::string filename, bool is_plain = false, bool delete_on_close = false) : + m_data_offset(0), + m_file_name(filename), + m_delete_on_close(delete_on_close) + { + size_type size_in_bits = 0; + uint8_t int_width = t_width; + { + isfstream f(filename, std::ifstream::binary); + if (!f.is_open()) + { + throw std::runtime_error("int_vector_mapper: file " + m_file_name + " does not exist."); + } + if (!is_plain) + { + m_data_offset = 
int_vector::read_header(size_in_bits, int_width, f); + } + } + m_file_size_bytes = util::file_size(m_file_name); + if (is_plain) + { + if (8 != t_width and 16 != t_width and 32 != t_width and 64 != t_width) + { + throw std::runtime_error("int_vector_mapper: plain vector can " + "only be of width 8, 16, 32, 64."); + } + else + { + uint8_t byte_width = t_width / 8; + if ((m_file_size_bytes & bits::lo_set[bits::cnt(byte_width - 1)]) != 0) + { + throw std::runtime_error("int_vector_mapper: plain vector not a multiple of byte: " + + std::to_string(m_file_size_bytes) + " mod " + std::to_string(byte_width) + + " != 0"); + } + } + size_in_bits = m_file_size_bytes * 8; + } + m_fd = memory_manager::open_file_for_mmap(m_file_name, t_mode); + if (m_fd == -1) + { + std::string open_error = + std::string("int_vector_mapper: open file error.") + std::string(util::str_from_errno()); + throw std::runtime_error(open_error); + } + m_wrapper.width(int_width); + m_mapped_data = (uint8_t *)memory_manager::mmap_file(m_fd, m_file_size_bytes, t_mode); + if (m_mapped_data == nullptr) + { + std::string mmap_error = + std::string("int_vector_mapper: mmap error. 
") + std::string(util::str_from_errno()); + throw std::runtime_error(mmap_error); + } + m_wrapper.m_size = size_in_bits; + free(m_wrapper.m_data); + m_wrapper.m_data = (uint64_t *)(m_mapped_data + m_data_offset); + } + std::string file_name() const + { + return m_file_name; + } + width_type width() const + { + return m_wrapper.width(); + } + void width(const uint8_t new_int_width) + { + static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'width'"); + m_wrapper.width(new_int_width); + } + size_type size() const + { + return m_wrapper.size(); + } + void bit_resize(const size_type bit_size) + { + static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'bit_resize'"); + size_type new_size_in_bytes = ((bit_size + 63) >> 6) << 3; + if (m_file_size_bytes != new_size_in_bytes + m_data_offset) + { + if (m_mapped_data) + { + auto ret = memory_manager::mem_unmap(m_fd, m_mapped_data, m_file_size_bytes); + if (ret != 0) + { + std::cerr << "int_vector_mapper: error unmapping file mapping'" << m_file_name << "': " << ret + << std::endl; + } + } + int tret = memory_manager::truncate_file_mmap(m_fd, new_size_in_bytes + m_data_offset); + if (tret == -1) + { + std::string truncate_error = + std::string("int_vector_mapper: truncate error. ") + std::string(util::str_from_errno()); + throw std::runtime_error(truncate_error); + } + m_file_size_bytes = new_size_in_bytes + m_data_offset; + m_mapped_data = (uint8_t *)memory_manager::mmap_file(m_fd, m_file_size_bytes, t_mode); + if (m_mapped_data == nullptr) + { + std::string mmap_error = + std::string("int_vector_mapper: mmap error. 
") + std::string(util::str_from_errno()); + throw std::runtime_error(mmap_error); + } + m_wrapper.m_data = (uint64_t *)(m_mapped_data + m_data_offset); + } + m_wrapper.m_size = bit_size; + } + void resize(const size_type size) + { + static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'resize'"); + size_type size_in_bits = size * width(); + bit_resize(size_in_bits); + } + auto begin() -> typename int_vector::iterator + { + static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'begin'"); + return m_wrapper.begin(); + } + auto end() -> typename int_vector::iterator + { + static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'end'"); + return m_wrapper.end(); + } + auto begin() const -> typename int_vector::const_iterator + { + return m_wrapper.begin(); + } + auto end() const -> typename int_vector::const_iterator + { + return m_wrapper.end(); + } + auto cbegin() const -> typename int_vector::const_iterator + { + return m_wrapper.begin(); + } + auto cend() const -> typename int_vector::const_iterator + { + return m_wrapper.end(); + } + auto operator[](size_type const & idx) const -> typename int_vector::const_reference + { + return m_wrapper[idx]; + } + auto operator[](size_type const & idx) -> typename int_vector::reference + { + static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'operator[]'"); + return m_wrapper[idx]; + } + uint64_t const * data() const + { + return m_wrapper.data(); + } + uint64_t * data() + { + static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'data'"); + return m_wrapper.data(); + } + value_type get_int(size_type idx, const uint8_t len = 64) const + { + return m_wrapper.get_int(idx, len); + } + void set_int(size_type idx, value_type x, const uint8_t len = 64) + { + static_assert(t_mode & std::ios_base::out, "int_vector_mapper: 
must be opened in in+out mode for 'set_int'"); + m_wrapper.set_int(idx, x, len); + } + void push_back(value_type x) + { + static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'push_back'"); + if (capacity() < size() + 1) + { + size_type old_size = m_wrapper.m_size; + size_type size_in_bits = (size() + append_block_size) * width(); + bit_resize(size_in_bits); + m_wrapper.m_size = old_size; + } + m_wrapper.m_size += width(); + m_wrapper[size() - 1] = x; + } + size_type capacity() const + { + size_t data_size_in_bits = 8 * (m_file_size_bytes - m_data_offset); + return data_size_in_bits / width(); + } + size_type bit_size() const + { + return m_wrapper.bit_size(); + } + template + bool operator==(container const & v) const + { + return std::equal(begin(), end(), v.begin()); + } + bool operator==(int_vector const & v) const + { + return m_wrapper == v; + } + bool operator==(int_vector_mapper const & v) const + { + return m_wrapper == v.m_wrapper; + } + template + bool operator!=(container const & v) const + { + return !(*this == v); + } + void flip() + { + static_assert(t_mode & std::ios_base::out, "int_vector_mapper: must be opened in in+out mode for 'flip'"); + m_wrapper.flip(); + } + bool empty() const + { + return m_wrapper.empty(); + } +}; +template +class temp_file_buffer +{ +private: + static std::string tmp_file(std::string const & dir) + { + char tmp_file_name[1024] = {0}; +#ifdef _WIN32 + auto ret = GetTempFileName(dir.c_str(), "tmp_mapper_file_", 0, tmp_file_name); + if (ret == 0) + { + throw std::runtime_error("could not create temporary file."); + } +#else + snprintf(tmp_file_name, + sizeof(tmp_file_name), + "%s/tmp_mapper_file_%" PRIu64 "_XXXXXX.sdsl", + dir.c_str(), + util::pid()); + int fd = mkstemps(tmp_file_name, 5); + if (fd == -1) + { + throw std::runtime_error("could not create temporary file."); + } + close(fd); +#endif + return std::string(tmp_file_name, strlen(tmp_file_name)); + } +public: + static 
int_vector_mapper create() + { +#ifdef MSVC_COMPILER + char tmp_dir_name[1024] = {0}; + auto tmp_dir = GetTempPath(1024, tmp_dir_name); + auto file_name = tmp_file(tmp_dir_name); +#else + auto file_name = tmp_file("/tmp"); +#endif + return create(file_name); + } + static int_vector_mapper create(cache_config const & config) + { + auto file_name = tmp_file(config.dir); + return create(file_name); + } + static int_vector_mapper create(std::string const & file_name) + { + int_vector tmp_vector; + store_to_file(tmp_vector, file_name); + return int_vector_mapper(file_name, false, true); + } +}; +template +class write_out_mapper +{ +public: + static int_vector_mapper create(std::string const & key, cache_config & config) + { + auto file_name = cache_file_name(key, config); + auto tmp = create(file_name); + register_cache_file(key, config); + return std::move(tmp); + } + static int_vector_mapper create(std::string const & file_name) + { + int_vector tmp_vector; + store_to_file(tmp_vector, file_name); + return int_vector_mapper(file_name, false, false); + } + static int_vector_mapper create(std::string const & file_name, size_t size, uint8_t int_width = t_width) + { + int_vector tmp_vector(0, 0, int_width); + store_to_file(tmp_vector, file_name); + int_vector_mapper mapper(file_name, false, false); + mapper.resize(size); + return mapper; + } +}; +template +using bit_vector_mapper = int_vector_mapper<1, t_mode>; +template +using read_only_mapper = int_vector_mapper const; +} +#endif +#endif +#ifndef INCLUDED_SDSL_ITERATORS +#define INCLUDED_SDSL_ITERATORS +#include +#include +namespace sdsl +{ +template +class random_access_const_iterator +{ +public: + using iterator_category = std::random_access_iterator_tag; + using value_type = typename t_rac::value_type; + using difference_type = typename t_rac::difference_type; + using pointer = value_type *; + using reference = value_type &; + typedef const typename t_rac::value_type const_reference; + typedef typename 
t_rac::size_type size_type;
+    typedef random_access_const_iterator iterator;
+private:
+    t_rac const * m_rac; // container the iterator reads from
+    typename t_rac::size_type m_idx; // current position inside *m_rac
+    template
+    friend typename random_access_const_iterator::difference_type
+    operator-(random_access_const_iterator const & x, random_access_const_iterator const & y);
+public:
+    random_access_const_iterator(t_rac const * rac, size_type idx = 0) : m_rac(rac), m_idx(idx) // iterator into *rac at position idx
+    {}
+    const_reference operator*() const // element obtained through the container's operator[]
+    {
+        return (*m_rac)[m_idx];
+    }
+    iterator & operator++()
+    {
+        ++m_idx;
+        return *this;
+    }
+    iterator operator++(int)
+    {
+        random_access_const_iterator it = *this;
+        ++(*this);
+        return it;
+    }
+    iterator & operator--()
+    {
+        --m_idx;
+        return *this;
+    }
+    iterator operator--(int)
+    {
+        random_access_const_iterator it = *this;
+        --(*this);
+        return it;
+    }
+    iterator & operator+=(difference_type i)
+    {
+        if (i < 0) // negative steps are delegated to operator-= with the negated amount
+            return *this -= (-i);
+        m_idx += i;
+        return *this;
+    }
+    iterator & operator-=(difference_type i)
+    {
+        if (i < 0) // negative steps are delegated to operator+= with the negated amount
+            return *this += (-i);
+        m_idx -= i;
+        return *this;
+    }
+    iterator operator+(difference_type i) const
+    {
+        iterator it = *this;
+        return it += i;
+    }
+    iterator operator-(difference_type i) const
+    {
+        iterator it = *this;
+        return it -= i;
+    }
+    const_reference operator[](difference_type i) const // element i positions away from the current one
+    {
+        return *(*this + i);
+    }
+    bool operator==(iterator const & it) const // equal only if container and position both match
+    {
+        return it.m_rac == m_rac && it.m_idx == m_idx;
+    }
+    bool operator!=(iterator const & it) const
+    {
+        return !(*this == it);
+    }
+    bool operator<(iterator const & it) const // ordering compares positions only; the container is not checked
+    {
+        return m_idx < it.m_idx;
+    }
+    bool operator>(iterator const & it) const
+    {
+        return m_idx > it.m_idx;
+    }
+    bool operator>=(iterator const & it) const
+    {
+        return !(*this < it);
+    }
+    bool operator<=(iterator const & it) const
+    {
+        return !(*this > it);
+    }
+};
+template
+inline typename random_access_const_iterator::difference_type
+operator-(random_access_const_iterator const & x, random_access_const_iterator const & y) // signed distance x - y between two positions
+{
+    return (typename random_access_const_iterator::difference_type)x.m_idx
+         - (typename random_access_const_iterator::difference_type)y.m_idx;
+}
+template
+inline random_access_const_iterator operator+(typename random_access_const_iterator::difference_type n,
+                                              random_access_const_iterator const & it) // n + it, same result as it + n
+{
+    return it + n;
+}
+template
+struct random_access_container // presents a callable f as a read-only container: element i is f(i)
+{
+    typedef int_vector<>::size_type size_type;
+    typedef int_vector<>::difference_type difference_type;
+    typedef typename std::invoke_result_t value_type;
+    typedef random_access_const_iterator iterator_type;
+    t_F f; // callable that produces the elements
+    size_type m_size; // value reported by size(); not initialised by the default constructor
+    random_access_container(){};
+    random_access_container(t_F ff, size_type size) : f(ff), m_size(size)
+    {}
+    value_type operator[](size_type i) const
+    {
+        return f(i);
+    }
+    size_type size() const
+    {
+        return m_size;
+    }
+    iterator_type begin() const
+    {
+        return iterator_type(this, 0);
+    }
+    iterator_type end() const
+    {
+        return iterator_type(this, size());
+    }
+};
+}
+#endif
+namespace sdsl
+{
+template
+class rank_support_il;
+template
+class select_support_il;
+template
+constexpr bool power_of_two(T x) // true for integral x > 1 with exactly one bit set
+{
+    return std::is_integral::value and x > 1 and !(x & (x - 1));
+}
+template
+class bit_vector_il // bit vector that stores cumulative rank words interleaved with the data words; t_bs is the block size in bits
+{
+    static_assert(t_bs >= 64, "bit_vector_il: blocksize must be be at least 64 bits.");
+    static_assert(power_of_two(t_bs), "bit_vector_il: blocksize must be a power of two.");
+public:
+    typedef bit_vector::size_type size_type;
+    typedef size_type value_type;
+    typedef bit_vector::difference_type difference_type;
+    typedef random_access_const_iterator iterator;
+    typedef iterator const_iterator;
+    typedef bv_tag index_category;
+    friend class rank_support_il<1, t_bs>; // the support classes read m_data, m_superblocks and m_rank_samples directly
+    friend class rank_support_il<0, t_bs>;
+    friend class select_support_il<1, t_bs>;
+    friend class select_support_il<0, t_bs>;
+    typedef rank_support_il<1, t_bs> rank_1_type;
+    typedef rank_support_il<0, t_bs> rank_0_type;
+    typedef select_support_il<1, t_bs> select_1_type;
+    typedef
select_support_il<0, t_bs> select_0_type;
+private:
+    size_type m_size = 0; // number of bits
+    size_type m_block_num = 0; // number of 64-bit words in m_data
+    size_type m_superblocks = 0; // number of t_bs-bit blocks, computed as (m_size + t_bs) / t_bs
+    size_type m_block_shift = 0; // log2(t_bs); a bit index shifted by this gives its block number
+    int_vector<64> m_data; // per block: one cumulative 1-count word followed by t_bs / 64 data words
+    int_vector<64> m_rank_samples; // cached block counts used by select; left empty for small vectors
+    void init_rank_samples() // fills m_rank_samples with the counts select's binary search probes first
+    {
+        uint32_t blockSize_U64 = bits::hi(t_bs >> 6); // log2 of the number of data words per block
+        size_type idx = 0;
+        std::queue lbs, rbs; // pending [lb, rb) search intervals, processed breadth-first
+        lbs.push(0);
+        rbs.push(m_superblocks);
+        while (!lbs.empty())
+        {
+            size_type lb = lbs.front();
+            lbs.pop();
+            size_type rb = rbs.front();
+            rbs.pop();
+            if ( idx < m_rank_samples.size())
+            {
+                size_type mid = lb + (rb - lb) / 2;
+                size_type pos = (mid << blockSize_U64) + mid; // index in m_data of block mid's cumulative count
+                m_rank_samples[idx++] = m_data[pos];
+                lbs.push(lb); // left half is queued first, so its sample lands at 2 * idx + 1
+                rbs.push(mid);
+                lbs.push(mid + 1); // right half, sample lands at 2 * idx + 2
+                rbs.push(rb);
+            }
+        }
+    }
+public:
+    bit_vector_il()
+    {}
+    bit_vector_il(bit_vector_il const &) = default;
+    bit_vector_il(bit_vector_il &&) = default;
+    bit_vector_il & operator=(bit_vector_il const &) = default;
+    bit_vector_il & operator=(bit_vector_il &&) = default;
+    bit_vector_il(bit_vector const & bv) // builds the interleaved representation of bv
+    {
+        m_size = bv.size();
+        m_superblocks = (m_size + t_bs) / t_bs;
+        m_block_shift = bits::hi(t_bs);
+        size_type blocks = (m_size + 64) / 64; // number of 64-bit words copied from bv
+        size_type mem = blocks + m_superblocks + 1;
+        m_data = int_vector<64>(mem);
+        m_block_num = mem;
+        uint64_t const * bvp = bv.data();
+        size_type j = 0; // write position in m_data
+        size_type cum_sum = 0; // number of 1-bits copied so far
+        size_type sample_rate = t_bs / 64; // data words per block
+        for (size_type i = 0, sample_cnt = sample_rate; i < blocks; ++i, ++sample_cnt)
+        {
+            if (sample_cnt == sample_rate) // start of a block: store the cumulative count first
+            {
+                m_data[j] = cum_sum;
+                sample_cnt = 0;
+                j++;
+            }
+            m_data[j] = bvp[i];
+            cum_sum += bits::cnt(m_data[j]);
+            j++;
+        }
+        m_data[j] = cum_sum; // trailing word holds the total count
+        if (m_block_num > 1024 * 64) // samples are only allocated for large vectors
+        {
+            m_rank_samples.resize(std::min(1024ULL, 1ULL << bits::hi(m_superblocks)));
+        }
+        init_rank_samples();
+    }
+    value_type operator[](size_type i) const // value of bit i
+    {
+        assert(i < m_size);
+        size_type bs = i >> m_block_shift; // block that contains bit i
+        size_type block = bs + (i >> 6) + 1; // word index in m_data: skip one count word per block up to and including bs
+        return ((m_data[block] >> (i & 63)) & 1ULL);
+    }
+    uint64_t get_int(size_type idx, uint8_t len = 64) const // the len bits starting at bit idx, as an integer
+    {
+        assert(idx + len - 1 < m_size);
+        size_type bs = idx >> m_block_shift;
+        size_type b_block = bs + (idx >> 6) + 1; // word holding the first requested bit
+        bs = (idx + len - 1) >> m_block_shift;
+        size_type e_block = bs + ((idx + len - 1) >> 6) + 1; // word holding the last requested bit
+        if (b_block == e_block)
+        {
+            return (m_data[b_block] >> (idx & 63)) & bits::lo_set[len];
+        }
+        else
+        {
+            uint8_t b_len = 64 - (idx & 63); // number of bits taken from the first word
+            return (m_data[b_block] >> (idx & 63)) | (m_data[e_block] & bits::lo_set[len - b_len]) << b_len;
+        }
+    }
+    size_type size() const
+    {
+        return m_size;
+    }
+    size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const // writes all members to out, returns the written byte count
+    {
+        structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
+        size_type written_bytes = 0;
+        written_bytes += write_member(m_size, out, child, "size");
+        written_bytes += write_member(m_block_num, out, child, "block_num");
+        written_bytes += write_member(m_superblocks, out, child, "superblocks");
+        written_bytes += write_member(m_block_shift, out, child, "block_shift");
+        written_bytes += m_data.serialize(out, child, "data");
+        written_bytes += m_rank_samples.serialize(out, child, "rank_samples");
+        structure_tree::add_size(child, written_bytes);
+        return written_bytes;
+    }
+    void load(std::istream & in) // reads the members in the order serialize() writes them
+    {
+        read_member(m_size, in);
+        read_member(m_block_num, in);
+        read_member(m_superblocks, in);
+        read_member(m_block_shift, in);
+        m_data.load(in);
+        m_rank_samples.load(in);
+    }
+    template
+    void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const // archives all members
+    {
+        ar(CEREAL_NVP(m_size));
+        ar(CEREAL_NVP(m_block_num));
+        ar(CEREAL_NVP(m_superblocks));
+        ar(CEREAL_NVP(m_block_shift));
+        ar(CEREAL_NVP(m_data));
+        ar(CEREAL_NVP(m_rank_samples));
+    }
+    template
+    void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) // restores all members in the same order
+    {
+        ar(CEREAL_NVP(m_size));
+        ar(CEREAL_NVP(m_block_num));
+        ar(CEREAL_NVP(m_superblocks));
+        ar(CEREAL_NVP(m_block_shift));
+        ar(CEREAL_NVP(m_data));
+        ar(CEREAL_NVP(m_rank_samples));
+    }
+    iterator begin() const
+    {
+        return iterator(this, 0);
+    }
+    iterator end() const
+    {
+        return iterator(this, size());
+    }
+    bool operator==(bit_vector_il const & v) const // compares size and interleaved data only
+    {
+        return m_size == v.m_size && m_data == v.m_data;
+    }
+    bool operator!=(bit_vector_il const & v) const
+    {
+        return !(*this == v);
+    }
+};
+template
+class rank_support_il // rank queries on a bit_vector_il; t_b selects whether 1-bits or 0-bits are counted
+{
+    static_assert(t_b == 1 or t_b == 0, "rank_support_il only supports bitpatterns 0 or 1.");
+public:
+    typedef bit_vector::size_type size_type;
+    typedef bit_vector_il bit_vector_type;
+    enum
+    {
+        bit_pat = t_b
+    };
+    enum
+    {
+        bit_pat_len = (uint8_t)1
+    };
+private:
+    bit_vector_type const * m_v; // supported vector; not owned
+    size_type m_block_shift; // log2(t_bs)
+    size_type m_block_mask; // t_bs - 1: extracts the bit offset inside a block
+    size_type m_block_size_U64; // log2 of the number of data words per block
+    inline size_type rank1(size_type i) const // number of 1-bits in positions [0, i)
+    {
+        size_type SBlockNum = i >> m_block_shift;
+        size_type SBlockPos = (SBlockNum << m_block_size_U64) + SBlockNum; // index of the block's count word in m_data
+        uint64_t resp = m_v->m_data[SBlockPos]; // 1-bits before this block
+        uint64_t const * B = (m_v->m_data.data() + (SBlockPos + 1)); // first data word of the block
+        uint64_t rem = i & 63; // bits to count in the last, partial word
+        uint64_t bits = (i & m_block_mask) - rem; // bits covered by full words before it
+        while (bits)
+        {
+            resp += bits::cnt(*B++);
+            bits -= 64;
+        }
+        resp += bits::cnt(*B & bits::lo_set[rem]);
+        return resp;
+    }
+    inline size_type rank0(size_type i) const // number of 0-bits in positions [0, i)
+    {
+        size_type SBlockNum = i >> m_block_shift;
+        size_type SBlockPos = (SBlockNum << m_block_size_U64) + SBlockNum;
+        uint64_t resp = (SBlockNum << m_block_shift) - m_v->m_data[SBlockPos]; // bits before the block minus the 1-bits among them
+        uint64_t const * B = (m_v->m_data.data() + (SBlockPos + 1));
+        uint64_t rem = i & 63;
+        uint64_t bits = (i & m_block_mask) - rem;
+        while (bits)
+        {
+            resp += bits::cnt(~(*B));
+            B++;
+            bits -= 64;
+        }
+        resp += bits::cnt((~(*B)) & bits::lo_set[rem]);
+        return resp;
+    }
+public:
+    rank_support_il(bit_vector_type const * v = nullptr) // binds to v (may be nullptr) and derives the shift/mask constants from t_bs
+    {
+        set_vector(v);
+        m_block_shift = bits::hi(t_bs);
+        m_block_mask = t_bs - 1;
+        m_block_size_U64 = bits::hi(t_bs >> 6);
+    }
+    size_type rank(size_type i) const // dispatches on t_b to rank1 or rank0
+    {
+        if (t_b)
+            return
rank1(i);
+        return rank0(i);
+    }
+    size_type operator()(size_type i) const // same as rank(i)
+    {
+        return rank(i);
+    }
+    size_type size() const // size of the supported vector; m_v must be set
+    {
+        return m_v->size();
+    }
+    void set_vector(bit_vector_type const * v = nullptr)
+    {
+        m_v = v;
+    }
+    rank_support_il & operator=(rank_support_il const & rs) // only the vector pointer is copied
+    {
+        if (this != &rs)
+        {
+            set_vector(rs.m_v);
+        }
+        return *this;
+    }
+    void load(std::istream &, bit_vector_type const * v = nullptr) // nothing is read from the stream; only the vector is bound
+    {
+        set_vector(v);
+    }
+    size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const // delegates to serialize_empty_object
+    {
+        return serialize_empty_object(out, v, name, this);
+    }
+    template
+    void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const // nothing is archived
+    {}
+    template
+    void CEREAL_LOAD_FUNCTION_NAME(archive_t &) // nothing is restored
+    {}
+    bool operator==(rank_support_il const & other) const noexcept // compares the supported vectors; both pointers are dereferenced
+    {
+        return (*m_v == *other.m_v);
+    }
+    bool operator!=(rank_support_il const & other) const noexcept
+    {
+        return !(*this == other);
+    }
+};
+template
+class select_support_il // select queries on a bit_vector_il; t_b selects whether 1-bits or 0-bits are located
+{
+    static_assert(t_b == 1 or t_b == 0, "select_support_il only supports bitpatterns 0 or 1.");
+public:
+    typedef bit_vector::size_type size_type;
+    typedef bit_vector_il bit_vector_type;
+    enum
+    {
+        bit_pat = t_b
+    };
+    enum
+    {
+        bit_pat_len = (uint8_t)1
+    };
+private:
+    bit_vector_type const * m_v; // supported vector; not owned
+    size_type m_superblocks; // not assigned or read in the code visible here
+    size_type m_block_shift; // log2(t_bs)
+    size_type m_block_size_U64; // log2 of the number of data words per block
+    size_type select1(size_type i) const // position of the i-th 1-bit
+    {
+        size_type lb = 0, rb = m_v->m_superblocks; // binary search over blocks
+        size_type res = 0;
+        size_type idx = 0; // position in m_rank_samples: children of idx are 2 * idx + 1 and 2 * idx + 2
+        while (lb < rb)
+        {
+            size_type mid = (lb + rb) / 2;
+#ifndef NOSELCACHE
+            if (idx < m_v->m_rank_samples.size()) // first search levels use the cached samples
+            {
+                if (m_v->m_rank_samples[idx] >= i)
+                {
+                    idx = (idx << 1) + 1;
+                    rb = mid;
+                }
+                else
+                {
+                    idx = (idx << 1) + 2;
+                    lb = mid + 1;
+                }
+            }
+            else
+            {
+#endif
+                size_type pos = (mid << m_block_size_U64) + mid; // count word of block mid in m_data
+                if (m_v->m_data[pos] >= i)
+                {
+                    rb = mid;
+                }
+                else
+                {
+                    lb = mid + 1;
+                }
+#ifndef NOSELCACHE
+            }
+#endif
+        }
+        res = (rb - 1) << m_block_shift; // first bit position of block rb - 1
+        uint64_t const * w = m_v->m_data.data() + ((rb - 1) << m_block_size_U64) + (rb - 1); // count word of that block
+        i -= *w; // i is now relative to the start of the block
+        ++w;
+        size_type ones = bits::cnt(*w);
+        while (ones < i) // skip whole words until the one that holds the answer
+        {
+            i -= ones;
+            ++w;
+            ones = bits::cnt(*w);
+            res += 64;
+        }
+        res += bits::sel(*w, i);
+        return res;
+    }
+    size_type select0(size_type i) const // position of the i-th 0-bit
+    {
+        size_type lb = 0, rb = m_v->m_superblocks;
+        size_type res = 0;
+        size_type idx = 0;
+        while (lb < rb)
+        {
+            size_type mid = (lb + rb) / 2;
+#ifndef NOSELCACHE
+            if (idx < m_v->m_rank_samples.size())
+            {
+                if (((mid << m_block_shift) - m_v->m_rank_samples[idx]) >= i) // 0-count = bits before block mid minus the 1-count
+                {
+                    idx = (idx << 1) + 1;
+                    rb = mid;
+                }
+                else
+                {
+                    idx = (idx << 1) + 2;
+                    lb = mid + 1;
+                }
+            }
+            else
+            {
+#endif
+                size_type pos = (mid << m_block_size_U64) + mid;
+                if (((mid << m_block_shift) - m_v->m_data[pos]) >= i)
+                {
+                    rb = mid;
+                }
+                else
+                {
+                    lb = mid + 1;
+                }
+#ifndef NOSELCACHE
+            }
+#endif
+        }
+        res = (rb - 1) << m_block_shift;
+        uint64_t const * w = m_v->m_data.data() + ((rb - 1) << m_block_size_U64) + (rb - 1);
+        i = i - (res - *w); // subtract the 0-bits that precede the block
+        ++w;
+        size_type zeros = bits::cnt(~*w);
+        while (zeros < i)
+        {
+            i -= zeros;
+            ++w;
+            zeros = bits::cnt(~*w);
+            res += 64;
+        }
+        res += bits::sel(~*w, i);
+        return res;
+    }
+public:
+    select_support_il(bit_vector_type const * v = nullptr) // binds to v (may be nullptr) and derives the shift constants from t_bs
+    {
+        set_vector(v);
+        m_block_shift = bits::hi(t_bs);
+        m_block_size_U64 = bits::hi(t_bs >> 6);
+    }
+    size_type select(size_type i) const // dispatches on t_b to select1 or select0
+    {
+        if (t_b)
+            return select1(i);
+        return select0(i);
+    }
+    size_type operator()(size_type i) const // same as select(i)
+    {
+        return select(i);
+    }
+    size_type size() const // size of the supported vector; m_v must be set
+    {
+        return m_v->size();
+    }
+    void set_vector(bit_vector_type const * v = nullptr)
+    {
+        m_v = v;
+    }
+    select_support_il & operator=(select_support_il const & rs) // only the vector pointer is copied
+    {
+        if (this != &rs)
+        {
+            set_vector(rs.m_v);
+        }
+        return *this;
+    }
+    void load(std::istream &, bit_vector_type const * v = nullptr) // nothing is read from the stream; only the vector is bound
+    {
+        set_vector(v);
+    }
+    size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const // delegates to serialize_empty_object
+    {
+        return serialize_empty_object(out, v, name, this);
+    }
+    template
+    void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const // nothing is archived
+    {}
+    template
+    void CEREAL_LOAD_FUNCTION_NAME(archive_t &) // nothing is restored
+    {}
+    bool operator==(select_support_il const & other) const noexcept // compares the supported vectors; both pointers are dereferenced
+    {
+        return (*m_v == *other.m_v);
+    }
+    bool operator!=(select_support_il const & other) const noexcept
+    {
+        return !(*this == other);
+    }
+};
+}
+#endif
+#ifndef INCLUDED_SDSL_HYB_VECTOR
+#define INCLUDED_SDSL_HYB_VECTOR
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+namespace sdsl
+{
+template
+class rank_support_hyb;
+template
+class select_support_hyb;
+template
+class hyb_vector
+{
+public:
+    typedef bit_vector::size_type size_type;
+    typedef bit_vector::value_type value_type;
+    typedef bit_vector::difference_type difference_type;
+    typedef random_access_const_iterator iterator;
+    typedef rank_support_hyb<1, k_sblock_rate> rank_1_type;
+    typedef rank_support_hyb<0, k_sblock_rate> rank_0_type;
+    typedef select_support_hyb<1, k_sblock_rate> select_1_type;
+    typedef select_support_hyb<0, k_sblock_rate> select_0_type;
+    friend class rank_support_hyb<1, k_sblock_rate>;
+    friend class rank_support_hyb<0, k_sblock_rate>;
+    friend class select_support_hyb<1, k_sblock_rate>;
+    friend class select_support_hyb<0, k_sblock_rate>;
+private:
+    static const uint32_t k_block_size;
+    static const uint32_t k_block_bytes;
+    static const uint32_t k_sblock_header_size;
+    static const uint32_t k_sblock_size;
+    static const uint32_t k_hblock_rate;
+    size_type m_size = 0;
+    int_vector<8> m_trunk;
+    int_vector<8> m_sblock_header;
+    int_vector<64> m_hblock_header;
+public:
+    hyb_vector() = default;
+    hyb_vector(hyb_vector const & hybrid) = default;
+    hyb_vector(hyb_vector && hybrid) = default;
+    hyb_vector & operator=(hyb_vector const & hybrid) = default;
+    hyb_vector & operator=(hyb_vector && hybrid) = default;
+    hyb_vector(bit_vector const & bv)
+    {
+        m_size = bv.size();
+        size_type n_blocks =
(m_size + k_block_size - 1) / k_block_size; + size_type n_sblocks = (n_blocks + k_sblock_rate - 1) / k_sblock_rate; + size_type n_hblocks = (n_blocks + k_hblock_rate - 1) / k_hblock_rate; + size_type trunk_size = 0; + int_vector<8> runs_lookup(65536, 0); + runs_lookup[0] = 0; + for (uint32_t i = 1; i < 65536; ++i) + { + runs_lookup[i] = runs_lookup[i >> 1]; + if (i >= 32768) + --runs_lookup[i]; + if ((i & 1) != ((i >> 1) & 1)) + ++runs_lookup[i]; + } + uint64_t const * bv_ptr = bv.data(); + for (size_type block_id = 0; block_id < n_blocks; ++block_id) + { + size_type block_beg = block_id * k_block_size; + size_type block_end = block_beg + k_block_size; + uint32_t ones = 0; + uint32_t runs = 0; + if (block_end <= m_size) + { + uint64_t const * ptr64 = bv_ptr; + for (uint8_t i = 0; i < 4; ++i) + ones += bits::cnt(*ptr64++); + ptr64 = bv_ptr; + for (uint8_t i = 0; i < 4; ++i) + { + for (uint8_t j = 0; j < 4; ++j) + runs += runs_lookup[((*ptr64) >> (16 * j)) & 0xffff]; + for (uint8_t j = 0; j < 3; ++j) + runs += ((((*ptr64) >> (16 * j + 15)) & 1) ^ (((*ptr64) >> (16 * j + 16)) & 1)); + ++ptr64; + } + ptr64 = bv_ptr; + for (uint8_t i = 0; i < 3; ++i) + { + runs += ((((*ptr64) >> 63) & 1) ^ ((*(ptr64 + 1)) & 1)); + ++ptr64; + } + ++runs; + } + else + { + uint8_t prevbit = 2; + for (size_type i = block_beg; i < block_end; ++i) + { + uint8_t bit = (i < m_size ? 
bv[i] : 0); + if (bit == 1) + ++ones; + if (bit != prevbit) + ++runs; + prevbit = bit; + } + } + uint32_t minority_enc_size = std::min(ones, k_block_size - ones); + uint32_t runs_enc_size = (uint32_t)std::max(0, (int32_t)runs - 2); + uint32_t best_enc_size = std::min(minority_enc_size, runs_enc_size); + best_enc_size = std::min(best_enc_size, k_block_bytes); + trunk_size += best_enc_size; + bv_ptr += k_block_size / 64; + } + m_sblock_header = int_vector<8>(n_sblocks * k_sblock_header_size, 0); + m_hblock_header = int_vector<64>(n_hblocks * 2, 0); + m_trunk = int_vector<8>(trunk_size, 0); + size_type tot_rank = 0; + size_type sblock_ones = 0; + size_type trunk_ptr = 0; + bv_ptr = bv.data(); + for (size_type block_id = 0; block_id < n_blocks; ++block_id) + { + size_type block_beg = block_id * k_block_size; + size_type block_end = block_beg + k_block_size; + size_type sblock_id = block_id / k_sblock_rate; + size_type hblock_id = block_id / k_hblock_rate; + if (!(block_id % k_hblock_rate)) + { + m_hblock_header[2 * hblock_id] = trunk_ptr; + m_hblock_header[2 * hblock_id + 1] = tot_rank; + } + if (!(block_id % k_sblock_rate)) + { + uint32_t * ptr = (uint32_t *)(((uint8_t *)m_sblock_header.data()) + k_sblock_header_size * sblock_id); + *ptr++ = trunk_ptr - m_hblock_header[2 * hblock_id]; + *ptr = tot_rank - m_hblock_header[2 * hblock_id + 1]; + if (sblock_id && (!sblock_ones || sblock_ones == k_sblock_size)) + { + ptr = (uint32_t *)(((uint8_t *)m_sblock_header.data()) + k_sblock_header_size * (sblock_id - 1)); + *ptr |= 0x80000000; + } + sblock_ones = 0; + } + uint32_t ones = 0; + uint32_t runs = 0; + if (block_end <= m_size) + { + uint64_t const * ptr64 = bv_ptr; + for (uint8_t i = 0; i < 4; ++i) + ones += bits::cnt(*ptr64++); + ptr64 = bv_ptr; + for (uint8_t i = 0; i < 4; ++i) + { + for (uint8_t j = 0; j < 4; ++j) + runs += runs_lookup[((*ptr64) >> (16 * j)) & 0xffff]; + for (uint8_t j = 0; j < 3; ++j) + runs += ((((*ptr64) >> (16 * j + 15)) & 1) ^ (((*ptr64) >> (16 * 
j + 16)) & 1)); + ++ptr64; + } + ptr64 = bv_ptr; + for (uint8_t i = 0; i < 3; ++i) + { + runs += ((((*ptr64) >> 63) & 1) ^ ((*(ptr64 + 1)) & 1)); + ++ptr64; + } + ++runs; + } + else + { + uint8_t prevbit = 2; + for (size_type i = block_beg; i < block_end; ++i) + { + uint8_t bit = (i < m_size ? bv[i] : 0); + if (bit == 1) + ++ones; + if (bit != prevbit) + ++runs; + prevbit = bit; + } + } + uint32_t zeros = k_block_size - ones; + uint16_t * header_ptr16 = + (uint16_t *)(((uint8_t *)m_sblock_header.data()) + sblock_id * k_sblock_header_size + 8 + + (block_id % k_sblock_rate) * 2); + (*header_ptr16) = ones; + if (ones == k_block_size) + (*header_ptr16) |= 0x200; + if (0 < ones && ones < k_block_size) + { + uint32_t minority_enc_size = std::min(ones, zeros); + uint32_t runs_enc_size = (uint32_t)std::max(0, (int32_t)runs - 2); + uint32_t best_enc_size = std::min(minority_enc_size, runs_enc_size); + if (k_block_bytes <= best_enc_size) + { + (*header_ptr16) |= (k_block_bytes << 10); + if (block_end <= m_size) + { + for (uint8_t i = 0; i < 4; ++i) + { + *((uint64_t *)(((uint8_t *)m_trunk.data()) + trunk_ptr)) = *(bv_ptr + i); + trunk_ptr += 8; + } + } + else + { + for (size_type i = block_beg; i < block_end; i += 64) + { + uint64_t w = 0; + for (size_type j = i; j < std::min(i + 64, block_end); ++j) + { + uint8_t bit = (j < m_size ? 
bv[j] : 0); + if (bit) + w |= ((uint64_t)1 << (j - i)); + } + *((uint64_t *)(((uint8_t *)m_trunk.data()) + trunk_ptr)) = w; + trunk_ptr += 8; + } + } + } + else + { + if (runs_enc_size < minority_enc_size) + { + (*header_ptr16) |= (runs_enc_size << 10); + (*header_ptr16) |= (bv[block_beg] << 9); + if (block_end <= m_size) + { + uint32_t runid = 0; + uint64_t const * ptr64 = bv_ptr; + uint64_t w = 0; + for (uint8_t i = 0; runid < runs_enc_size && i < 4; ++i) + { + if (i > 0 && (w & 1) != ((*ptr64) & 1)) + m_trunk[trunk_ptr + runid++] = 64 * i - 1; + w = (*ptr64++); + for (uint8_t j = 0; runid < runs_enc_size && j < 63; ++j) + { + if ((w & 1) != ((w >> 1) & 1)) + m_trunk[trunk_ptr + runid++] = j + i * 64; + w >>= 1; + } + } + trunk_ptr += runid; + } + else + { + uint8_t prevbit = 2; + uint32_t runid = 0; + for (size_type i = block_beg; runid < runs_enc_size; ++i) + { + uint8_t bit = (i < m_size ? bv[i] : 0); + if (bit != prevbit && i != block_beg) + m_trunk[trunk_ptr + runid++] = (i - block_beg - 1); + prevbit = bit; + } + trunk_ptr += runid; + } + } + else + { + (*header_ptr16) |= (minority_enc_size << 10); + if (ones < zeros) + (*header_ptr16) |= 0x200; + uint8_t keybit = (ones < zeros); + if (block_end <= m_size) + { + uint64_t const * ptr64 = bv_ptr; + for (uint8_t i = 0; i < 4; ++i) + { + uint64_t w = (*ptr64++); + for (uint8_t j = 0; j < 64; ++j) + { + if ((w & 1) == keybit) + m_trunk[trunk_ptr++] = j + 64 * i; + w >>= 1; + } + } + } + else + { + for (size_type i = block_beg; i < block_end; ++i) + { + uint8_t bit = (i < m_size ? 
    // Returns the bit at 1-based position i (operator[] calls access0(i + 1)).
    //
    // Header layout, as written by the hyb_vector(bit_vector const &) constructor:
    //  * m_hblock_header[2 * h]     : trunk offset at the start of hyperblock h
    //  * superblock header (k_sblock_header_size bytes):
    //      - first uint32 : trunk offset relative to the hyperblock (low 30 bits are
    //                       read here); bit 31 marks a superblock whose bits are all equal
    //      - second uint32: rank relative to the hyperblock (unused by access0)
    //      - then one uint16 per block:
    //          bits 0..8  number of ones in the block
    //          bit  9     "special bit" (meaning depends on the encoding, see below)
    //          bits 10..  number of trunk bytes used by the block
    // NOTE(review): the byte-wise reads of the 16-bit headers assume a
    // little-endian host -- confirm for other targets.
    value_type access0(size_type i) const
    {
        assert(i > 0);
        assert(i <= m_size);
        size_type block_id = (i - 1) / k_block_size;
        size_type sblock_id = block_id / k_sblock_rate;
        size_type hblock_id = block_id / k_hblock_rate;
        size_type trunk_base = m_hblock_header[2 * hblock_id];
        // 1-based position of the queried bit inside its block.
        uint32_t local_i = i - block_id * k_block_size;
        uint8_t const * header_ptr8 = ((uint8_t const *)m_sblock_header.data()) + (sblock_id * k_sblock_header_size);
        uint32_t * header_ptr32 = (uint32_t *)header_ptr8;
        size_type trunk_ptr = trunk_base + ((*header_ptr32) & 0x3fffffff);
        header_ptr8 += 8; // skip the two uint32 fields; per-block uint16 headers follow
        uint16_t * header_ptr16 = (uint16_t *)header_ptr8;
        // Uniform superblock: the answer is bit 8 of the first block header, which is
        // set exactly when that block's ones count equals 256 (all ones).
        if ((*header_ptr32) & 0x80000000)
            return (value_type)((*(header_ptr8 + 1)) & 0x01);
        // Advance over the preceding blocks of this superblock, summing their trunk sizes.
        for (size_type j = sblock_id * k_sblock_rate; j != block_id; ++j)
        {
            trunk_ptr += ((*header_ptr16) >> 10);
            ++header_ptr16;
        }
        uint8_t const * trunk_p = ((uint8_t const *)m_trunk.data()) + trunk_ptr;
        uint32_t encoding_size = ((*header_ptr16) >> 10);
        uint32_t ones = ((*header_ptr16) & 0x1ff);
        uint32_t zeros = k_block_size - ones;
        uint32_t special_bit = (((*header_ptr16) & 0x200) >> 9);
        if (!encoding_size)
        {
            // No trunk bytes: the block consists of at most two runs. The special bit
            // selects whether the first run is made of ones (1) or zeros (0).
            uint32_t first_run_length = special_bit * ones + (1 - special_bit) * zeros;
            uint8_t inside_second_run = (first_run_length < local_i);
            return (inside_second_run ^ special_bit);
        }
        if (encoding_size < k_block_bytes)
        {
            if (std::min(ones, zeros) == encoding_size)
            {
                // Minority encoding: the trunk lists the 0-based positions of the
                // minority bits; the special bit is set when ones are the minority.
                uint32_t tot = 0;
                while (tot < encoding_size && *trunk_p < local_i)
                {
                    ++trunk_p;
                    ++tot;
                }
                // The queried bit is a minority bit iff the last skipped entry equals local_i - 1.
                uint8_t last_was_majority = ((!tot) || (*(trunk_p - 1) != local_i - 1));
                return (last_was_majority ^ special_bit);
            }
            // Run encoding: the trunk stores run boundaries (the constructor records
            // the 0-based position of the last bit of a run); the special bit is the
            // value of the first bit of the block. Entries are consumed in pairs.
            if (special_bit)
            {
                uint32_t j = 0;
                uint32_t acc = 0;   // ones accounted for by the pairs consumed so far
                int32_t last = -1;  // second entry of the last consumed pair
                while (j + 1 < encoding_size && *(trunk_p + 1) < local_i)
                {
                    acc += *trunk_p - last;
                    ++trunk_p;
                    last = *trunk_p;
                    ++trunk_p;
                    j += 2;
                }
                uint8_t access_i = 0;
                if (j + 1 >= encoding_size)
                {
                    // Fewer than two entries remain: the tail of the block is resolved
                    // using the total number of ones stored in the header.
                    if (j < encoding_size)
                    {
                        if (local_i <= (uint32_t)(*trunk_p) + 1)
                            access_i = (((int32_t)local_i - last - 1) > 0);
                        else
                        {
                            acc += (int32_t)(*trunk_p) - last;
                            if (ones - acc <= k_block_size - local_i)
                                access_i = 0;
                            else
                                access_i = 1;
                        }
                    }
                    else
                    {
                        if ((int32_t)(ones - acc) < (int32_t)local_i - last - 1)
                            access_i = 0;
                        else
                            access_i = (((int32_t)local_i - last - 1) > 0);
                    }
                }
                else
                {
                    if ((*trunk_p) < local_i - 1)
                        access_i = 0;
                    else
                        access_i = (((int32_t)local_i - last - 1) > 0);
                }
                return access_i;
            }
            else
            {
                // Same walk as above with the roles of ones and zeros exchanged
                // (the block starts with a zero bit).
                uint32_t j = 0;
                uint32_t acc = 0;
                int32_t last = -1;
                while (j + 1 < encoding_size && *(trunk_p + 1) < local_i)
                {
                    acc += *trunk_p - last;
                    ++trunk_p;
                    last = *trunk_p;
                    ++trunk_p;
                    j += 2;
                }
                uint8_t access_i = 0;
                if (j + 1 >= encoding_size)
                {
                    if (j < encoding_size)
                    {
                        if (local_i <= (uint32_t)(*trunk_p) + 1)
                            access_i = (((int32_t)local_i - last - 1) == 0);
                        else
                        {
                            acc += (*trunk_p) - last;
                            if (zeros - acc <= k_block_size - local_i)
                                access_i = 1;
                            else
                                access_i = 0;
                        }
                    }
                    else
                    {
                        if ((int32_t)(zeros - acc) < (int32_t)local_i - last - 1)
                            access_i = 1;
                        else
                            access_i = ((local_i - last - 1) == 0);
                    }
                }
                else
                {
                    if ((*trunk_p) < local_i - 1)
                        access_i = 1;
                    else
                        access_i = (((int32_t)local_i - last - 1) == 0);
                }
                return access_i;
            }
        }
        else
        {
            // Plain encoding: the block is stored verbatim as 64-bit words in the trunk.
            uint64_t * trunk_ptr64 = (uint64_t *)(((uint8_t *)m_trunk.data()) + trunk_ptr);
            uint32_t bit;
            for (bit = 0; bit + 64 <= local_i; bit += 64)
                trunk_ptr64++;
            uint8_t access_i = 0;
            // local_i is 1-based, so a multiple of 64 refers to the top bit of the previous word.
            if (bit != local_i)
                access_i = (((*trunk_ptr64) >> (local_i - bit - 1)) & 1);
            else
                access_i = (((*(trunk_ptr64 - 1)) >> 63) & 1);
            return access_i;
        }
    }
i < len; ++i) + { + res <<= 1; + res |= (*this)[idx + len - 1 - i]; + } + return res; + } + value_type operator[](size_type i) const + { + return access0(i + 1); + } + size_type size() const + { + return m_size; + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += write_member(m_size, out, child, "size"); + written_bytes += m_trunk.serialize(out, child, "trunk"); + written_bytes += m_sblock_header.serialize(out, child, "sblock_header"); + written_bytes += m_hblock_header.serialize(out, child, "hblock_header"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + read_member(m_size, in); + m_trunk.load(in); + m_sblock_header.load(in); + m_hblock_header.load(in); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_trunk)); + ar(CEREAL_NVP(m_sblock_header)); + ar(CEREAL_NVP(m_hblock_header)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_trunk)); + ar(CEREAL_NVP(m_sblock_header)); + ar(CEREAL_NVP(m_hblock_header)); + } + iterator begin() const + { + return iterator(this, 0); + } + iterator end() const + { + return iterator(this, size()); + } + bool operator==(hyb_vector const & v) const + { + return m_size == v.m_size && m_trunk == v.m_trunk && m_sblock_header == v.m_sblock_header + && m_hblock_header == v.m_hblock_header; + } + bool operator!=(hyb_vector const & v) const + { + return !(*this == v); + } +}; +template +const uint32_t hyb_vector::k_block_size = 256; +template +const uint32_t hyb_vector::k_block_bytes = 32; +template +const uint32_t hyb_vector::k_sblock_header_size = 8 + 2 * k_sblock_rate; +template +const uint32_t hyb_vector::k_sblock_size = 256 * k_sblock_rate; 
+template +const uint32_t hyb_vector::k_hblock_rate = (1U << 31) / 256; +template +struct rank_result +{ + typedef bit_vector::size_type size_type; + static size_type adapt(size_type res, size_type) + { + return res; + } +}; +template <> +struct rank_result<0> +{ + typedef bit_vector::size_type size_type; + static size_type adapt(size_type res, size_type i) + { + return i - res; + } +}; +template +class rank_support_hyb +{ +public: + typedef hyb_vector bit_vector_type; + typedef typename bit_vector_type::size_type size_type; + enum + { + bit_pat = t_b + }; + enum + { + bit_pat_len = (uint8_t)1 + }; +private: + bit_vector_type const * m_v; +public: + explicit rank_support_hyb(bit_vector_type const * v = nullptr) + { + set_vector(v); + } + const size_type rank(size_type i) const + { + assert(m_v != nullptr); + assert(i <= m_v->size()); + if (i <= 0) + return 0; + size_type block_id = (i - 1) / bit_vector_type::k_block_size; + size_type sblock_id = block_id / k_sblock_rate; + size_type hblock_id = block_id / bit_vector_type::k_hblock_rate; + size_type trunk_base = m_v->m_hblock_header[2 * hblock_id]; + size_type hblock_rank = m_v->m_hblock_header[2 * hblock_id + 1]; + uint32_t local_i = i - block_id * bit_vector_type::k_block_size; + uint8_t const * header_ptr8 = + ((uint8_t const *)(m_v->m_sblock_header.data())) + (sblock_id * bit_vector_type::k_sblock_header_size); + uint32_t * header_ptr32 = (uint32_t *)header_ptr8; + size_type trunk_ptr = trunk_base + ((*header_ptr32) & 0x3fffffff); + size_type sblock_rank = *(header_ptr32 + 1); + header_ptr8 += 8; + uint16_t * header_ptr16 = (uint16_t *)header_ptr8; + if ((*header_ptr32) & 0x80000000) + { + return rank_result::adapt(hblock_rank + sblock_rank + + ((*(header_ptr8 + 1)) & 0x01) + * (i - sblock_id * bit_vector_type::k_sblock_size), + i); + } + size_type block_rank = 0; + for (size_type j = sblock_id * k_sblock_rate; j != block_id; ++j) + { + trunk_ptr += ((*header_ptr16) >> 10); + block_rank += ((*header_ptr16) & 
0x1ff); + ++header_ptr16; + } + uint8_t const * trunk_p = ((uint8_t *)m_v->m_trunk.data()) + trunk_ptr; + uint32_t encoding_size = ((*header_ptr16) >> 10); + uint32_t ones = ((*header_ptr16) & 0x1ff); + uint32_t zeros = bit_vector_type::k_block_size - ones; + uint32_t special_bit = (((*header_ptr16) & 0x200) >> 9); + if (!encoding_size) + { + uint32_t first_run_length = special_bit * ones + (1 - special_bit) * zeros; + uint32_t local_rank = std::min(local_i, first_run_length); + return rank_result::adapt( + hblock_rank + sblock_rank + block_rank + + (special_bit * local_rank + (1 - special_bit) * (local_i - local_rank)), + i); + } + if (encoding_size < bit_vector_type::k_block_bytes) + { + if (std::min(ones, zeros) == encoding_size) + { + uint32_t tot = 0; + while (tot < encoding_size && (*trunk_p++) < local_i) + ++tot; + return rank_result::adapt(hblock_rank + sblock_rank + block_rank + special_bit * tot + + (1 - special_bit) * (local_i - tot), + i); + } + if (special_bit) + { + uint32_t j = 0; + uint32_t acc = 0; + int32_t last = -1; + while (j + 1 < encoding_size && *(trunk_p + 1) < local_i) + { + acc += *trunk_p - last; + ++trunk_p; + last = *trunk_p; + ++trunk_p; + j += 2; + } + if (j + 1 >= encoding_size) + { + if (j < encoding_size) + { + if (*trunk_p >= local_i) + acc += local_i - last - 1; + else + { + acc += (*trunk_p) - last; + acc += (ones - acc) - std::min(ones - acc, bit_vector_type::k_block_size - local_i); + } + } + else + acc += std::min(ones - acc, local_i - last - 1); + } + else + acc += std::min((int32_t)(*trunk_p), (int32_t)local_i - 1) - last; + return rank_result::adapt(hblock_rank + sblock_rank + block_rank + acc, i); + } + else + { + uint32_t j = 0; + uint32_t acc = 0; + int32_t last = -1; + while (j + 1 < encoding_size && *(trunk_p + 1) < local_i) + { + acc += *trunk_p - last; + ++trunk_p; + last = *trunk_p; + ++trunk_p; + j += 2; + } + if (j + 1 >= encoding_size) + { + if (j < encoding_size) + { + if (*trunk_p >= local_i) + acc += local_i 
- last - 1; + else + { + acc += (*trunk_p) - last; + acc += (zeros - acc) - std::min(zeros - acc, bit_vector_type::k_block_size - local_i); + } + } + else + acc += std::min(zeros - acc, local_i - last - 1); + } + else + acc += std::min((int32_t)(*trunk_p), (int32_t)local_i - 1) - last; + return rank_result::adapt(hblock_rank + sblock_rank + block_rank + (local_i - acc), i); + } + } + else + { + uint64_t * trunk_ptr64 = (uint64_t *)(((uint8_t *)m_v->m_trunk.data()) + trunk_ptr); + uint32_t bit; + for (bit = 0; bit + 64 <= local_i; bit += 64) + block_rank += bits::cnt(*trunk_ptr64++); + if (bit != local_i) + block_rank += bits::cnt((*trunk_ptr64) & (((uint64_t)1 << (local_i - bit)) - 1)); + return rank_result::adapt(hblock_rank + sblock_rank + block_rank, i); + } + } + const size_type operator()(size_type i) const + { + return rank(i); + } + const size_type size() const + { + return m_v->size(); + } + void set_vector(bit_vector_type const * v = nullptr) + { + m_v = v; + } + rank_support_hyb & operator=(rank_support_hyb const & rs) + { + if (this != &rs) + { + set_vector(rs.m_v); + } + return *this; + } + void load(std::istream &, bit_vector_type const * v = nullptr) + { + set_vector(v); + } + size_type serialize(std::ostream &, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + structure_tree::add_size(child, 0); + return 0; + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const + {} + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t &) + {} + bool operator==(rank_support_hyb const & other) const noexcept + { + return *m_v == *other.m_v; + } + bool operator!=(rank_support_hyb const & other) const noexcept + { + return !(*this == other); + } +}; +template +class select_support_hyb +{ +public: + typedef hyb_vector bit_vector_type; + typedef typename bit_vector_type::size_type size_type; + enum + { + bit_pat = t_b + }; + enum + { + bit_pat_len = 
#if defined(__GNUC__)
// GCC-compatible compilers: use the built-in 128-bit integer mode.
typedef unsigned int uint128_t __attribute__((mode(TI)));
#else
// Portable fallback: an unsigned 128-bit integer built from two 64-bit halves.
// Only the operations needed by the rest of the library are provided.
class uint128_t
{
public:
    friend std::ostream & operator<<(std::ostream &, uint128_t const &);

private:
    uint64_t m_lo;   // bits 0..63
    uint64_t m_high; // bits 64..127

public:
    inline uint128_t(uint64_t lo = 0, uint64_t high = 0) : m_lo(lo), m_high(high)
    {}
    inline uint128_t(uint128_t const & x) : m_lo(x.m_lo), m_high(x.m_high)
    {}
    inline uint128_t(uint128_t && x) : m_lo(std::move(x.m_lo)), m_high(std::move(x.m_high))
    {}
    uint128_t & operator=(uint128_t const & x)
    {
        m_lo = x.m_lo;
        m_high = x.m_high;
        return *this;
    }
    uint128_t & operator=(uint128_t && x)
    {
        m_lo = std::move(x.m_lo);
        m_high = std::move(x.m_high);
        return *this;
    }
    // Number of set bits (at most 128, so it fits into uint8_t).
    inline uint8_t popcount() const
    {
        return (uint8_t)bits::cnt(m_lo) + (uint8_t)bits::cnt(m_high);
    }
    // Position of the most significant set bit, as reported by bits::hi on the relevant half.
    inline uint16_t hi() const
    {
        if (m_high == 0ULL)
        {
            return bits::hi(m_lo);
        }
        else
        {
            return bits::hi(m_high) + 64;
        }
    }
    // Position of the i-th set bit; the low half is searched first.
    inline uint16_t select(uint32_t i) const
    {
        uint16_t x = 0;
        if ((x = (uint16_t)bits::cnt(m_lo)) >= i)
        {
            return bits::sel(m_lo, i);
        }
        i -= x;
        return bits::sel(m_high, i) + 64;
    }
    inline uint128_t & operator+=(uint128_t const & x)
    {
        *this = *this + x;
        return *this;
    }
    inline uint128_t & operator+=(uint64_t const & x)
    {
        *this = *this + x;
        return *this;
    }
    // Addition with carry: the low sum wrapped around iff it is smaller than m_lo.
    inline uint128_t operator+(uint128_t const & x) const
    {
        return uint128_t(m_lo + x.m_lo, m_high + x.m_high + ((m_lo + x.m_lo) < m_lo));
    }
    inline uint128_t operator+(uint64_t const & x) const
    {
        return uint128_t(m_lo + x, m_high + ((m_lo + x) < m_lo));
    }
    // Subtraction with borrow: the low difference wrapped around iff it is larger than m_lo.
    inline uint128_t operator-(uint128_t const & x) const
    {
        return uint128_t(m_lo - x.m_lo, m_high - x.m_high - ((m_lo - x.m_lo) > m_lo));
    }
    inline uint128_t operator~() const
    {
        return uint128_t(~m_lo, ~m_high);
    }
    inline uint128_t & operator-=(uint128_t const & x)
    {
        *this = *this - x;
        return *this;
    }
    inline uint128_t operator|(uint128_t const & x) const
    {
        return uint128_t(m_lo | x.m_lo, m_high | x.m_high);
    }
    inline uint128_t operator|(uint64_t const & x) const
    {
        return uint128_t(m_lo | x, m_high);
    }
    inline uint128_t & operator|=(uint128_t const & x)
    {
        m_lo |= x.m_lo;
        m_high |= x.m_high;
        return *this;
    }
    inline uint128_t operator&(uint128_t const & x) const
    {
        return uint128_t(m_lo & x.m_lo, m_high & x.m_high);
    }
    // Left shift by x bits (0 <= x < 128).
    inline uint128_t operator<<(int x) const
    {
        // A zero shift must be handled separately: the general path below would
        // evaluate m_lo >> 64, which is undefined for a 64-bit operand.
        if (x == 0)
        {
            return *this;
        }
        if (x < 64)
        {
            auto high = (m_high << x) | (m_lo >> (64 - x));
            auto lo = m_lo << x;
            return uint128_t(lo, high);
        }
        else
        {
            auto high = m_lo << (x - 64);
            return uint128_t(0, high);
        }
    }
    // Right shift by x bits (0 <= x < 128).
    inline uint128_t operator>>(int x) const
    {
        // Same reasoning as for operator<<: avoid m_high << 64 when x == 0.
        if (x == 0)
        {
            return *this;
        }
        if (x < 64)
        {
            auto lo = (m_lo >> x) | (m_high << (64 - x));
            return uint128_t(lo, m_high >> x);
        }
        else
        {
            auto lo = m_high >> (x - 64);
            return uint128_t(lo, 0);
        }
    }
    inline uint128_t & operator=(uint64_t const & x)
    {
        m_high = 0;
        m_lo = x;
        return *this;
    }
    inline bool operator==(uint128_t const & x) const
    {
        return (m_lo == x.m_lo) and (m_high == x.m_high);
    }
    inline bool operator==(uint64_t const & x) const
    {
        return (m_lo == x) and (m_high == 0);
    }
    inline bool operator!=(uint128_t const & x) const
    {
        return !(*this == x);
    }
    // Ordering: the high halves decide unless they are equal.
    inline bool operator>=(uint128_t const & x) const
    {
        if (m_high != x.m_high)
        {
            return m_high > x.m_high;
        }
        else
        {
            return m_lo >= x.m_lo;
        }
    }
    inline bool operator<=(uint128_t const & x) const
    {
        if (m_high != x.m_high)
        {
            return m_high < x.m_high;
        }
        else
        {
            return m_lo <= x.m_lo;
        }
    }
    inline bool operator>(uint128_t const & x) const
    {
        if (m_high != x.m_high)
        {
            return m_high > x.m_high;
        }
        else
        {
            return m_lo > x.m_lo;
        }
    }
    inline bool operator>(uint64_t const & x) const
    {
        if (m_high > 0)
        {
            return true;
        }
        return m_lo > x;
    }
    inline bool operator<(uint128_t const & x) const
    {
        if (m_high != x.m_high)
        {
            return m_high < x.m_high;
        }
        else
        {
            return m_lo < x.m_lo;
        }
    }
    // Truncating conversion: yields the low 64 bits.
    inline operator uint64_t() const
    {
        return m_lo;
    }
};
#endif
++j) + { + for (int i = 0; i < 16; ++i) + { + os << std::hex << ((X[j] >> 60) & 0xFULL) << std::dec; + X[j] <<= 4; + } + } + return os; +} +} +#endif +#ifndef INCLUDED_SDSL_UINT256 +#define INCLUDED_SDSL_UINT256 +#include +#include +#include +namespace sdsl +{ +class uint256_t +{ +public: + friend std::ostream & operator<<(std::ostream &, uint256_t const &); +private: + uint64_t m_lo; + uint64_t m_mid; + uint128_t m_high; +public: + inline uint256_t(uint64_t lo = 0, uint64_t mid = 0, uint128_t high = 0) : m_lo(lo), m_mid(mid), m_high(high) + {} + inline uint256_t(uint256_t const & x) : m_lo(x.m_lo), m_mid(x.m_mid), m_high(x.m_high) + {} + inline uint256_t(uint256_t && x) : m_lo(std::move(x.m_lo)), m_mid(std::move(x.m_mid)), m_high(std::move(x.m_high)) + {} + uint256_t & operator=(uint256_t const & x) + { + m_lo = x.m_lo; + m_mid = x.m_mid; + m_high = x.m_high; + return *this; + } + uint256_t & operator=(uint256_t && x) + { + m_lo = std::move(x.m_lo); + m_mid = std::move(x.m_mid); + m_high = std::move(x.m_high); + return *this; + } + inline uint16_t popcount() + { + return ((uint16_t)bits::cnt(m_lo)) + (uint16_t)bits::cnt(m_mid) + (uint16_t)bits::cnt(m_high >> 64) + + (uint16_t)bits::cnt(m_high); + } + inline uint16_t hi() + { + if (m_high == (uint128_t)0ULL) + { + if (m_mid) + { + return bits::hi(m_mid) + 64; + } + else + { + return bits::hi(m_lo); + } + } + else + { + uint64_t hh = (m_high >> 64); + if (hh) + { + return bits::hi(hh) + 192; + } + else + { + return bits::hi(m_high) + 128; + } + } + } + inline uint16_t select(uint32_t i) + { + uint16_t x = 0; + if ((x = (uint16_t)bits::cnt(m_lo)) >= i) + { + return bits::sel(m_lo, i); + } + i -= x; + if ((x = (uint16_t)bits::cnt(m_mid)) >= i) + { + return bits::sel(m_mid, i) + 64; + } + i -= x; + uint64_t hh = m_high >> 64; + uint64_t lh = m_high; + if ((x = (uint16_t)bits::cnt(lh)) >= i) + { + return bits::sel(lh, i) + 128; + } + i -= x; + return bits::sel(hh, i) + 192; + } + inline uint256_t & operator+=(uint256_t 
const & x) + { + uint128_t lo = (uint128_t)m_lo + x.m_lo; + uint128_t mid = (uint128_t)m_mid + x.m_mid + (lo >> 64); + m_lo = lo; + m_mid = mid; + m_high += x.m_high + (mid >> 64); + return *this; + } + inline uint256_t operator+(uint256_t const & x) + { + uint128_t lo = ((uint128_t)m_lo) + x.m_lo; + uint128_t mid = (uint128_t)m_mid + x.m_mid + (lo >> 64); + return uint256_t(lo, mid, m_high + x.m_high + (mid >> 64)); + } + inline uint256_t operator-(uint256_t const & x) + { + uint128_t lo = (uint128_t)m_lo + (~x.m_lo) + (uint128_t)1ULL; + uint128_t mid = (uint128_t)m_mid + (~x.m_mid) + (lo >> 64); + return uint256_t(lo, mid, m_high + (~x.m_high) + (mid >> 64)); + } + inline uint256_t & operator-=(uint256_t const & x) + { + uint128_t lo = (uint128_t)m_lo + (~x.m_lo) + (uint128_t)1ULL; + uint128_t mid = (uint128_t)m_mid + (~x.m_mid) + (lo >> 64); + m_lo = lo; + m_mid = mid; + m_high += (~x.m_high) + (mid >> 64); + return *this; + } + inline uint256_t operator|(uint256_t const & x) + { + return uint256_t(m_lo | x.m_lo, m_mid | x.m_mid, m_high | x.m_high); + } + inline uint256_t & operator|=(uint256_t const & x) + { + m_lo |= x.m_lo; + m_mid |= x.m_mid; + m_high |= x.m_high; + return *this; + } + inline uint256_t operator&(uint256_t const & x) + { + return uint256_t(m_lo & x.m_lo, m_mid & x.m_mid, m_high & x.m_high); + } + inline uint256_t operator<<(int x) const + { + if (x < 128) + { + uint128_t high = m_high << x; + uint128_t low = (((uint128_t)m_mid << 64) | m_lo); + high |= (low >> (128 - x)); + low = low << x; + return uint256_t(low, low >> 64, high); + } + else + { + uint128_t high = (((uint128_t)m_mid << 64) | m_lo) << (x - 128); + return uint256_t(0, 0, high); + } + } + inline uint256_t operator>>(int x) const + { + if (x < 128) + { + uint128_t low = (((uint128_t)m_mid << 64) | m_lo) >> x; + low |= ((m_high << (127 - x)) << 1); + return uint256_t(low, low >> 64, m_high >> x); + } + else + { + uint128_t low = (m_high >> (x - 128)); + return uint256_t(low, low 
>> 64, 0); + } + } + inline uint256_t & operator=(uint64_t const & x) + { + m_high = 0; + m_mid = 0; + m_lo = x; + return *this; + } + inline bool operator==(uint256_t const & x) const + { + return (m_lo == x.m_lo) and (m_mid == x.m_mid) and (m_high == x.m_high); + } + inline bool operator!=(uint256_t const & x) const + { + return !(*this == x); + } + inline bool operator>=(uint256_t const & x) const + { + if (m_high != x.m_high) + { + return m_high > x.m_high; + } + if (m_mid != x.m_mid) + { + return m_mid > x.m_mid; + } + else + { + return m_lo >= x.m_lo; + } + } + inline bool operator<=(uint256_t const & x) const + { + if (m_high != x.m_high) + { + return m_high < x.m_high; + } + if (m_mid != x.m_mid) + { + return m_mid < x.m_mid; + } + else + { + return m_lo <= x.m_lo; + } + } + inline bool operator>(uint256_t const & x) const + { + if (m_high != x.m_high) + { + return m_high > x.m_high; + } + if (m_mid != x.m_mid) + { + return m_mid > x.m_mid; + } + else + { + return m_lo > x.m_lo; + } + } + inline bool operator>(uint64_t const & x) const + { + if (m_high > (uint128_t)0ULL or m_mid > (uint128_t)0ULL) + { + return true; + } + return m_lo > x; + } + inline bool operator<(uint256_t const & x) const + { + if (m_high != x.m_high) + { + return m_high < x.m_high; + } + if (m_mid != x.m_mid) + { + return m_mid < x.m_mid; + } + else + { + return m_lo < x.m_lo; + } + } + inline operator uint64_t() + { + return m_lo; + } +}; +inline std::ostream & operator<<(std::ostream & os, uint256_t const & x) +{ + uint64_t X[4] = {(uint64_t)(x.m_high >> 64), (uint64_t)x.m_high, x.m_mid, x.m_lo}; + for (int j = 0; j < 4; ++j) + { + for (int i = 0; i < 16; ++i) + { + os << std::hex << ((X[j] >> 60) & 0xFULL) << std::dec; + X[j] <<= 4; + } + } + return os; +} +} +#endif +namespace sdsl +{ +template +struct binomial_coefficients_trait +{ + typedef uint64_t number_type; + static inline uint16_t hi(number_type x) + { + return bits::hi(x); + } + template + static inline number_type 
get_int(bit_vector_type const & bv, typename bit_vector_type::size_type pos, uint16_t len) + { + return bv.get_int(pos, len); + } + template + static void set_int(bit_vector_type & bv, typename bit_vector_type::size_type pos, number_type x, uint16_t len) + { + bv.set_int(pos, x, len); + } + static inline uint16_t popcount(number_type x) + { + return bits::cnt(x); + } +}; +template <> +struct binomial_coefficients_trait<7> +{ + typedef uint128_t number_type; + static inline uint16_t hi(number_type x) + { + if ((x >> 64)) + { + return bits::hi(x >> 64) + 64; + } + else + { + return bits::hi(x); + } + } + template + static inline number_type get_int(bit_vector_type const & bv, typename bit_vector_type::size_type pos, uint16_t len) + { + if (len <= 64) + { + return bv.get_int(pos, len); + } + else + { + return ((((number_type)bv.get_int(pos + 64, len - 64)) << 64) + bv.get_int(pos, 64)); + } + } + template + static void set_int(bit_vector_type & bv, typename bit_vector_type::size_type pos, number_type x, uint16_t len) + { + if (len <= 64) + { + bv.set_int(pos, x, len); + } + else + { + bv.set_int(pos, (uint64_t)x, 64); + bv.set_int(pos + 64, x >> 64, len - 64); + } + } + static inline uint16_t popcount(number_type x) + { + return bits::cnt(x >> 64) + bits::cnt(x); + } +}; +template <> +struct binomial_coefficients_trait<8> +{ + typedef uint256_t number_type; + static inline uint16_t hi(number_type x) + { + return x.hi(); + } + template + static inline number_type get_int(bit_vector_type const & bv, typename bit_vector_type::size_type pos, uint16_t len) + { + if (len <= 64) + { + return number_type(bv.get_int(pos, len)); + } + else if (len <= 128) + { + return number_type(bv.get_int(pos, 64), bv.get_int(pos + 64, len - 64)); + } + else if (len <= 192) + { + return number_type(bv.get_int(pos, 64), + bv.get_int(pos + 64, 64), + (uint128_t)bv.get_int(pos + 128, len - 128)); + } + else + { + return number_type(bv.get_int(pos, 64), + bv.get_int(pos + 64, 64), + 
(((uint128_t)bv.get_int(pos + 192, len - 192)) << 64) | bv.get_int(pos + 128, 64)); + } + } + template + static void set_int(bit_vector_type & bv, typename bit_vector_type::size_type pos, number_type x, uint16_t len) + { + if (len <= 64) + { + bv.set_int(pos, x, len); + } + else if (len <= 128) + { + bv.set_int(pos, x, 64); + bv.set_int(pos + 64, x >> 64, len - 64); + } + else if (len <= 192) + { + bv.set_int(pos, x, 64); + bv.set_int(pos + 64, x >> 64, 64); + bv.set_int(pos + 128, x >> 128, len - 128); + } + else + { + bv.set_int(pos, x, 64); + bv.set_int(pos + 64, x >> 64, 64); + bv.set_int(pos + 128, x >> 128, 64); + bv.set_int(pos + 192, x >> 192, len - 192); + } + } + static inline uint16_t popcount(number_type x) + { + return x.popcount(); + } +}; +template +struct binomial_table +{ + static struct impl + { + number_type table[n + 1][n + 1]; + number_type L1Mask[n + 1]; + number_type O1Mask[n]; + impl() + { + for (uint16_t k = 0; k <= n; ++k) + { + table[k][k] = 1; + } + for (uint16_t k = 0; k <= n; ++k) + { + table[0][k] = 0; + } + for (uint16_t nn = 0; nn <= n; ++nn) + { + table[nn][0] = 1; + } + for (int nn = 1; nn <= n; ++nn) + { + for (int k = 1; k <= n; ++k) + { + table[nn][k] = table[nn - 1][k - 1] + table[nn - 1][k]; + } + } + L1Mask[0] = 0; + number_type mask = 1; + O1Mask[0] = 1; + for (int i = 1; i <= n; ++i) + { + L1Mask[i] = mask; + if (i < n) + O1Mask[i] = O1Mask[i - 1] << 1; + mask = (mask << 1); + mask |= (number_type)1; + } + } + } data; +}; +template +typename binomial_table::impl binomial_table::data; +template +struct binomial_coefficients +{ + enum + { + MAX_LOG = (n > 128 ? 8 : (n > 64 ? 
7 : 6)) + }; + static const uint16_t MAX_SIZE = (1 << MAX_LOG); + typedef binomial_coefficients_trait trait; + typedef typename trait::number_type number_type; + typedef binomial_table tBinom; + static struct impl + { + const number_type (&table)[MAX_SIZE + 1][MAX_SIZE + 1] = + tBinom::data.table; + uint16_t space[n + 1]; +#ifndef RRR_NO_BS + static const uint16_t BINARY_SEARCH_THRESHOLD = n / MAX_LOG; +#else + static const uint16_t BINARY_SEARCH_THRESHOLD = 0; +#endif + number_type (&L1Mask)[MAX_SIZE + 1] = tBinom::data.L1Mask; + number_type (&O1Mask)[MAX_SIZE] = tBinom::data.O1Mask; + impl() + { + static typename binomial_table::impl tmp_data; + for (int k = 0; k <= n; ++k) + { + space[k] = (tmp_data.table[n][k] == (number_type)1) ? 0 : trait::hi(tmp_data.table[n][k]) + 1; + } + } + } data; +}; +template +typename binomial_coefficients::impl binomial_coefficients::data; +template +struct rrr_helper +{ + typedef binomial_coefficients binomial; + typedef typename binomial::number_type number_type; + typedef typename binomial::trait trait; + static inline uint16_t space_for_bt(uint16_t i) + { + return binomial::data.space[i]; + } + template + static inline number_type + decode_btnr(bit_vector_type const & bv, typename bit_vector_type::size_type btnrp, uint16_t btnrlen) + { + return trait::get_int(bv, btnrp, btnrlen); + } + template + static void + set_bt(bit_vector_type & bv, typename bit_vector_type::size_type pos, number_type bt, uint16_t space_for_bt) + { + trait::set_int(bv, pos, bt, space_for_bt); + } + template + static inline uint16_t + get_bt(bit_vector_type const & bv, typename bit_vector_type::size_type pos, uint16_t block_size) + { + return trait::popcount(trait::get_int(bv, pos, block_size)); + } + static inline number_type bin_to_nr(number_type bin) + { + if (bin == (number_type)0 or bin == binomial::data.L1Mask[n]) + { + return 0; + } + number_type nr = 0; + uint16_t k = trait::popcount(bin); + uint16_t nn = n; + while (bin != (number_type)0) + { + if 
(1ULL & bin) + { + nr += binomial::data.table[nn - 1][k]; + --k; + } + bin = (bin >> 1); + --nn; + } + return nr; + } + static inline bool decode_bit(uint16_t k, number_type nr, uint16_t off) + { +#ifndef RRR_NO_OPT + if (k == n) + { + return 1; + } + else if (k == 0) + { + return 0; + } + else if (k == 1) + { + return (n - nr - 1) == off; + } +#endif + uint16_t nn = n; + if (k + 1 < binomial::data.BINARY_SEARCH_THRESHOLD + 1) + { + while (k > 1) + { + uint16_t nn_lb = k, + nn_rb = nn + 1; + while (nn_lb < nn_rb) + { + uint16_t nn_mid = (nn_lb + nn_rb) / 2; + if (nr >= binomial::data.table[nn_mid - 1][k]) + { + nn_lb = nn_mid + 1; + } + else + { + nn_rb = nn_mid; + } + } + nn = nn_lb - 1; + if (n - nn >= off) + { + return (n - nn) == off; + } + nr -= binomial::data.table[nn - 1][k]; + --k; + --nn; + } + } + else + { + int i = 0; + while (k > 1) + { + if (i > off) + { + return 0; + } + if (nr >= binomial::data.table[nn - 1][k]) + { + nr -= binomial::data.table[nn - 1][k]; + --k; + if (i == off) + return 1; + } + --nn; + ++i; + } + } + return (n - nr - 1) == off; + } + static inline uint64_t decode_int(uint16_t k, number_type nr, uint16_t off, uint16_t len) + { +#ifndef RRR_NO_OPT + if (k == n) + { + return bits::lo_set[len]; + } + else if (k == 0) + { + return 0; + } + else if (k == 1) + { + if (n - nr - 1 >= (number_type)off and n - nr - 1 <= (number_type)(off + len - 1)) + { + return 1ULL << ((n - nr - 1) - off); + } + else + return 0; + } +#endif + uint64_t res = 0; + uint16_t nn = n; + int i = 0; + while (k > 1) + { + if (i > off + len - 1) + { + return res; + } + if (nr >= binomial::data.table[nn - 1][k]) + { + nr -= binomial::data.table[nn - 1][k]; + --k; + if (i >= off) + res |= 1ULL << (i - off); + } + --nn; + ++i; + } + if (n - nr - 1 >= (number_type)off and n - nr - 1 <= (number_type)(off + len - 1)) + { + res |= 1ULL << ((n - nr - 1) - off); + } + return res; + } + static inline uint16_t decode_popcount(uint16_t k, number_type nr, uint16_t off) + { 
+#ifndef RRR_NO_OPT + if (k == n) + { + return off; + } + else if (k == 0) + { + return 0; + } + else if (k == 1) + { + return (n - nr - 1) < off; + } +#endif + uint16_t result = 0; + uint16_t nn = n; + if (k + 1 < binomial::data.BINARY_SEARCH_THRESHOLD + 1) + { + while (k > 1) + { + uint16_t nn_lb = k, + nn_rb = nn + 1; + while (nn_lb < nn_rb) + { + uint16_t nn_mid = (nn_lb + nn_rb) / 2; + if (nr >= binomial::data.table[nn_mid - 1][k]) + { + nn_lb = nn_mid + 1; + } + else + { + nn_rb = nn_mid; + } + } + nn = nn_lb - 1; + if (n - nn >= off) + { + return result; + } + ++result; + nr -= binomial::data.table[nn - 1][k]; + --k; + --nn; + } + } + else + { + int i = 0; + while (k > 1) + { + if (i >= off) + { + return result; + } + if (nr >= binomial::data.table[nn - 1][k]) + { + nr -= binomial::data.table[nn - 1][k]; + --k; + ++result; + } + --nn; + ++i; + } + } + return result + ((n - nr - 1) < off); + } + static inline uint16_t decode_select(uint16_t k, number_type & nr, uint16_t sel) + { +#ifndef RRR_NO_OPT + if (k == n) + { + return sel - 1; + } + else if (k == 1 and sel == 1) + { + return n - nr - 1; + } +#endif + uint16_t nn = n; + if (sel + 1 < binomial::data.BINARY_SEARCH_THRESHOLD + 1) + { + while (sel > 0) + { + uint16_t nn_lb = k, nn_rb = nn + 1; + while (nn_lb < nn_rb) + { + uint16_t nn_mid = (nn_lb + nn_rb) / 2; + if (nr >= binomial::data.table[nn_mid - 1][k]) + { + nn_lb = nn_mid + 1; + } + else + { + nn_rb = nn_mid; + } + } + nn = nn_lb - 1; + nr -= binomial::data.table[nn - 1][k]; + --sel; + --nn; + --k; + } + return n - nn - 1; + } + else + { + int i = 0; + while (sel > 0) + { + if (nr >= binomial::data.table[nn - 1][k]) + { + nr -= binomial::data.table[nn - 1][k]; + --sel; + --k; + } + --nn; + ++i; + } + return i - 1; + } + } + template + static inline uint16_t decode_select_bitpattern(uint16_t k, number_type & nr, uint16_t sel) + { + int i = 0; + uint8_t decoded_pattern = 0; + uint8_t decoded_len = 0; + uint16_t nn = n; + while (sel > 0) + { + 
decoded_pattern = decoded_pattern << 1; + ++decoded_len; + if (nr >= binomial::data.table[nn - 1][k]) + { + nr -= binomial::data.table[nn - 1][k]; + decoded_pattern |= 1; + --k; + } + --nn; + ++i; + if (decoded_len == len) + { + if (decoded_pattern == pattern) + { + --sel; + } + decoded_pattern = 0; + decoded_len = 0; + } + } + return i - len; + } +}; +} +#endif +namespace sdsl +{ +template , uint16_t t_k = 32> +class rank_support_rrr; +template , uint16_t t_k = 32> +class select_support_rrr; +template , uint16_t t_k = 32> +class rrr_vector +{ + static_assert(t_bs >= 3 and t_bs <= 256, "rrr_vector: block size t_bs must be 3 <= t_bs <= 256."); + static_assert(t_k > 1, "rrr_vector: t_k must be > 0."); +public: + typedef bit_vector::size_type size_type; + typedef bit_vector::value_type value_type; + typedef bit_vector::difference_type difference_type; + typedef t_rac rac_type; + typedef random_access_const_iterator iterator; + typedef iterator const_iterator; + typedef bv_tag index_category; + typedef rank_support_rrr<1, t_bs, t_rac, t_k> rank_1_type; + typedef rank_support_rrr<0, t_bs, t_rac, t_k> rank_0_type; + typedef select_support_rrr<1, t_bs, t_rac, t_k> select_1_type; + typedef select_support_rrr<0, t_bs, t_rac, t_k> select_0_type; + friend class rank_support_rrr<0, t_bs, t_rac, t_k>; + friend class rank_support_rrr<1, t_bs, t_rac, t_k>; + friend class select_support_rrr<0, t_bs, t_rac, t_k>; + friend class select_support_rrr<1, t_bs, t_rac, t_k>; + typedef rrr_helper rrr_helper_type; + typedef typename rrr_helper_type::number_type number_type; + enum + { + block_size = t_bs + }; +private: + size_type m_size = 0; + rac_type m_bt; + bit_vector m_btnr; + int_vector<> m_btnrp; + int_vector<> m_rank; + bit_vector m_invert; +public: + rac_type const & bt = m_bt; + bit_vector const & btnr = m_btnr; + rrr_vector(){}; + rrr_vector(rrr_vector const & v) : + m_size(v.m_size), + m_bt(v.m_bt), + m_btnr(v.m_btnr), + m_btnrp(v.m_btnrp), + m_rank(v.m_rank), + 
m_invert(v.m_invert) + {} + rrr_vector(rrr_vector && v) + { + *this = std::move(v); + } + rrr_vector & operator=(rrr_vector const & v) + { + if (this != &v) + { + rrr_vector tmp(v); + *this = std::move(tmp); + } + return *this; + } + rrr_vector & operator=(rrr_vector && v) + { + if (this != &v) + { + m_size = v.m_size; + m_bt = std::move(v.m_bt); + m_btnr = std::move(v.m_btnr); + m_btnrp = std::move(v.m_btnrp); + m_rank = std::move(v.m_rank); + m_invert = std::move(v.m_invert); + } + return *this; + } + rrr_vector(bit_vector const & bv) + { + m_size = bv.size(); + int_vector<> bt_array; + bt_array.width(bits::hi(t_bs) + 1); + bt_array.resize((m_size + t_bs) / ((size_type)t_bs)); + size_type pos = 0, i = 0, x; + size_type btnr_pos = 0; + size_type sum_rank = 0; + while (pos + t_bs <= m_size) + { + bt_array[i++] = x = rrr_helper_type::get_bt(bv, pos, t_bs); + sum_rank += x; + btnr_pos += rrr_helper_type::space_for_bt(x); + pos += t_bs; + } + if (pos < m_size) + { + bt_array[i++] = x = rrr_helper_type::get_bt(bv, pos, m_size - pos); + sum_rank += x; + btnr_pos += rrr_helper_type::space_for_bt(x); + } + m_btnr = bit_vector(std::max(btnr_pos, (size_type)64), 0); + m_btnrp = int_vector<>((bt_array.size() + t_k - 1) / t_k, 0, bits::hi(btnr_pos) + 1); + m_rank = + int_vector<>((bt_array.size() + t_k - 1) / t_k + ((m_size % (t_k * t_bs)) > 0), 0, bits::hi(sum_rank) + 1); + m_invert = bit_vector((bt_array.size() + t_k - 1) / t_k, 0); + pos = 0; + i = 0; + btnr_pos = 0, sum_rank = 0; + bool invert = false; + while (pos + t_bs <= m_size) + { + if ((i % t_k) == (size_type)0) + { + m_btnrp[i / t_k] = btnr_pos; + m_rank[i / t_k] = sum_rank; + if (i + t_k <= bt_array.size()) + { + size_type gt_half_t_bs = 0; + for (size_type j = i; j < i + t_k; ++j) + { + if (bt_array[j] > t_bs / 2) + ++gt_half_t_bs; + } + if (gt_half_t_bs > (t_k / 2)) + { + m_invert[i / t_k] = 1; + for (size_type j = i; j < i + t_k; ++j) + { + bt_array[j] = t_bs - bt_array[j]; + } + invert = true; + } + else + { 
+ invert = false; + } + } + else + { + invert = false; + } + } + uint16_t space_for_bt = rrr_helper_type::space_for_bt(x = bt_array[i++]); + sum_rank += (invert ? (t_bs - x) : x); + if (space_for_bt) + { + number_type bin = rrr_helper_type::decode_btnr(bv, pos, t_bs); + number_type nr = rrr_helper_type::bin_to_nr(bin); + rrr_helper_type::set_bt(m_btnr, btnr_pos, nr, space_for_bt); + } + btnr_pos += space_for_bt; + pos += t_bs; + } + if (pos < m_size) + { + if ((i % t_k) == (size_type)0) + { + m_btnrp[i / t_k] = btnr_pos; + m_rank[i / t_k] = sum_rank; + m_invert[i / t_k] = 0; + invert = false; + } + uint16_t space_for_bt = rrr_helper_type::space_for_bt(x = bt_array[i++]); + assert(i == bt_array.size()); + sum_rank += invert ? (t_bs - x) : x; + if (space_for_bt) + { + number_type bin = rrr_helper_type::decode_btnr(bv, pos, m_size - pos); + number_type nr = rrr_helper_type::bin_to_nr(bin); + rrr_helper_type::set_bt(m_btnr, btnr_pos, nr, space_for_bt); + } + btnr_pos += space_for_bt; + assert(m_rank.size() - 1 == ((i + t_k - 1) / t_k)); + } + else + { + assert(m_rank.size() - 1 == ((i + t_k - 1) / t_k)); + } + m_rank[m_rank.size() - 1] = sum_rank; + m_bt = bt_array; + } + value_type operator[](size_type i) const + { + size_type bt_idx = i / t_bs; + uint16_t bt = m_bt[bt_idx]; + size_type sample_pos = bt_idx / t_k; + if (m_invert[sample_pos]) + bt = t_bs - bt; +#ifndef RRR_NO_OPT + if (bt == 0 or bt == t_bs) + { + return bt > 0; + } +#endif + uint16_t off = i % t_bs; + size_type btnrp = m_btnrp[sample_pos]; + for (size_type j = sample_pos * t_k; j < bt_idx; ++j) + { + btnrp += rrr_helper_type::space_for_bt(m_bt[j]); + } + uint16_t btnrlen = rrr_helper_type::space_for_bt(bt); + number_type btnr = rrr_helper_type::decode_btnr(m_btnr, btnrp, btnrlen); + return rrr_helper_type::decode_bit(bt, btnr, off); + } + uint64_t get_int(size_type idx, uint8_t len = 64) const + { + uint64_t res = 0; + size_type bb_idx = idx / t_bs; + size_type bb_off = idx % t_bs; + uint16_t bt = 
m_bt[bb_idx]; + size_type sample_pos = bb_idx / t_k; + size_type eb_idx = (idx + len - 1) / t_bs; + if (bb_idx == eb_idx) + { + if (m_invert[sample_pos]) + bt = t_bs - bt; + if (bt == 0) + { + res = 0; + } + else if (bt == t_bs and t_bs <= 64) + { + res = bits::lo_set[len]; + } + else + { + size_type btnrp = m_btnrp[sample_pos]; + for (size_type j = sample_pos * t_k; j < bb_idx; ++j) + { + btnrp += rrr_helper_type::space_for_bt(m_bt[j]); + } + uint16_t btnrlen = rrr_helper_type::space_for_bt(bt); + number_type btnr = rrr_helper_type::decode_btnr(m_btnr, btnrp, btnrlen); + res = rrr_helper_type::decode_int(bt, btnr, bb_off, len); + } + } + else + { + uint16_t b_len = t_bs - bb_off; + uint16_t b_len_sum = 0; + do + { + res |= get_int(idx, b_len) << b_len_sum; + idx += b_len; + b_len_sum += b_len; + len -= b_len; + b_len = t_bs; + b_len = std::min((uint16_t)len, b_len); + } + while (len > 0); + } + return res; + } + size_type size() const + { + return m_size; + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += write_member(m_size, out, child, "size"); + written_bytes += m_bt.serialize(out, child, "bt"); + written_bytes += m_btnr.serialize(out, child, "btnr"); + written_bytes += m_btnrp.serialize(out, child, "btnrp"); + written_bytes += m_rank.serialize(out, child, "rank_samples"); + written_bytes += m_invert.serialize(out, child, "invert"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + read_member(m_size, in); + m_bt.load(in); + m_btnr.load(in); + m_btnrp.load(in); + m_rank.load(in); + m_invert.load(in); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_bt)); + ar(CEREAL_NVP(m_btnr)); + ar(CEREAL_NVP(m_btnrp)); + 
ar(CEREAL_NVP(m_rank)); + ar(CEREAL_NVP(m_invert)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_bt)); + ar(CEREAL_NVP(m_btnr)); + ar(CEREAL_NVP(m_btnrp)); + ar(CEREAL_NVP(m_rank)); + ar(CEREAL_NVP(m_invert)); + } + iterator begin() const + { + return iterator(this, 0); + } + iterator end() const + { + return iterator(this, size()); + } + bool operator==(rrr_vector const & v) const + { + return m_size == v.m_size && m_bt == v.m_bt && m_btnr == v.m_btnr && m_btnrp == v.m_btnrp && m_rank == v.m_rank + && m_invert == v.m_invert; + } + bool operator!=(rrr_vector const & v) const + { + return !(*this == v); + } +}; +template +struct rank_support_rrr_trait +{ + typedef bit_vector::size_type size_type; + static size_type adjust_rank(size_type r, SDSL_UNUSED size_type n) + { + return r; + } +}; +template <> +struct rank_support_rrr_trait<0> +{ + typedef bit_vector::size_type size_type; + static size_type adjust_rank(size_type r, size_type n) + { + return n - r; + } +}; +template +class rank_support_rrr +{ + static_assert(t_b == 1u or t_b == 0u, "rank_support_rrr: bit pattern must be `0` or `1`"); +public: + typedef rrr_vector bit_vector_type; + typedef typename bit_vector_type::size_type size_type; + typedef typename bit_vector_type::rrr_helper_type rrr_helper_type; + typedef typename rrr_helper_type::number_type number_type; + enum + { + bit_pat = t_b + }; + enum + { + bit_pat_len = (uint8_t)1 + }; +private: + bit_vector_type const * m_v; +public: + explicit rank_support_rrr(bit_vector_type const * v = nullptr) + { + set_vector(v); + } + const size_type rank(size_type i) const + { + assert(m_v != nullptr); + assert(i <= m_v->size()); + size_type bt_idx = i / t_bs; + size_type sample_pos = bt_idx / t_k; + size_type btnrp = m_v->m_btnrp[sample_pos]; + size_type rank = m_v->m_rank[sample_pos]; + if (sample_pos + 1 < m_v->m_rank.size()) + { + size_type diff_rank = m_v->m_rank[sample_pos + 1] - rank; +#ifndef 
RRR_NO_OPT + if (diff_rank == (size_type)0) + { + return rank_support_rrr_trait::adjust_rank(rank, i); + } + else if (diff_rank == (size_type)t_bs * t_k) + { + return rank_support_rrr_trait::adjust_rank(rank + i - sample_pos * t_k * t_bs, i); + } +#endif + } + bool const inv = m_v->m_invert[sample_pos]; + for (size_type j = sample_pos * t_k; j < bt_idx; ++j) + { + uint16_t r = m_v->m_bt[j]; + rank += (inv ? t_bs - r : r); + btnrp += rrr_helper_type::space_for_bt(r); + } + uint16_t off = i % t_bs; + if (!off) + { + return rank_support_rrr_trait::adjust_rank(rank, i); + } + uint16_t bt = inv ? t_bs - m_v->m_bt[bt_idx] : m_v->m_bt[bt_idx]; + uint16_t btnrlen = rrr_helper_type::space_for_bt(bt); + number_type btnr = rrr_helper_type::decode_btnr(m_v->m_btnr, btnrp, btnrlen); + uint16_t popcnt = rrr_helper_type::decode_popcount(bt, btnr, off); + return rank_support_rrr_trait::adjust_rank(rank + popcnt, i); + } + const size_type operator()(size_type i) const + { + return rank(i); + } + const size_type size() const + { + return m_v->size(); + } + void set_vector(bit_vector_type const * v = nullptr) + { + m_v = v; + } + rank_support_rrr & operator=(rank_support_rrr const & rs) + { + if (this != &rs) + { + set_vector(rs.m_v); + } + return *this; + } + void load(std::istream &, bit_vector_type const * v = nullptr) + { + set_vector(v); + } + size_type serialize(std::ostream &, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + structure_tree::add_size(child, 0); + return 0; + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const + {} + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t &) + {} + bool operator==(rank_support_rrr const & other) const noexcept + { + return *m_v == *other.m_v; + } + bool operator!=(rank_support_rrr const & other) const noexcept + { + return !(*this == other); + } +}; +template +class select_support_rrr +{ + static_assert(t_b 
== 1u or t_b == 0u, "select_support_rrr: bit pattern must be `0` or `1`"); +public: + typedef rrr_vector bit_vector_type; + typedef typename bit_vector_type::size_type size_type; + typedef typename bit_vector_type::rrr_helper_type rrr_helper_type; + typedef typename rrr_helper_type::number_type number_type; + enum + { + bit_pat = t_b + }; + enum + { + bit_pat_len = (uint8_t)1 + }; +private: + bit_vector_type const * m_v; + size_type select1(size_type i) const + { + if (m_v->m_rank[m_v->m_rank.size() - 1] < i) + return size(); + size_type begin = 0, end = m_v->m_rank.size() - 1; + size_type idx, rank; + while (end - begin > 1) + { + idx = (begin + end) >> 1; + rank = m_v->m_rank[idx]; + if (rank >= i) + end = idx; + else + { + begin = idx; + } + } + rank = m_v->m_rank[begin]; + idx = begin * t_k; + size_type diff_rank = m_v->m_rank[end] - rank; +#ifndef RRR_NO_OPT + if (diff_rank == (size_type)t_bs * t_k) + { + return idx * t_bs + i - rank - 1; + } +#endif + bool const inv = m_v->m_invert[begin]; + size_type btnrp = m_v->m_btnrp[begin]; + uint16_t bt = 0, btnrlen = 0; + while (i > rank) + { + bt = m_v->m_bt[idx++]; + bt = inv ? 
t_bs - bt : bt; + rank += bt; + btnrp += (btnrlen = rrr_helper_type::space_for_bt(bt)); + } + rank -= bt; + number_type btnr = rrr_helper_type::decode_btnr(m_v->m_btnr, btnrp - btnrlen, btnrlen); + return (idx - 1) * t_bs + rrr_helper_type::decode_select(bt, btnr, i - rank); + } + size_type select0(size_type i) const + { + if ((size() - m_v->m_rank[m_v->m_rank.size() - 1]) < i) + { + return size(); + } + size_type begin = 0, end = m_v->m_rank.size() - 1; + size_type idx, rank; + while (end - begin > 1) + { + idx = (begin + end) >> 1; + rank = idx * t_bs * t_k - m_v->m_rank[idx]; + if (rank >= i) + end = idx; + else + { + begin = idx; + } + } + rank = begin * t_bs * t_k - m_v->m_rank[begin]; + idx = begin * t_k; + if (m_v->m_rank[end] == m_v->m_rank[begin]) + { + return idx * t_bs + i - rank - 1; + } + bool const inv = m_v->m_invert[begin]; + size_type btnrp = m_v->m_btnrp[begin]; + uint16_t bt = 0, btnrlen = 0; + while (i > rank) + { + bt = m_v->m_bt[idx++]; + bt = inv ? t_bs - bt : bt; + rank += (t_bs - bt); + btnrp += (btnrlen = rrr_helper_type::space_for_bt(bt)); + } + rank -= (t_bs - bt); + number_type btnr = rrr_helper_type::decode_btnr(m_v->m_btnr, btnrp - btnrlen, btnrlen); + return (idx - 1) * t_bs + rrr_helper_type::template decode_select_bitpattern<0, 1>(bt, btnr, i - rank); + } +public: + explicit select_support_rrr(bit_vector_type const * v = nullptr) + { + set_vector(v); + } + size_type select(size_type i) const + { + return t_b ? 
select1(i) : select0(i); + } + const size_type operator()(size_type i) const + { + return select(i); + } + const size_type size() const + { + return m_v->size(); + } + void set_vector(bit_vector_type const * v = nullptr) + { + m_v = v; + } + select_support_rrr & operator=(select_support_rrr const & rs) + { + if (this != &rs) + { + set_vector(rs.m_v); + } + return *this; + } + void load(std::istream &, bit_vector_type const * v = nullptr) + { + set_vector(v); + } + size_type serialize(std::ostream &, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + structure_tree::add_size(child, 0); + return 0; + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const + {} + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t &) + {} + bool operator==(select_support_rrr const & other) const noexcept + { + return *m_v == *other.m_v; + } + bool operator!=(select_support_rrr const & other) const noexcept + { + return !(*this == other); + } +}; +} +#endif +#ifndef INCLUDED_SDSL_SD_VECTOR +#define INCLUDED_SDSL_SD_VECTOR +#include +#include +#include +#include +#include +#include +#include +#ifndef INCLUDED_SDSL_SELECT_SUPPORT_MCL +#define INCLUDED_SDSL_SELECT_SUPPORT_MCL +#include +#include +#include +#include +#include +#ifndef INCLUDED_SDSL_SELECT_SUPPORT +#define INCLUDED_SDSL_SELECT_SUPPORT +#include +#include +#include +#ifndef INCLUDED_SDSL_RANK_SUPPORT +#define INCLUDED_SDSL_RANK_SUPPORT +#include +#include +#include +namespace sdsl +{ +class structure_tree_node; +class rank_support +{ +protected: + bit_vector const * m_v; +public: + typedef bit_vector::size_type size_type; + rank_support(bit_vector const * v = nullptr); + rank_support(rank_support const &) = default; + rank_support(rank_support &&) = default; + rank_support & operator=(rank_support const &) = default; + rank_support & operator=(rank_support &&) = default; + virtual ~rank_support() + {} + 
virtual size_type rank(size_type i) const = 0; + virtual size_type operator()(size_type idx) const = 0; + virtual size_type serialize(std::ostream & out, structure_tree_node * v, std::string name) const = 0; + virtual void load(std::istream & in, bit_vector const * v = nullptr) = 0; + virtual void set_vector(bit_vector const * v = nullptr) = 0; +}; +inline rank_support::rank_support(bit_vector const * v) +{ + m_v = v; +} +template +struct rank_support_trait +{ + typedef rank_support::size_type size_type; + static size_type args_in_the_word(uint64_t, uint64_t &) + { + return 0; + } + static uint32_t word_rank(uint64_t const *, size_type) + { + return 0; + } + static uint32_t full_word_rank(uint64_t const *, size_type) + { + return 0; + } + static uint64_t init_carry() + { + return 0; + } +}; +template <> +struct rank_support_trait<0, 1> +{ + typedef rank_support::size_type size_type; + static size_type args_in_the_word(uint64_t w, uint64_t &) + { + return bits::cnt(~w); + } + static uint32_t word_rank(uint64_t const * data, size_type idx) + { + return bits::cnt((~*(data + (idx >> 6))) & bits::lo_set[idx & 0x3F]); + } + static uint32_t full_word_rank(uint64_t const * data, size_type idx) + { + return bits::cnt((~*(data + (idx >> 6)))); + } + static uint64_t init_carry() + { + return 0; + } +}; +template <> +struct rank_support_trait<1, 1> +{ + typedef rank_support::size_type size_type; + static size_type args_in_the_word(uint64_t w, uint64_t &) + { + return bits::cnt(w); + } + static uint32_t word_rank(uint64_t const * data, size_type idx) + { + return bits::cnt(*(data + (idx >> 6)) & bits::lo_set[idx & 0x3F]); + } + static uint32_t full_word_rank(uint64_t const * data, size_type idx) + { + return bits::cnt(*(data + (idx >> 6))); + } + static uint64_t init_carry() + { + return 0; + } +}; +template <> +struct rank_support_trait<10, 2> +{ + typedef rank_support::size_type size_type; + static size_type args_in_the_word(uint64_t w, uint64_t & carry) + { + return 
bits::cnt10(w, carry); + } + static uint32_t word_rank(uint64_t const * data, size_type idx) + { + data = data + (idx >> 6); + uint64_t carry = (idx > 63) ? *(data - 1) >> 63 : 0; + return bits::cnt(bits::map10(*data, carry) & bits::lo_set[idx & 0x3F]); + } + static uint32_t full_word_rank(uint64_t const * data, size_type idx) + { + data = data + (idx >> 6); + uint64_t carry = (idx > 63) ? *(data - 1) >> 63 : 0; + return bits::cnt(bits::map10(*data, carry)); + } + static uint64_t init_carry() + { + return 0; + } +}; +template <> +struct rank_support_trait<01, 2> +{ + typedef rank_support::size_type size_type; + static size_type args_in_the_word(uint64_t w, uint64_t & carry) + { + return bits::cnt01(w, carry); + } + static uint32_t word_rank(uint64_t const * data, size_type idx) + { + data = data + (idx >> 6); + uint64_t carry = (idx > 63) ? *(data - 1) >> 63 : 1; + return bits::cnt(bits::map01(*data, carry) & bits::lo_set[idx & 0x3F]); + } + static uint32_t full_word_rank(uint64_t const * data, size_type idx) + { + data = data + (idx >> 6); + uint64_t carry = (idx > 63) ? *(data - 1) >> 63 : 1; + return bits::cnt(bits::map01(*data, carry)); + } + static uint64_t init_carry() + { + return 1; + } +}; +template <> +struct rank_support_trait<00, 2> +{ + typedef rank_support::size_type size_type; + static size_type args_in_the_word(uint64_t w, uint64_t & carry) + { + size_type res = bits::cnt(~(w | (w << 1 | carry))); + carry = (w >> 63); + return res; + } + static uint32_t word_rank(uint64_t const * data, size_type idx) + { + data = data + (idx >> 6); + uint64_t carry = (idx > 63) ? *(data - 1) >> 63 : 1; + return bits::cnt((~(*data | ((*data) << 1 | carry))) & bits::lo_set[idx & 0x3F]); + } + static uint32_t full_word_rank(uint64_t const * data, size_type idx) + { + data = data + (idx >> 6); + uint64_t carry = (idx > 63) ? 
*(data - 1) >> 63 : 1; + return bits::cnt(~(*data | ((*data) << 1 | carry))); + } + static uint64_t init_carry() + { + return 1; + } +}; +template <> +struct rank_support_trait<11, 2> +{ + typedef rank_support::size_type size_type; + static size_type args_in_the_word(uint64_t w, uint64_t & carry) + { + size_type res = bits::cnt(w & (w << 1 | carry)); + carry = (w >> 63); + return res; + } + static uint32_t word_rank(uint64_t const * data, size_type idx) + { + data = data + (idx >> 6); + uint64_t carry = (idx > 63) ? *(data - 1) >> 63 : 0; + return bits::cnt((*data & ((*data) << 1 | carry)) & bits::lo_set[idx & 0x3F]); + } + static uint32_t full_word_rank(uint64_t const * data, size_type idx) + { + data = data + (idx >> 6); + uint64_t carry = (idx > 63) ? *(data - 1) >> 63 : 0; + return bits::cnt(*data & ((*data) << 1 | carry)); + } + static uint64_t init_carry() + { + return 0; + } +}; +} +#ifndef INCLUDED_SDSL_RANK_SUPPORT_SCAN +#define INCLUDED_SDSL_RANK_SUPPORT_SCAN +#include +#include +#include +#include +namespace sdsl +{ +class structure_tree_node; +template +class rank_support_scan : public rank_support +{ +private: + static_assert(t_b == 1u or t_b == 0u or t_b == 10u or t_b == 11u, + "rank_support_scan: bit pattern must be `0`,`1`,`10` or `01`"); + static_assert(t_pat_len == 1u or t_pat_len == 2u, "rank_support_scan: bit pattern length must be 1 or 2"); +public: + typedef bit_vector bit_vector_type; + enum + { + bit_pat = t_b + }; + enum + { + bit_pat_len = t_pat_len + }; +public: + explicit rank_support_scan(bit_vector const * v = nullptr) : rank_support(v){}; + rank_support_scan(rank_support_scan const & rs) = default; + rank_support_scan(rank_support_scan && rs) = default; + rank_support_scan & operator=(rank_support_scan const & rs) = default; + rank_support_scan & operator=(rank_support_scan && rs) = default; + size_type rank(size_type idx) const; + size_type operator()(size_type idx) const + { + return rank(idx); + }; + size_type size() const + { + 
return m_v->size(); + }; + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + return serialize_empty_object(out, v, name, this); + } + void load(std::istream &, int_vector<1> const * v = nullptr) + { + set_vector(v); + } + void set_vector(bit_vector const * v = nullptr) + { + m_v = v; + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const + {} + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t &) + {} + bool operator==(rank_support_scan const & other) const noexcept + { + return (*m_v == *other.m_v); + } + bool operator!=(rank_support_scan const & other) const noexcept + { + return !(*this == other); + } +}; +template +inline typename rank_support_scan::size_type +rank_support_scan::rank(size_type idx) const +{ + assert(m_v != nullptr); + assert(idx <= m_v->size()); + uint64_t const * p = m_v->data(); + size_type i = 0; + size_type result = 0; + while (i + 64 <= idx) + { + result += rank_support_trait::full_word_rank(p, i); + i += 64; + } + return result + rank_support_trait::word_rank(p, idx); +} +} +#endif +#ifndef INCLUDED_SDSL_RANK_SUPPORT_V +#define INCLUDED_SDSL_RANK_SUPPORT_V +#include +#include +#include +#include +namespace sdsl +{ +template +class rank_support_v : public rank_support +{ +private: + static_assert(t_b == 1u or t_b == 0u or t_b == 10u or t_b == 11, + "rank_support_v: bit pattern must be `0`,`1`,`10` or `01`"); + static_assert(t_pat_len == 1u or t_pat_len == 2u, "rank_support_v: bit pattern length must be 1 or 2"); +public: + typedef bit_vector bit_vector_type; + typedef rank_support_trait trait_type; + enum + { + bit_pat = t_b + }; + enum + { + bit_pat_len = t_pat_len + }; +private: + int_vector<64> m_basic_block; +public: + explicit rank_support_v(bit_vector const * v = nullptr) + { + set_vector(v); + if (v == nullptr) + { + return; + } + else if (v->empty()) + { + m_basic_block = int_vector<64>(2, 0); + return; + } + size_type basic_block_size = (((v->bit_size() + 
63) >> 9) + 1) << 1; + m_basic_block.resize(basic_block_size); + if (m_basic_block.empty()) + return; + uint64_t const * data = m_v->data(); + size_type i, j = 0; + m_basic_block[0] = m_basic_block[1] = 0; + uint64_t carry = trait_type::init_carry(); + uint64_t sum = trait_type::args_in_the_word(*data, carry); + uint64_t second_level_cnt = 0; + for (i = 1; i < ((m_v->bit_size() + 63) >> 6); ++i) + { + if (!(i & 0x7)) + { + j += 2; + m_basic_block[j - 1] = second_level_cnt; + m_basic_block[j] = m_basic_block[j - 2] + sum; + second_level_cnt = sum = 0; + } + else + { + second_level_cnt |= sum << (63 - 9 * (i & 0x7)); + } + sum += trait_type::args_in_the_word(*(++data), carry); + } + if (i & 0x7) + { + second_level_cnt |= sum << (63 - 9 * (i & 0x7)); + m_basic_block[j + 1] = second_level_cnt; + } + else + { + j += 2; + m_basic_block[j - 1] = second_level_cnt; + m_basic_block[j] = m_basic_block[j - 2] + sum; + m_basic_block[j + 1] = 0; + } + } + rank_support_v(rank_support_v const &) = default; + rank_support_v(rank_support_v &&) = default; + rank_support_v & operator=(rank_support_v const &) = default; + rank_support_v & operator=(rank_support_v &&) = default; + size_type rank(size_type idx) const + { + assert(m_v != nullptr); + assert(idx <= m_v->size()); + uint64_t const * p = m_basic_block.data() + ((idx >> 8) & 0xFFFFFFFFFFFFFFFEULL); + if (idx & 0x3F) + return *p + ((*(p + 1) >> (63 - 9 * ((idx & 0x1FF) >> 6))) & 0x1FF) + + trait_type::word_rank(m_v->data(), idx); + else + return *p + ((*(p + 1) >> (63 - 9 * ((idx & 0x1FF) >> 6))) & 0x1FF); + } + inline size_type operator()(size_type idx) const + { + return rank(idx); + } + size_type size() const + { + return m_v->size(); + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + size_type written_bytes = 0; + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + written_bytes += m_basic_block.serialize(out, child, 
"cumulative_counts"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in, int_vector<1> const * v = nullptr) + { + set_vector(v); + m_basic_block.load(in); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_basic_block)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_basic_block)); + } + bool operator==(rank_support_v const & other) const noexcept + { + return m_basic_block == other.m_basic_block; + } + bool operator!=(rank_support_v const & other) const noexcept + { + return !(*this == other); + } + void set_vector(bit_vector const * v = nullptr) + { + m_v = v; + } +}; +} +#endif +#ifndef INCLUDED_SDSL_RANK_SUPPORT_VFIVE +#define INCLUDED_SDSL_RANK_SUPPORT_VFIVE +#include +#include +#include +#include +namespace sdsl +{ +template +class rank_support_v5 : public rank_support +{ +private: + static_assert(t_b == 1u or t_b == 0u or t_b == 10u or t_b == 11u, + "rank_support_v5: bit pattern must be `0`,`1`,`10` or `01` or `11`"); + static_assert(t_pat_len == 1u or t_pat_len == 2u, "rank_support_v5: bit pattern length must be 1 or 2"); +public: + typedef bit_vector bit_vector_type; + typedef rank_support_trait trait_type; + enum + { + bit_pat = t_b + }; + enum + { + bit_pat_len = t_pat_len + }; +private: + int_vector<64> m_basic_block; +public: + explicit rank_support_v5(bit_vector const * v = nullptr) + { + set_vector(v); + if (v == nullptr) + { + return; + } + else if (v->empty()) + { + m_basic_block = int_vector<64>(2, 0); + return; + } + size_type basic_block_size = (((v->bit_size() + 63) >> 11) + 1) << 1; + m_basic_block.resize(basic_block_size); + if (m_basic_block.empty()) + return; + uint64_t const * data = m_v->data(); + size_type i, j = 0; + m_basic_block[0] = m_basic_block[1] = 0; + uint64_t carry = trait_type::init_carry(); + uint64_t sum = trait_type::args_in_the_word(*data, carry); + uint64_t second_level_cnt = 0; + 
uint64_t cnt_words = 1; + for (i = 1; i < ((m_v->bit_size() + 63) >> 6); ++i, ++cnt_words) + { + if (cnt_words == 32) + { + j += 2; + m_basic_block[j - 1] = second_level_cnt; + m_basic_block[j] = m_basic_block[j - 2] + sum; + second_level_cnt = sum = cnt_words = 0; + } + else if ((cnt_words % 6) == 0) + { + second_level_cnt |= sum << (60 - 12 * (cnt_words / 6)); + } + sum += trait_type::args_in_the_word(*(++data), carry); + } + if ((cnt_words % 6) == 0) + { + second_level_cnt |= sum << (60 - 12 * (cnt_words / 6)); + } + if (cnt_words == 32) + { + j += 2; + m_basic_block[j - 1] = second_level_cnt; + m_basic_block[j] = m_basic_block[j - 2] + sum; + m_basic_block[j + 1] = 0; + } + else + { + m_basic_block[j + 1] = second_level_cnt; + } + } + rank_support_v5(rank_support_v5 const &) = default; + rank_support_v5(rank_support_v5 &&) = default; + rank_support_v5 & operator=(rank_support_v5 const &) = default; + rank_support_v5 & operator=(rank_support_v5 &&) = default; + size_type rank(size_type idx) const + { + assert(m_v != nullptr); + assert(idx <= m_v->size()); + uint64_t const * p = m_basic_block.data() + ((idx >> 10) & 0xFFFFFFFFFFFFFFFEULL); + size_type result = *p + ((*(p + 1) >> (60 - 12 * ((idx & 0x7FF) / (64 * 6)))) & 0x7FFULL) + + trait_type::word_rank(m_v->data(), idx); + idx -= (idx & 0x3F); + uint8_t to_do = ((idx >> 6) & 0x1FULL) % 6; + --idx; + while (to_do) + { + result += trait_type::full_word_rank(m_v->data(), idx); + --to_do; + idx -= 64; + } + return result; + } + inline size_type operator()(size_type idx) const + { + return rank(idx); + } + size_type size() const + { + return m_v->size(); + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + size_type written_bytes = 0; + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + written_bytes += m_basic_block.serialize(out, child, "cumulative_counts"); + structure_tree::add_size(child, written_bytes); + 
return written_bytes; + } + void load(std::istream & in, bit_vector const * v = nullptr) + { + set_vector(v); + m_basic_block.load(in); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_basic_block)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_basic_block)); + } + bool operator==(rank_support_v5 const & other) const noexcept + { + return m_basic_block == other.m_basic_block; + } + bool operator!=(rank_support_v5 const & other) const noexcept + { + return !(*this == other); + } + void set_vector(bit_vector const * v = nullptr) + { + m_v = v; + } +}; +} +#endif +#endif +namespace sdsl +{ +class structure_tree_node; +class select_support +{ +protected: + int_vector<1> const * m_v; +public: + typedef int_vector<1>::size_type size_type; + bit_vector const * vv; + select_support(int_vector<1> const * f_v = nullptr) : vv(f_v) + { + m_v = f_v; + } + select_support(select_support const & f_v); + virtual ~select_support(){}; + virtual size_type select(size_type i) const = 0; + virtual size_type operator()(size_type i) const = 0; + virtual size_type serialize(std::ostream & out, structure_tree_node * v, std::string name) const = 0; + virtual void load(std::istream & in, int_vector<1> const * v = nullptr) = 0; + virtual void set_vector(int_vector<1> const * v = nullptr) = 0; +}; +template +struct select_support_trait +{ + typedef select_support::size_type size_type; + static size_type arg_cnt(bit_vector const &) + { + return 0; + } + static size_type args_in_the_first_word(uint64_t, uint8_t, uint64_t) + { + return 0; + } + static size_type ith_arg_pos_in_the_first_word(uint64_t, size_type, uint8_t, uint64_t) + { + return 0; + } + static size_type args_in_the_word(uint64_t, uint64_t &) + { + return 0; + } + static size_type ith_arg_pos_in_the_word(uint64_t, size_type, uint64_t) + { + return 0; + } + static bool found_arg(size_type, bit_vector const &) + { + return 0; + } + static uint64_t 
init_carry(uint64_t const *, size_type) + { + return 0; + } + static uint64_t get_carry(uint64_t) + { + return 0; + } +}; +template <> +struct select_support_trait<0, 1> +{ + typedef select_support::size_type size_type; + static size_type arg_cnt(bit_vector const & v) + { + return v.bit_size() - util::cnt_one_bits(v); + } + static size_type args_in_the_first_word(uint64_t w, uint8_t offset, uint64_t) + { + return bits::cnt((~w) & bits::lo_unset[offset]); + } + static size_type ith_arg_pos_in_the_first_word(uint64_t w, size_type i, uint8_t offset, uint64_t) + { + return bits::sel(~w & bits::lo_unset[offset], (uint32_t)i); + } + static size_type args_in_the_word(uint64_t w, uint64_t &) + { + return bits::cnt(~w); + } + static size_type ith_arg_pos_in_the_word(uint64_t w, size_type i, uint64_t) + { + return bits::sel(~w, (uint32_t)i); + } + static bool found_arg(size_type i, bit_vector const & v) + { + return !v[i]; + } + static uint64_t init_carry(uint64_t const *, size_type) + { + return 0; + } + static uint64_t get_carry(uint64_t) + { + return 0; + } +}; +template <> +struct select_support_trait<1, 1> +{ + typedef select_support::size_type size_type; + static size_type arg_cnt(bit_vector const & v) + { + return util::cnt_one_bits(v); + } + static size_type args_in_the_first_word(uint64_t w, uint8_t offset, uint64_t) + { + return bits::cnt(w & bits::lo_unset[offset]); + } + static size_type ith_arg_pos_in_the_first_word(uint64_t w, size_type i, uint8_t offset, uint64_t) + { + return bits::sel(w & bits::lo_unset[offset], (uint32_t)i); + } + static size_type args_in_the_word(uint64_t w, uint64_t &) + { + return bits::cnt(w); + } + static size_type ith_arg_pos_in_the_word(uint64_t w, size_type i, uint64_t) + { + return bits::sel(w, (uint32_t)i); + } + static bool found_arg(size_type i, bit_vector const & v) + { + return v[i] == 1; + } + static uint64_t init_carry(uint64_t const *, size_type) + { + return 0; + } + static uint64_t get_carry(uint64_t) + { + return 0; + } 
+}; +template <> +struct select_support_trait<10, 2> +{ + typedef select_support::size_type size_type; + static size_type arg_cnt(bit_vector const & v) + { + return util::cnt_onezero_bits(v); + } + static size_type args_in_the_first_word(uint64_t w, uint8_t offset, uint64_t carry) + { + return bits::cnt(bits::map10(w, carry) & bits::lo_unset[offset]); + } + static size_type ith_arg_pos_in_the_first_word(uint64_t w, size_type i, uint8_t offset, uint64_t carry) + { + return bits::sel(bits::map10(w, carry) & bits::lo_unset[offset], (uint32_t)i); + } + static size_type args_in_the_word(uint64_t w, uint64_t & carry) + { + return bits::cnt10(w, carry); + } + static size_type ith_arg_pos_in_the_word(uint64_t w, size_type i, uint64_t carry) + { + return bits::sel(bits::map10(w, carry), (uint32_t)i); + } + static bool found_arg(size_type i, bit_vector const & v) + { + if (i > 0 and v[i - 1] and !v[i]) + return true; + return false; + } + static uint64_t init_carry(uint64_t const * data, size_type word_pos) + { + return word_pos ? 
(*(data - 1) >> 63) : 0; + } + static uint64_t get_carry(uint64_t w) + { + return w >> 63; + } +}; +template <> +struct select_support_trait<01, 2> +{ + typedef select_support::size_type size_type; + static size_type arg_cnt(bit_vector const & v) + { + return util::cnt_zeroone_bits(v); + } + static size_type args_in_the_first_word(uint64_t w, uint8_t offset, uint64_t carry) + { + return bits::cnt(bits::map01(w, carry) & bits::lo_unset[offset]); + } + static size_type ith_arg_pos_in_the_first_word(uint64_t w, size_type i, uint8_t offset, uint64_t carry) + { + return bits::sel(bits::map01(w, carry) & bits::lo_unset[offset], (uint32_t)i); + } + static size_type args_in_the_word(uint64_t w, uint64_t & carry) + { + return bits::cnt01(w, carry); + } + static size_type ith_arg_pos_in_the_word(uint64_t w, size_type i, uint64_t carry) + { + return bits::sel(bits::map01(w, carry), (uint32_t)i); + } + static bool found_arg(size_type i, bit_vector const & v) + { + if (i > 0 and !v[i - 1] and v[i]) + return true; + return false; + } + static uint64_t init_carry(uint64_t const * data, size_type word_pos) + { + return word_pos ? 
(*(data - 1) >> 63) : 1; + } + static uint64_t get_carry(uint64_t w) + { + return w >> 63; + } +}; +template <> +struct select_support_trait<00, 2> +{ + typedef select_support::size_type size_type; + static size_type arg_cnt(bit_vector const & v) + { + uint64_t const * data = v.data(); + if (v.empty()) + return 0; + uint64_t carry = rank_support_trait<00, 2>::init_carry(); + size_type result = 0; + for (auto end = v.data() + (v.size() >> 6); data < end; ++data) + { + result += rank_support_trait<00, 2>::args_in_the_word(*data, carry); + } + if (v.bit_size() & 0x3F) + { + result += rank_support_trait<00, 2>::args_in_the_word((*data) | bits::lo_unset[v.bit_size() & 0x3F], carry); + } + return result; + } + static size_type args_in_the_first_word(uint64_t w, uint8_t offset, uint64_t carry) + { + size_type res = 0; + if (offset == 0) + res = rank_support_trait<00, 2>::args_in_the_word(w, carry); + else + { + res = bits::cnt((~(w | (w << 1))) & bits::lo_unset[offset]); + } + return res; + } + static size_type ith_arg_pos_in_the_first_word(uint64_t w, size_type i, uint8_t offset, uint64_t carry) + { + return bits::sel((~(((w << 1) | carry) | w)) & bits::lo_unset[offset], i); + } + static size_type args_in_the_word(uint64_t w, uint64_t & carry) + { + return rank_support_trait<00, 2>::args_in_the_word(w, carry); + } + static size_type ith_arg_pos_in_the_word(uint64_t w, size_type i, uint64_t carry) + { + return bits::sel(~(((w << 1) | carry) | w), i); + } + static bool found_arg(size_type i, bit_vector const & v) + { + return i > 0 and !v[i - 1] and !v[i]; + } + static uint64_t init_carry(uint64_t const * data, size_type word_pos) + { + return word_pos ? 
(*(data - 1) >> 63) : 1; + } + static uint64_t get_carry(uint64_t w) + { + return w >> 63; + } +}; +template <> +struct select_support_trait<11, 2> +{ + typedef select_support::size_type size_type; + static size_type arg_cnt(bit_vector const & v) + { + uint64_t const * data = v.data(); + if (v.empty()) + return 0; + uint64_t carry = rank_support_trait<11, 2>::init_carry(); + size_type result = 0; + for (auto end = v.data() + (v.size() >> 6); data < end; ++data) + { + result += rank_support_trait<11, 2>::args_in_the_word(*data, carry); + } + if (v.bit_size() & 0x3F) + { + result += rank_support_trait<11, 2>::args_in_the_word((*data) & bits::lo_set[v.bit_size() & 0x3F], carry); + } + return result; + } + static size_type args_in_the_first_word(uint64_t w, uint8_t offset, uint64_t carry) + { + size_type res = 0; + if (offset == 0) + res = rank_support_trait<11, 2>::args_in_the_word(w, carry); + else + { + res = bits::cnt((w >> (offset - 1)) & (w >> offset)); + } + return res; + } + static size_type ith_arg_pos_in_the_first_word(uint64_t w, size_type i, uint8_t offset, uint64_t carry) + { + return bits::sel((((w << 1) | carry) & w) & bits::lo_unset[offset], i); + } + static size_type args_in_the_word(uint64_t w, uint64_t & carry) + { + return rank_support_trait<11, 2>::args_in_the_word(w, carry); + } + static size_type ith_arg_pos_in_the_word(uint64_t w, size_type i, uint64_t carry) + { + return bits::sel(((w << 1) | carry) & w, i); + } + static bool found_arg(size_type i, bit_vector const & v) + { + if (i > 0 and v[i - 1] and v[i]) + return true; + return false; + } + static uint64_t init_carry(uint64_t const * data, size_type word_pos) + { + return word_pos ? 
(*(data - 1) >> 63) : 0; + } + static uint64_t get_carry(uint64_t w) + { + return w >> 63; + } +}; +} +#endif +namespace sdsl +{ +template +class select_support_mcl : public select_support +{ +private: + static_assert(t_b == 1u or t_b == 0u or t_b == 10u or t_b == 11u, + "select_support_mcl: bit pattern must be `0`,`1`,`10`, `01`, or `11`"); + static_assert(t_pat_len == 1u or t_pat_len == 2u, "select_support_mcl: bit pattern length must be 1 or 2"); +public: + typedef bit_vector bit_vector_type; + enum + { + bit_pat = t_b + }; + enum + { + bit_pat_len = t_pat_len + }; +private: + uint32_t m_logn = 0, + m_logn2 = 0, + m_logn4 = 0; + int_vector<0> m_superblock; + int_vector<0> * m_longsuperblock = nullptr; + int_vector<0> * m_miniblock = nullptr; + size_type m_arg_cnt = 0; + void initData(); + void init_fast(bit_vector const * v = nullptr); +public: + explicit select_support_mcl(bit_vector const * v = nullptr); + select_support_mcl(select_support_mcl const & ss); + select_support_mcl(select_support_mcl && ss); + ~select_support_mcl(); + void init_slow(bit_vector const * v = nullptr); + inline size_type select(size_type i) const; + inline size_type operator()(size_type i) const; + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const; + void load(std::istream & in, bit_vector const * v = nullptr); + void set_vector(bit_vector const * v = nullptr); + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const; + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar); + select_support_mcl & operator=(select_support_mcl const & ss); + select_support_mcl & operator=(select_support_mcl &&); + bool operator==(select_support_mcl const & other) const noexcept; + bool operator!=(select_support_mcl const & other) const noexcept; +}; +template +select_support_mcl::select_support_mcl(bit_vector const * f_v) : select_support(f_v) +{ + if (t_pat_len > 1 or (vv != nullptr and vv->size() < 100000)) + init_slow(vv); + else + 
init_fast(vv); + return; +} +template +select_support_mcl::select_support_mcl(select_support_mcl const & ss) : + select_support(ss.m_v), + m_logn(ss.m_logn), + m_logn2(ss.m_logn2), + m_logn4(ss.m_logn4), + m_superblock(ss.m_superblock), + m_arg_cnt(ss.m_arg_cnt) +{ + size_type sb = (m_arg_cnt + 4095) >> 12; + if (ss.m_longsuperblock != nullptr) + { + m_longsuperblock = new int_vector<0>[sb]; + for (size_type i = 0; i < sb; ++i) + { + m_longsuperblock[i] = ss.m_longsuperblock[i]; + } + } + m_miniblock = nullptr; + if (ss.m_miniblock != nullptr) + { + m_miniblock = new int_vector<0>[sb]; + for (size_type i = 0; i < sb; ++i) + { + m_miniblock[i] = ss.m_miniblock[i]; + } + } +} +template +select_support_mcl::select_support_mcl(select_support_mcl && ss) : select_support(ss.m_v) +{ + *this = std::move(ss); +} +template +select_support_mcl & select_support_mcl::operator=(select_support_mcl const & ss) +{ + if (this != &ss) + { + select_support_mcl tmp(ss); + *this = std::move(tmp); + } + return *this; +} +template +select_support_mcl & select_support_mcl::operator=(select_support_mcl && ss) +{ + if (this != &ss) + { + m_logn = ss.m_logn; + m_logn2 = ss.m_logn2; + m_logn4 = ss.m_logn4; + m_superblock = std::move(ss.m_superblock); + m_arg_cnt = ss.m_arg_cnt; + m_v = ss.m_v; + delete[] m_longsuperblock; + m_longsuperblock = ss.m_longsuperblock; + ss.m_longsuperblock = nullptr; + delete[] m_miniblock; + m_miniblock = ss.m_miniblock; + ss.m_miniblock = nullptr; + } + return *this; +} +template +select_support_mcl::~select_support_mcl() +{ + delete[] m_longsuperblock; + delete[] m_miniblock; +} +template +void select_support_mcl::init_slow(bit_vector const * v) +{ + set_vector(v); + initData(); + if (m_v == nullptr) + return; + m_arg_cnt = select_support_trait::arg_cnt(*v); + const size_type SUPER_BLOCK_SIZE = 4096; + if (m_arg_cnt == 0) + return; + size_type sb = (m_arg_cnt + SUPER_BLOCK_SIZE - 1) / SUPER_BLOCK_SIZE; + delete[] m_miniblock; + m_miniblock = new 
int_vector<0>[sb]; + m_superblock = int_vector<0>(sb, 0, m_logn); + size_type arg_position[SUPER_BLOCK_SIZE], arg_cnt = 0; + size_type sb_cnt = 0; + for (size_type i = 0; i < v->size(); ++i) + { + if (select_support_trait::found_arg(i, *v)) + { + arg_position[arg_cnt % SUPER_BLOCK_SIZE] = i; + assert(arg_position[arg_cnt % SUPER_BLOCK_SIZE] == i); + ++arg_cnt; + if (arg_cnt % SUPER_BLOCK_SIZE == 0 or arg_cnt == m_arg_cnt) + { + assert(sb_cnt < sb); + m_superblock[sb_cnt] = arg_position[0]; + size_type pos_diff = arg_position[(arg_cnt - 1) % SUPER_BLOCK_SIZE] - arg_position[0]; + if (pos_diff > m_logn4) + { + if (m_longsuperblock == nullptr) + m_longsuperblock = new int_vector<0>[sb]; + m_longsuperblock[sb_cnt] = + int_vector<0>(SUPER_BLOCK_SIZE, + 0, + bits::hi(arg_position[(arg_cnt - 1) % SUPER_BLOCK_SIZE]) + 1); + for (size_type j = 0; j <= (arg_cnt - 1) % SUPER_BLOCK_SIZE; ++j) + m_longsuperblock[sb_cnt][j] = arg_position[j]; + } + else + { + m_miniblock[sb_cnt] = int_vector<0>(64, 0, bits::hi(pos_diff) + 1); + for (size_type j = 0; j <= (arg_cnt - 1) % SUPER_BLOCK_SIZE; j += 64) + { + m_miniblock[sb_cnt][j / 64] = arg_position[j] - arg_position[0]; + } + } + ++sb_cnt; + } + } + } +} +template +void select_support_mcl::init_fast(bit_vector const * v) +{ + set_vector(v); + initData(); + if (m_v == nullptr) + return; + m_arg_cnt = select_support_trait::arg_cnt(*v); + const size_type SUPER_BLOCK_SIZE = 64 * 64; + if (m_arg_cnt == 0) + return; + size_type sb = (m_arg_cnt + SUPER_BLOCK_SIZE - 1) / SUPER_BLOCK_SIZE; + delete[] m_miniblock; + m_miniblock = new int_vector<0>[sb]; + m_superblock = int_vector<0>(sb, 0, m_logn); + bit_vector::size_type arg_position[SUPER_BLOCK_SIZE]; + uint64_t const * data = v->data(); + uint64_t carry_new = 0; + size_type last_k64 = 1, sb_cnt = 0; + for (size_type i = 0, cnt_old = 0, cnt_new = 0, last_k64_sum = 1; i < (((v->bit_size() + 63) >> 6) << 6); + i += 64, ++data) + { + cnt_new += select_support_trait::args_in_the_word(*data, 
carry_new); + cnt_new = std::min(cnt_new, + m_arg_cnt); + if (cnt_new >= last_k64_sum) + { + arg_position[last_k64 - 1] = + i + + select_support_trait::ith_arg_pos_in_the_word(*data, + last_k64_sum - cnt_old, + carry_new); + last_k64 += 64; + last_k64_sum += 64; + if (last_k64 == SUPER_BLOCK_SIZE + 1) + { + m_superblock[sb_cnt] = arg_position[0]; + size_type pos_of_last_arg_in_the_block = arg_position[last_k64 - 65]; + for (size_type ii = arg_position[last_k64 - 65] + 1, j = last_k64 - 65; + ii < v->size() and j < SUPER_BLOCK_SIZE; + ++ii) + if (select_support_trait::found_arg(ii, *v)) + { + pos_of_last_arg_in_the_block = ii; + ++j; + } + size_type pos_diff = pos_of_last_arg_in_the_block - arg_position[0]; + if (pos_diff > m_logn4) + { + if (m_longsuperblock == nullptr) + m_longsuperblock = new int_vector<0>[sb + 1]; + m_longsuperblock[sb_cnt] = + int_vector<0>(SUPER_BLOCK_SIZE, 0, bits::hi(pos_of_last_arg_in_the_block) + 1); + for (size_type j = arg_position[0], k = 0; + k < SUPER_BLOCK_SIZE and j <= pos_of_last_arg_in_the_block; + ++j) + if (select_support_trait::found_arg(j, *v)) + { + if (k >= SUPER_BLOCK_SIZE) + { + for (size_type ii = 0; ii < SUPER_BLOCK_SIZE; ++ii) + { + std::cout << "(" << ii << "," << m_longsuperblock[sb_cnt][ii] << ") "; + } + std::cout << std::endl; + std::cout << "k=" << k << " SUPER_BLOCK_SIZE=" << SUPER_BLOCK_SIZE << std::endl; + std::cout << "pos_of_last_arg_in_the_block" << pos_of_last_arg_in_the_block + << std::endl; + std::cout.flush(); + } + m_longsuperblock[sb_cnt][k++] = j; + } + } + else + { + m_miniblock[sb_cnt] = int_vector<0>(64, 0, bits::hi(pos_diff) + 1); + for (size_type j = 0; j < SUPER_BLOCK_SIZE; j += 64) + { + m_miniblock[sb_cnt][j / 64] = arg_position[j] - arg_position[0]; + } + } + ++sb_cnt; + last_k64 = 1; + } + } + cnt_old = cnt_new; + } + if (last_k64 > 1) + { + if (m_longsuperblock == nullptr) + m_longsuperblock = new int_vector<0>[sb + 1]; + m_longsuperblock[sb_cnt] = int_vector<0>(SUPER_BLOCK_SIZE, 0, 
bits::hi(v->size() - 1) + 1); + for (size_type i = arg_position[0], k = 0; i < v->size(); ++i) + { + if (select_support_trait::found_arg(i, *v)) + { + m_longsuperblock[sb_cnt][k++] = i; + } + } + ++sb_cnt; + } +} +template +inline auto select_support_mcl::select(size_type i) const -> size_type +{ + assert(i > 0 and i <= m_arg_cnt); + i = i - 1; + size_type sb_idx = i >> 12; + size_type offset = i & 0xFFF; + if (m_longsuperblock != nullptr and !m_longsuperblock[sb_idx].empty()) + { + return m_longsuperblock[sb_idx][offset]; + } + else + { + if ((offset & 0x3F) == 0) + { + assert(sb_idx < m_superblock.size()); + assert((offset >> 6) < m_miniblock[sb_idx].size()); + return m_superblock[sb_idx] + m_miniblock[sb_idx][offset >> 6 ]; + } + else + { + i = i - (sb_idx << 12) - ((offset >> 6) << 6); + assert(i > 0); + size_type pos = m_superblock[sb_idx] + m_miniblock[sb_idx][offset >> 6] + 1; + size_type word_pos = pos >> 6; + size_type word_off = pos & 0x3F; + uint64_t const * data = m_v->data() + word_pos; + uint64_t carry = select_support_trait::init_carry(data, word_pos); + size_type args = select_support_trait::args_in_the_first_word(*data, word_off, carry); + if (args >= i) + { + return (word_pos << 6) + + select_support_trait::ith_arg_pos_in_the_first_word(*data, i, word_off, carry); + } + word_pos += 1; + size_type sum_args = args; + carry = select_support_trait::get_carry(*data); + uint64_t old_carry = carry; + args = select_support_trait::args_in_the_word(*(++data), carry); + while (sum_args + args < i) + { + sum_args += args; + assert(data + 1 < m_v->data() + ((m_v->bit_size() + 63) >> 6)); + old_carry = carry; + args = select_support_trait::args_in_the_word(*(++data), carry); + word_pos += 1; + } + return (word_pos << 6) + + select_support_trait::ith_arg_pos_in_the_word(*data, i - sum_args, old_carry); + } + } +} +template +inline auto select_support_mcl::operator()(size_type i) const -> size_type +{ + return select(i); +} +template +void 
select_support_mcl::initData() +{ + m_arg_cnt = 0; + if (nullptr == m_v) + { + m_logn = m_logn2 = m_logn4 = 0; + } + else + { + m_logn = bits::hi(((m_v->bit_size() + 63) >> 6) << 6) + 1; + m_logn2 = m_logn * m_logn; + m_logn4 = m_logn2 * m_logn2; + } + delete[] m_longsuperblock; + m_longsuperblock = nullptr; + delete[] m_miniblock; + m_miniblock = nullptr; +} +template +void select_support_mcl::set_vector(bit_vector const * v) +{ + m_v = v; +} +template +auto select_support_mcl::serialize(std::ostream & out, structure_tree_node * v, std::string name) const + -> size_type +{ + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + out.write((char *)&m_arg_cnt, sizeof(size_type) / sizeof(char)); + written_bytes = sizeof(size_type) / sizeof(char); + size_type sb = (m_arg_cnt + 4095) >> 12; + if (m_arg_cnt) + { + written_bytes += m_superblock.serialize(out, child, "superblock"); + bit_vector mini_or_long; + if (m_longsuperblock != nullptr) + { + mini_or_long.resize(sb); + for (size_type i = 0; i < sb; ++i) + mini_or_long[i] = !m_miniblock[i].empty(); + } + written_bytes += mini_or_long.serialize(out, child, "mini_or_long"); + size_type written_bytes_long = 0; + size_type written_bytes_mini = 0; + for (size_type i = 0; i < sb; ++i) + if (!mini_or_long.empty() and !mini_or_long[i]) + { + written_bytes_long += m_longsuperblock[i].serialize(out); + } + else + { + written_bytes_mini += m_miniblock[i].serialize(out); + } + written_bytes += written_bytes_long; + written_bytes += written_bytes_mini; + structure_tree_node * child_long = + structure_tree::add_child(child, "longsuperblock", util::class_name(m_longsuperblock)); + structure_tree::add_size(child_long, written_bytes_long); + structure_tree_node * child_mini = + structure_tree::add_child(child, "minisuperblock", util::class_name(m_miniblock)); + structure_tree::add_size(child_mini, written_bytes_mini); + } + structure_tree::add_size(child, 
written_bytes); + return written_bytes; +} +template +void select_support_mcl::load(std::istream & in, bit_vector const * v) +{ + set_vector(v); + initData(); + in.read((char *)&m_arg_cnt, sizeof(size_type) / sizeof(char)); + size_type sb = (m_arg_cnt + 4095) >> 12; + if (m_arg_cnt) + { + m_superblock.load(in); + delete[] m_miniblock; + m_miniblock = nullptr; + delete[] m_longsuperblock; + m_longsuperblock = nullptr; + bit_vector mini_or_long; + mini_or_long.load(in); + m_miniblock = new int_vector<0>[sb]; + if (!mini_or_long.empty()) + m_longsuperblock = new int_vector<0>[sb]; + for (size_type i = 0; i < sb; ++i) + if (!mini_or_long.empty() and not mini_or_long[i]) + { + m_longsuperblock[i].load(in); + } + else + { + m_miniblock[i].load(in); + } + } +} +template +template +void select_support_mcl::CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const +{ + ar(CEREAL_NVP(m_arg_cnt)); + ar(CEREAL_NVP(m_logn)); + ar(CEREAL_NVP(m_logn2)); + ar(CEREAL_NVP(m_logn4)); + size_type sb = (m_arg_cnt + 4095) >> 12; + if (m_arg_cnt) + { + ar(CEREAL_NVP(m_superblock)); + bit_vector mini_or_long; + if (m_longsuperblock != nullptr) + { + mini_or_long.resize(sb); + for (size_type i = 0; i < sb; ++i) + { + mini_or_long[i] = !m_miniblock[i].empty(); + } + } + ar(CEREAL_NVP(mini_or_long)); + for (size_type i = 0; i < sb; ++i) + { + if (!mini_or_long.empty() and !mini_or_long[i]) + { + ar(CEREAL_NVP(m_longsuperblock[i])); + } + else + { + ar(CEREAL_NVP(m_miniblock[i])); + } + } + } +} +template +template +void select_support_mcl::CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) +{ + delete[] m_longsuperblock; + m_longsuperblock = nullptr; + delete[] m_miniblock; + m_miniblock = nullptr; + ar(CEREAL_NVP(m_arg_cnt)); + ar(CEREAL_NVP(m_logn)); + ar(CEREAL_NVP(m_logn2)); + ar(CEREAL_NVP(m_logn4)); + size_type sb = (m_arg_cnt + 4095) >> 12; + if (m_arg_cnt) + { + ar(CEREAL_NVP(m_superblock)); + delete[] m_miniblock; + m_miniblock = nullptr; + delete[] m_longsuperblock; + m_longsuperblock = nullptr; + 
bit_vector mini_or_long; + ar(CEREAL_NVP(mini_or_long)); + m_miniblock = new int_vector<0>[sb]; + if (!mini_or_long.empty()) + { + m_longsuperblock = new int_vector<0>[sb]; + } + for (size_type i = 0; i < sb; ++i) + { + if (!mini_or_long.empty() and !mini_or_long[i]) + { + ar(CEREAL_NVP(m_longsuperblock[i])); + } + else + { + ar(CEREAL_NVP(m_miniblock[i])); + } + } + } +} +template +bool select_support_mcl::operator==(select_support_mcl const & other) const noexcept +{ + return (m_logn == other.m_logn) && (m_logn2 == other.m_logn2) && (m_logn4 == other.m_logn4) + && (m_superblock == other.m_superblock) && (m_arg_cnt == other.m_arg_cnt) + && ((m_longsuperblock == nullptr && other.m_longsuperblock == nullptr) + || (*m_longsuperblock == *other.m_longsuperblock)) + && ((m_miniblock == other.m_miniblock) || (*m_miniblock == *other.m_miniblock)); +} +template +bool select_support_mcl::operator!=(select_support_mcl const & other) const noexcept +{ + return !(*this == other); +} +} +#endif +namespace sdsl +{ +template +class rank_support_sd; +template +class select_support_sd; +class sd_vector_builder +{ + template + friend class sd_vector; +public: + typedef bit_vector::size_type size_type; +private: + size_type m_size, m_capacity; + size_type m_wl; + size_type m_tail, m_items; + size_type m_last_high, m_highpos; + int_vector<> m_low; + bit_vector m_high; +public: + sd_vector_builder(); + sd_vector_builder(size_type n, size_type m); + inline size_type size() const + { + return m_size; + } + inline size_type capacity() const + { + return m_capacity; + } + inline size_type tail() const + { + return m_tail; + } + inline size_type items() const + { + return m_items; + } + inline void set(size_type i) + { + assert(i >= m_tail && i < m_size); + assert(m_items < m_capacity); + size_type cur_high = i >> m_wl; + m_highpos += (cur_high - m_last_high); + m_last_high = cur_high; + m_low[m_items++] = i; + m_high[m_highpos++] = 1; + m_tail = i + 1; + } +}; +template +class sd_vector +{ 
+public: + typedef bit_vector::size_type size_type; + typedef size_type value_type; + typedef bit_vector::difference_type difference_type; + typedef random_access_const_iterator iterator; + typedef iterator const_iterator; + typedef bv_tag index_category; + typedef t_select_0 select_0_support_type; + typedef t_select_1 select_1_support_type; + typedef rank_support_sd<0, t_hi_bit_vector, select_1_support_type, select_0_support_type> rank_0_type; + typedef rank_support_sd<1, t_hi_bit_vector, select_1_support_type, select_0_support_type> rank_1_type; + typedef select_support_sd<0, t_hi_bit_vector, select_1_support_type, select_0_support_type> select_0_type; + typedef select_support_sd<1, t_hi_bit_vector, select_1_support_type, select_0_support_type> select_1_type; + typedef t_hi_bit_vector hi_bit_vector_type; +private: + size_type m_size = 0; + uint8_t m_wl = 0; + int_vector<> m_low; + hi_bit_vector_type m_high; + select_1_support_type m_high_1_select; + select_0_support_type m_high_0_select; +public: + uint8_t const & wl = m_wl; + hi_bit_vector_type const & high = m_high; + int_vector<> const & low = m_low; + select_1_support_type const & high_1_select = m_high_1_select; + select_0_support_type const & high_0_select = m_high_0_select; + sd_vector() + {} + sd_vector(sd_vector const & sd) : + m_size(sd.m_size), + m_wl(sd.m_wl), + m_low(sd.m_low), + m_high(sd.m_high), + m_high_1_select(sd.m_high_1_select), + m_high_0_select(sd.m_high_0_select) + { + m_high_1_select.set_vector(&m_high); + m_high_0_select.set_vector(&m_high); + } + sd_vector & operator=(sd_vector const & v) + { + if (this != &v) + { + sd_vector tmp(v); + *this = std::move(tmp); + } + return *this; + } + sd_vector & operator=(sd_vector && v) + { + if (this != &v) + { + m_size = v.m_size; + m_wl = v.m_wl; + m_low = std::move(v.m_low); + m_high = std::move(v.m_high); + m_high_1_select = std::move(v.m_high_1_select); + m_high_1_select.set_vector(&m_high); + m_high_0_select = std::move(v.m_high_0_select); + 
m_high_0_select.set_vector(&m_high); + } + return *this; + } + sd_vector(sd_vector && sd) + { + *this = std::move(sd); + } + sd_vector(bit_vector const & bv) + { + m_size = bv.size(); + size_type m = util::cnt_one_bits(bv); + uint8_t logm = bits::hi(m) + 1; + uint8_t logn = bits::hi(m_size) + 1; + if (logm == logn) + { + --logm; + } + m_wl = logn - logm; + m_low = int_vector<>(m, 0, m_wl); + bit_vector high = bit_vector(m + (1ULL << logm), 0); + uint64_t const * bvp = bv.data(); + for (size_type i = 0, mm = 0, last_high = 0, highpos = 0; i < (bv.size() + 63) / 64; ++i, ++bvp) + { + size_type position = 64 * i; + uint64_t w = *bvp; + while (w) + { + uint8_t offset = bits::lo(w); + w >>= offset; + position += offset; + if (position >= bv.size()) + break; + size_type cur_high = position >> m_wl; + highpos += (cur_high - last_high); + last_high = cur_high; + m_low[mm++] = position; + high[highpos++] = 1; + position += 1; + w >>= 1; + } + } + m_high = std::move(high); + util::init_support(m_high_1_select, &m_high); + util::init_support(m_high_0_select, &m_high); + } + template + sd_vector(const t_itr begin, const t_itr end) + { + if (begin == end) + { + return; + } + if (!is_sorted(begin, end)) + { + throw std::runtime_error("sd_vector: source list is not sorted."); + } + size_type m = std::distance(begin, end); + m_size = *(end - 1) + 1; + uint8_t logm = bits::hi(m) + 1; + uint8_t logn = bits::hi(m_size) + 1; + if (logm == logn) + { + --logm; + } + m_wl = logn - logm; + m_low = int_vector<>(m, 0, m_wl); + bit_vector high = bit_vector(m + (1ULL << logm), 0); + auto itr = begin; + size_type mm = 0, last_high = 0, highpos = 0; + while (itr != end) + { + auto position = *itr; + size_type cur_high = position >> m_wl; + highpos += (cur_high - last_high); + last_high = cur_high; + m_low[mm++] = position; + high[highpos++] = 1; + ++itr; + } + m_high = std::move(high); + util::init_support(m_high_1_select, &m_high); + util::init_support(m_high_0_select, &m_high); + } + 
sd_vector(sd_vector_builder & builder) + { + if (builder.items() != builder.capacity()) + { + throw std::runtime_error("sd_vector: builder is not at full capacity."); + } + m_size = builder.m_size; + m_wl = builder.m_wl; + m_low = std::move(builder.m_low); + m_high = std::move(builder.m_high); + util::init_support(m_high_1_select, &(this->m_high)); + util::init_support(m_high_0_select, &(this->m_high)); + builder = sd_vector_builder(); + } + value_type operator[](size_type i) const + { + size_type high_val = (i >> (m_wl)); + size_type sel_high = m_high_0_select(high_val + 1); + size_type rank_low = sel_high - high_val; + if (0 == rank_low) + return 0; + size_type val_low = i & bits::lo_set[m_wl]; + --sel_high; + --rank_low; + while (m_high[sel_high] and m_low[rank_low] > val_low) + { + if (sel_high > 0) + { + --sel_high; + --rank_low; + } + else + return 0; + } + return m_high[sel_high] and m_low[rank_low] == val_low; + } + uint64_t get_int(size_type idx, const uint8_t len = 64) const + { + uint64_t i = idx + len - 1; + uint64_t high_val = (i >> (m_wl)); + uint64_t sel_high = m_high_0_select(high_val + 1); + uint64_t rank_low = sel_high - high_val; + if (0 == rank_low) + return 0; + size_type val_low = i & bits::lo_set[m_wl]; + --sel_high; + --rank_low; + while (m_high[sel_high] and m_low[rank_low] > val_low) + { + if (sel_high > 0) + { + --sel_high; + --rank_low; + } + else + return 0; + } + uint64_t res = 0; + while (true) + { + while (!m_high[sel_high]) + { + if (sel_high > 0 and (high_val << m_wl) >= idx) + { + --sel_high; + --high_val; + } + else + { + return res; + } + } + while (m_high[sel_high]) + { + uint64_t val = (high_val << m_wl) + m_low[rank_low]; + if (val >= idx) + { + res |= 1ULL << (val - idx); + } + else + { + return res; + } + if (sel_high > 0) + { + --sel_high; + --rank_low; + } + else + { + return res; + } + } + } + } + size_type size() const + { + return m_size; + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, 
std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += write_member(m_size, out, child, "size"); + written_bytes += write_member(m_wl, out, child, "wl"); + written_bytes += m_low.serialize(out, child, "low"); + written_bytes += m_high.serialize(out, child, "high"); + written_bytes += m_high_1_select.serialize(out, child, "high_1_select"); + written_bytes += m_high_0_select.serialize(out, child, "high_0_select"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + read_member(m_size, in); + read_member(m_wl, in); + m_low.load(in); + m_high.load(in); + m_high_1_select.load(in, &m_high); + m_high_0_select.load(in, &m_high); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_wl)); + ar(CEREAL_NVP(m_low)); + ar(CEREAL_NVP(m_high)); + ar(CEREAL_NVP(m_high_1_select)); + ar(CEREAL_NVP(m_high_0_select)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_wl)); + ar(CEREAL_NVP(m_low)); + ar(CEREAL_NVP(m_high)); + ar(CEREAL_NVP(m_high_1_select)); + m_high_1_select.set_vector(&m_high); + ar(CEREAL_NVP(m_high_0_select)); + m_high_0_select.set_vector(&m_high); + } + iterator begin() const + { + return iterator(this, 0); + } + iterator end() const + { + return iterator(this, size()); + } + bool operator==(sd_vector const & v) const + { + return m_size == v.m_size && m_wl == v.m_wl && m_low == v.m_low && m_high == v.m_high; + } + bool operator!=(sd_vector const & v) const + { + return !(*this == v); + } +}; +template <> +sd_vector<>::sd_vector(sd_vector_builder & builder); +template +struct rank_support_sd_trait +{ + typedef bit_vector::size_type size_type; + static size_type adjust_rank(size_type r, size_type) + { + return r; + } +}; +template <> +struct 
rank_support_sd_trait<0> +{ + typedef bit_vector::size_type size_type; + static size_type adjust_rank(size_type r, size_type n) + { + return n - r; + } +}; +template +class rank_support_sd +{ + static_assert(t_b == 1u or t_b == 0u, "rank_support_sd: bit pattern must be `0` or `1`"); +public: + typedef bit_vector::size_type size_type; + typedef sd_vector bit_vector_type; + enum + { + bit_pat = t_b + }; + enum + { + bit_pat_len = (uint8_t)1 + }; +private: + bit_vector_type const * m_v; +public: + explicit rank_support_sd(bit_vector_type const * v = nullptr) + { + set_vector(v); + } + size_type rank(size_type i) const + { + assert(m_v != nullptr); + assert(i <= m_v->size()); + size_type high_val = (i >> (m_v->wl)); + size_type sel_high = m_v->high_0_select(high_val + 1); + size_type rank_low = sel_high - high_val; + if (0 == rank_low) + return rank_support_sd_trait::adjust_rank(0, i); + size_type val_low = i & bits::lo_set[m_v->wl]; + do + { + if (!sel_high) + return rank_support_sd_trait::adjust_rank(0, i); + --sel_high; + --rank_low; + } + while (m_v->high[sel_high] and m_v->low[rank_low] >= val_low); + return rank_support_sd_trait::adjust_rank(rank_low + 1, i); + } + size_type operator()(size_type i) const + { + return rank(i); + } + size_type size() const + { + return m_v->size(); + } + void set_vector(bit_vector_type const * v = nullptr) + { + m_v = v; + } + void load(std::istream &, bit_vector_type const * v = nullptr) + { + set_vector(v); + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + return serialize_empty_object(out, v, name, this); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const + {} + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t &) + {} + bool operator==(rank_support_sd const & other) const noexcept + { + return *m_v == *other.m_v; + } + bool operator!=(rank_support_sd const & other) const noexcept + { + return !(*this == other); + } +}; +template +struct 
select_support_sd_trait +{ + typedef bit_vector::size_type size_type; + static size_type select(size_type i, t_sd_vec const * v) + { + return v->low[i - 1] + + ((v->high_1_select(i) + 1 - i) << (v->wl)); + } +}; +template +struct select_support_sd_trait<0, t_sd_vec> +{ + typedef bit_vector::size_type size_type; + static size_type select(size_type i, t_sd_vec const * v) + { + auto ones = v->low.size(); + assert(0 < i and i <= v->size() - ones); + size_type lb = 1, rb = ones + 1; + size_type r0 = 0; + size_type pos = (size_type)-1; + while (lb < rb) + { + auto mid = lb + (rb - lb) / 2; + auto x = select_support_sd_trait<1, t_sd_vec>::select(mid, v); + auto rank0 = x + 1 - mid; + if (rank0 >= i) + { + rb = mid; + } + else + { + r0 = rank0; + pos = x; + lb = mid + 1; + } + } + return pos + i - r0; + } +}; +template +class select_support_sd +{ +public: + typedef bit_vector::size_type size_type; + typedef sd_vector bit_vector_type; + enum + { + bit_pat = t_b + }; + enum + { + bit_pat_len = (uint8_t)1 + }; +private: + bit_vector_type const * m_v; +public: + explicit select_support_sd(bit_vector_type const * v = nullptr) + { + set_vector(v); + } + size_type select(size_type i) const + { + return select_support_sd_trait::select(i, m_v); + } + size_type operator()(size_type i) const + { + return select(i); + } + size_type size() const + { + return m_v->size(); + } + void set_vector(bit_vector_type const * v = nullptr) + { + m_v = v; + } + void load(std::istream &, bit_vector_type const * v = nullptr) + { + set_vector(v); + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + return serialize_empty_object(out, v, name, this); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t &) const + {} + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t &) + {} + bool operator==(select_support_sd const & other) const noexcept + { + return *m_v == *other.m_v; + } + bool operator!=(select_support_sd const & other) const 
noexcept + { + return !(*this == other); + } +}; +template > +class select_0_support_sd +{ +public: + typedef bit_vector::size_type size_type; + typedef t_sd_vector bit_vector_type; + using rank_1 = typename t_sd_vector::rank_1_type; + using sel0_type = typename t_sd_vector::select_0_type; + typedef bit_vector y_high_type; + enum + { + bit_pat = 0 + }; + enum + { + bit_pat_len = (uint8_t)1 + }; +private: + bit_vector_type const * m_v; + int_vector<> m_pointer; + int_vector<> m_rank1; +public: + explicit select_0_support_sd(bit_vector_type const * v = nullptr) + { + set_vector(v); + if (nullptr != m_v) + { + size_type rank_0 = 0; + const size_type bs = 1ULL << (m_v->wl); + size_type z = 0; + size_type rank1 = 0; + size_type zeros = m_v->size() - rank_1(m_v)(m_v->size()); + m_pointer = int_vector<>(zeros / (64 * bs) + 1, 0, bits::hi(m_v->high.size() / 64) + 1); + m_rank1 = int_vector<>(m_pointer.size(), 0, bits::hi(m_v->high.size()) + 1); + uint64_t w = 0; + for (size_type i = 0, sel0 = 1; i < m_v->high.size(); i += 64) + { + size_type old_rank1 = rank1; + w = m_v->high.get_int(i, 64); + rank1 += bits::cnt(w); + rank_0 = (i + 64) - rank1; + if (rank1 > 0 and (w >> 63) & 1) + { + uint64_t pos = rank_0 * bs + m_v->low[rank1 - 1]; + z = pos + 1 - rank1; + } + else + { + z = rank_0 * bs - rank1; + } + while (sel0 <= z and sel0 <= zeros) + { + m_pointer[(sel0 - 1) / (64 * bs)] = i / 64; + m_rank1[(sel0 - 1) / (64 * bs)] = old_rank1; + sel0 += 64 * bs; + } + } + } + } + size_type select(size_type i) const + { + const size_type bs = 1ULL << (m_v->wl); + size_type j = m_pointer[(i - 1) / (64 * bs)] * 64; + size_type rank1 = m_rank1[(i - 1) / (64 * bs)]; + size_type pos = 0; + if (rank1 > 0 and (m_v->high[j - 1]) & 1) + { + pos = (j - rank1) * bs + m_v->low[rank1 - 1]; + } + else + { + pos = (j - rank1) * bs; + } + uint64_t w = m_v->high.get_int(j, 64); + do + { + uint64_t _rank1 = rank1 + bits::cnt(w); + uint64_t _rank0 = 0; + if (_rank1 > 0 and (w >> 63) & 1) + { + pos = (j 
+ 64 - _rank1) * bs + m_v->low[_rank1 - 1]; + _rank0 = pos + 1 - _rank1; + } + else + { + pos = (j + 64 - _rank1) * bs; + _rank0 = pos - _rank1; + } + if (_rank0 < i) + { + j += 64; + w = m_v->high.get_int(j, 64); + rank1 = _rank1; + } + else + { + break; + } + } + while (true); + do + { + uint64_t _rank1 = rank1 + bits::lt_cnt[w & 0xFFULL]; + uint64_t _rank0 = 0; + if (_rank1 > 0 and (w >> 7) & 1) + { + pos = (j + 8 - _rank1) * bs + m_v->low[_rank1 - 1]; + _rank0 = pos + 1 - _rank1; + } + else + { + pos = (j + 8 - _rank1) * bs; + _rank0 = pos - _rank1; + } + if (_rank0 < i) + { + j += 8; + w >>= 8; + rank1 = _rank1; + } + else + { + break; + } + } + while (true); + do + { + bool b = w & 1ULL; + w >>= 1; + ++j; + if (0 == b) + { + pos = (j - rank1) * bs; + size_type zeros = pos - rank1; + if (zeros >= i) + { + pos = pos - (zeros - i) - 1; + break; + } + } + else + { + pos = (j - 1 - rank1) * bs; + size_type one_pos = pos + m_v->low[rank1]; + ++rank1; + size_type zeros = one_pos + 1 - rank1; + if (zeros >= i) + { + pos = one_pos - (zeros - i) - 1; + break; + } + } + if (j % 64 == 0) + { + w = m_v->high.get_int(j, 64); + } + } + while (true); + return pos; + } + size_type operator()(size_type i) const + { + return select(i); + } + size_type size() const + { + return m_v->size(); + } + void set_vector(bit_vector_type const * v = nullptr) + { + m_v = v; + } + void load(std::istream & in, bit_vector_type const * v = nullptr) + { + m_pointer.load(in); + m_rank1.load(in); + set_vector(v); + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += m_pointer.serialize(out, child, "pointer"); + written_bytes += m_rank1.serialize(out, child, "rank1"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t 
& ar) const + { + ar(CEREAL_NVP(m_pointer)); + ar(CEREAL_NVP(m_rank1)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_pointer)); + ar(CEREAL_NVP(m_rank1)); + } + bool operator==(select_0_support_sd const & other) const noexcept + { + return (m_pointer == other.m_pointer) && (m_rank1 == other.m_rank1); + } + bool operator!=(select_0_support_sd const & other) const noexcept + { + return !(*this == other); + } +}; +inline sd_vector_builder::sd_vector_builder() : + m_size(0), + m_capacity(0), + m_wl(0), + m_tail(0), + m_items(0), + m_last_high(0), + m_highpos(0) +{} +inline sd_vector_builder::sd_vector_builder(size_type n, size_type m) : + m_size(n), + m_capacity(m), + m_wl(0), + m_tail(0), + m_items(0), + m_last_high(0), + m_highpos(0) +{ + if (m_capacity > m_size) + { + throw std::runtime_error("sd_vector_builder: requested capacity is larger than vector size."); + } + size_type logm = bits::hi(m_capacity) + 1, logn = bits::hi(m_size) + 1; + if (logm == logn) + { + logm--; + } + m_wl = logn - logm; + m_low = int_vector<>(m_capacity, 0, m_wl); + m_high = bit_vector(m_capacity + (1ULL << logm), 0); +} +template <> +inline sd_vector<>::sd_vector(sd_vector_builder & builder) +{ + if (builder.items() != builder.capacity()) + { + throw std::runtime_error("sd_vector: the builder is not full."); + } + m_size = builder.m_size; + m_wl = builder.m_wl; + m_low = std::move(builder.m_low); + m_high = std::move(builder.m_high); + util::init_support(m_high_1_select, &m_high); + util::init_support(m_high_0_select, &m_high); + builder = sd_vector_builder(); +} +} +#endif +#endif +#ifndef INCLUDED_SDSL_SUFFIX_ARRAYS +#define INCLUDED_SDSL_SUFFIX_ARRAYS +#include +#ifndef INCLUDED_CSA_ALPHABET_STRATEGY +#define INCLUDED_CSA_ALPHABET_STRATEGY +#include +#include +#include +#include +#include +#include +#include +#ifndef INCLUDED_SDSL_SELECT_SUPPORT_SCAN +#define INCLUDED_SDSL_SELECT_SUPPORT_SCAN +#include +#include +#include +#include +namespace 
sdsl +{ +class structure_tree_node; +template +class select_support_scan : public select_support +{ +private: + static_assert(t_b == 1u or t_b == 0u or t_b == 10u, + "select_support_scan: bit pattern must be `0`,`1`,`10` or `01`"); + static_assert(t_pat_len == 1u or t_pat_len == 2u, "select_support_scan: bit pattern length must be 1 or 2"); +public: + typedef bit_vector bit_vector_type; + enum + { + bit_pat = t_b + }; +public: + explicit select_support_scan(bit_vector const * v = nullptr) : select_support(v) + {} + select_support_scan(select_support_scan const & ss) : select_support(ss.m_v) + {} + inline size_type select(size_type i) const; + inline size_type operator()(size_type i) const + { + return select(i); + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + return serialize_empty_object(out, v, name, this); + } + void load(std::istream &, SDSL_UNUSED bit_vector const * v = nullptr) + { + set_vector(v); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const; + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar); + void set_vector(bit_vector const * v = nullptr) + { + m_v = v; + } + select_support_scan & operator=(select_support_scan const & ss) + { + set_vector(ss.m_v); + return *this; + } + bool operator==(select_support_scan const & other) const noexcept + { + return (*m_v == *other.m_v); + } + bool operator!=(select_support_scan const & other) const noexcept + { + return !(*this == other); + } +}; +template +template +void select_support_scan::CEREAL_SAVE_FUNCTION_NAME(archive_t &) const +{} +template +template +void select_support_scan::CEREAL_LOAD_FUNCTION_NAME(archive_t &) +{} +template +inline typename select_support_scan::size_type +select_support_scan::select(size_type i) const +{ + uint64_t const * data = m_v->data(); + size_type word_pos = 0; + size_type word_off = 0; + uint64_t carry = select_support_trait::init_carry(data, word_pos); + size_type args = 
select_support_trait::args_in_the_first_word(*data, word_off, carry); + if (args >= i) + { + return (word_pos << 6) + + select_support_trait::ith_arg_pos_in_the_first_word(*data, i, word_off, carry); + } + word_pos += 1; + size_type sum_args = args; + carry = select_support_trait::get_carry(*data); + uint64_t old_carry = carry; + args = select_support_trait::args_in_the_word(*(++data), carry); + while (sum_args + args < i) + { + sum_args += args; + assert(data + 1 < m_v->data() + (m_v->capacity() >> 6)); + old_carry = carry; + args = select_support_trait::args_in_the_word(*(++data), carry); + word_pos += 1; + } + return (word_pos << 6) + + select_support_trait::ith_arg_pos_in_the_word(*data, i - sum_args, old_carry); +} +} +#endif +namespace sdsl +{ +class byte_alphabet; +template , + class select_support_type = select_support_scan<>, + class C_array_type = int_vector<>> +class succinct_byte_alphabet; +template , + class rank_support_type = typename bit_vector_type::rank_1_type, + class select_support_type = typename bit_vector_type::select_1_type, + class C_array_type = int_vector<>> +class int_alphabet; +template +constexpr char const * key_text() +{ + return conf::KEY_TEXT_INT; +} +template +constexpr char const * key_bwt() +{ + return conf::KEY_BWT_INT; +} +template <> +inline constexpr char const * key_text<8>() +{ + return conf::KEY_TEXT; +} +template <> +inline constexpr char const * key_bwt<8>() +{ + return conf::KEY_BWT; +} +template +struct alphabet_trait +{ + typedef byte_alphabet type; +}; +template <> +struct alphabet_trait +{ + typedef int_alphabet<> type; +}; +template +struct wt_alphabet_trait +{ + typedef t_enable type; +}; +template +struct wt_alphabet_trait::type> +{ + using type = typename alphabet_trait::type; +}; +class byte_alphabet +{ +public: + typedef int_vector<>::size_type size_type; + typedef int_vector<8> char2comp_type; + typedef int_vector<8> comp2char_type; + typedef int_vector<64> C_type; + typedef uint16_t sigma_type; + typedef 
uint8_t char_type; + typedef uint8_t comp_char_type; + typedef std::string string_type; + enum + { + int_width = 8 + }; + typedef byte_alphabet_tag alphabet_category; + char2comp_type const & char2comp; + comp2char_type const & comp2char; + C_type const & C; + sigma_type const & sigma; +private: + char2comp_type m_char2comp; + comp2char_type m_comp2char; + C_type m_C; + sigma_type m_sigma; +public: + byte_alphabet() : char2comp(m_char2comp), comp2char(m_comp2char), C(m_C), sigma(m_sigma), m_sigma(0) + {} + byte_alphabet(int_vector_buffer<8> & text_buf, int_vector_size_type len) : + char2comp(m_char2comp), + comp2char(m_comp2char), + C(m_C), + sigma(m_sigma) + { + m_sigma = 0; + if (0 == len or 0 == text_buf.size()) + return; + assert(len <= text_buf.size()); + m_C = int_vector<64>(257, 0); + m_char2comp = int_vector<8>(256, 0); + m_comp2char = int_vector<8>(256, 0); + for (size_type i = 0; i < len; ++i) + { + ++m_C[text_buf[i]]; + } + assert(1 == m_C[0]); + m_sigma = 0; + for (int i = 0; i < 256; ++i) + if (m_C[i]) + { + m_char2comp[i] = m_sigma; + m_comp2char[sigma] = i; + m_C[m_sigma] = m_C[i]; + ++m_sigma; + } + m_comp2char.resize(m_sigma); + m_C.resize(m_sigma + 1); + for (int i = (int)m_sigma; i > 0; --i) + m_C[i] = m_C[i - 1]; + m_C[0] = 0; + for (int i = 1; i <= (int)m_sigma; ++i) + m_C[i] += m_C[i - 1]; + assert(C[sigma] == len); + } + byte_alphabet(byte_alphabet const & bas) : + char2comp(m_char2comp), + comp2char(m_comp2char), + C(m_C), + sigma(m_sigma), + m_char2comp(bas.m_char2comp), + m_comp2char(bas.m_comp2char), + m_C(bas.m_C), + m_sigma(bas.m_sigma) + {} + byte_alphabet(byte_alphabet && bas) : + char2comp(m_char2comp), + comp2char(m_comp2char), + C(m_C), + sigma(m_sigma), + m_char2comp(std::move(bas.m_char2comp)), + m_comp2char(std::move(bas.m_comp2char)), + m_C(std::move(bas.m_C)), + m_sigma(bas.m_sigma) + {} + byte_alphabet & operator=(byte_alphabet const & bas) + { + if (this != &bas) + { + byte_alphabet tmp(bas); + *this = std::move(tmp); + } + 
return *this; + } + byte_alphabet & operator=(byte_alphabet && bas) + { + if (this != &bas) + { + m_char2comp = std::move(bas.m_char2comp); + m_comp2char = std::move(bas.m_comp2char); + m_C = std::move(bas.m_C); + m_sigma = std::move(bas.m_sigma); + } + return *this; + } + size_type serialize(std::ostream & out, structure_tree_node * v, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += m_char2comp.serialize(out, child, "m_char2comp"); + written_bytes += m_comp2char.serialize(out, child, "m_comp2char"); + written_bytes += m_C.serialize(out, child, "m_C"); + written_bytes += write_member(m_sigma, out, child, "m_sigma"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + m_char2comp.load(in); + m_comp2char.load(in); + m_C.load(in); + read_member(m_sigma, in); + } + bool operator==(byte_alphabet const & other) const noexcept + { + return (m_char2comp == other.m_char2comp) && (m_comp2char == other.m_comp2char) && (m_C == other.m_C) + && (m_sigma == other.m_sigma); + } + bool operator!=(byte_alphabet const & other) const noexcept + { + return !(*this == other); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_char2comp)); + ar(CEREAL_NVP(m_comp2char)); + ar(CEREAL_NVP(m_C)); + ar(CEREAL_NVP(m_sigma)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_char2comp)); + ar(CEREAL_NVP(m_comp2char)); + ar(CEREAL_NVP(m_C)); + ar(CEREAL_NVP(m_sigma)); + } +}; +template +class succinct_byte_alphabet +{ +public: + class char2comp_wrapper; + class comp2char_wrapper; + friend class char2comp_wrapper; + friend class comp2char_wrapper; + typedef int_vector<>::size_type size_type; + typedef char2comp_wrapper char2comp_type; + typedef comp2char_wrapper comp2char_type; + typedef C_array_type C_type; + typedef uint16_t 
sigma_type; + typedef uint8_t char_type; + typedef uint8_t comp_char_type; + typedef std::string string_type; + typedef byte_alphabet_tag alphabet_category; + enum + { + int_width = 8 + }; + class char2comp_wrapper + { + private: + succinct_byte_alphabet const * m_strat; + public: + char2comp_wrapper(succinct_byte_alphabet const * strat) : m_strat(strat) + {} + comp_char_type operator[](char_type c) const + { + if (c >= m_strat->m_char.size() or !m_strat->m_char[c]) + return (comp_char_type)0; + return (comp_char_type)m_strat->m_char_rank((size_type)c); + } + }; + class comp2char_wrapper + { + private: + succinct_byte_alphabet const * m_strat; + public: + comp2char_wrapper(succinct_byte_alphabet const * strat) : m_strat(strat) + {} + char_type operator[](comp_char_type c) const + { + return (char_type)m_strat->m_char_select(((size_type)c) + 1); + } + }; + const char2comp_type char2comp; + const comp2char_type comp2char; + C_type const & C; + sigma_type const & sigma; +private: + bit_vector_type m_char; + rank_support_type m_char_rank; + select_support_type m_char_select; + C_type m_C; + sigma_type m_sigma; +public: + succinct_byte_alphabet() : char2comp(this), comp2char(this), C(m_C), sigma(m_sigma), m_sigma(0) + {} + succinct_byte_alphabet(int_vector_buffer<8> & text_buf, int_vector_size_type len) : + char2comp(this), + comp2char(this), + C(m_C), + sigma(m_sigma) + { + m_sigma = 0; + if (0 == len or 0 == text_buf.size()) + return; + assert(len <= text_buf.size()); + int_vector<64> D(257, 0); + bit_vector tmp_char(256, 0); + for (size_type i = 0; i < len; ++i) + { + ++D[text_buf[i]]; + } + assert(1 == D[0]); + m_sigma = 0; + for (int i = 0; i < 256; ++i) + if (D[i]) + { + tmp_char[i] = 1; + D[m_sigma] = D[i]; + ++m_sigma; + } + m_C = C_type(m_sigma + 1, 0, bits::hi(len) + 1); + for (int i = (int)m_sigma; i > 0; --i) + m_C[i] = D[i - 1]; + m_C[0] = 0; + for (int i = 1; i <= (int)m_sigma; ++i) + m_C[i] = m_C[i] + m_C[i - 1]; + assert(m_C[sigma] == len); + m_char = 
tmp_char; + util::init_support(m_char_rank, &m_char); + util::init_support(m_char_select, &m_char); + } + succinct_byte_alphabet(succinct_byte_alphabet const & strat) : + char2comp(this), + comp2char(this), + C(m_C), + sigma(m_sigma), + m_char(strat.m_char), + m_char_rank(strat.m_char_rank), + m_char_select(strat.m_char_select), + m_C(strat.m_C), + m_sigma(strat.m_sigma) + { + m_char_rank.set_vector(&m_char); + m_char_select.set_vector(&m_char); + } + succinct_byte_alphabet(succinct_byte_alphabet && strat) : + char2comp(this), + comp2char(this), + C(m_C), + sigma(m_sigma), + m_char(std::move(strat.m_char)), + m_char_rank(std::move(strat.m_char_rank)), + m_char_select(std::move(strat.m_char_select)), + m_C(std::move(strat.m_C)), + m_sigma(std::move(strat.m_sigma)) + { + m_char_rank.set_vector(&m_char); + m_char_select.set_vector(&m_char); + } + succinct_byte_alphabet & operator=(succinct_byte_alphabet const & strat) + { + if (this != &strat) + { + succinct_byte_alphabet tmp(strat); + *this = std::move(tmp); + } + return *this; + } + succinct_byte_alphabet & operator=(succinct_byte_alphabet && strat) + { + if (this != &strat) + { + m_char = std::move(strat.m_char); + m_char_rank = std::move(strat.m_char_rank); + m_char_rank.set_vector(&m_char); + m_char_select = std::move(strat.m_char_select); + m_char_select.set_vector(&m_char); + m_C = std::move(strat.m_C); + m_sigma = std::move(strat.m_sigma); + } + return *this; + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += m_char.serialize(out, child, "m_char"); + written_bytes += m_char_rank.serialize(out, child, "m_char_rank"); + written_bytes += m_char_select.serialize(out, child, "m_char_select"); + written_bytes += m_C.serialize(out, child, "m_C"); + written_bytes += write_member(m_sigma, out, child, "m_sigma"); + 
structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + m_char.load(in); + m_char_rank.load(in); + m_char_rank.set_vector(&m_char); + m_char_select.load(in); + m_char_select.set_vector(&m_char); + m_C.load(in); + read_member(m_sigma, in); + } + bool operator==(succinct_byte_alphabet const & other) const noexcept + { + return (m_char == other.m_char) && (m_char_rank == other.m_char_rank) && (m_char_select == other.m_char_select) + && (m_C == other.m_C) && (m_sigma == other.m_sigma); + } + bool operator!=(succinct_byte_alphabet const & other) const noexcept + { + return !(*this == other); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_char)); + ar(CEREAL_NVP(m_char_rank)); + ar(CEREAL_NVP(m_char_select)); + ar(CEREAL_NVP(m_C)); + ar(CEREAL_NVP(m_sigma)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_char)); + ar(CEREAL_NVP(m_char_rank)); + m_char_rank.set_vector(&m_char); + ar(CEREAL_NVP(m_char_select)); + m_char_select.set_vector(&m_char); + ar(CEREAL_NVP(m_C)); + ar(CEREAL_NVP(m_sigma)); + } +}; +template +void init_char_bitvector(bit_vector_type & char_bv, std::map const & D) +{ + auto largest_symbol = (--D.end())->first; + bit_vector tmp_char(largest_symbol + 1, 0); + for (auto const & x : D) + { + tmp_char[x.first] = 1; + } + char_bv = tmp_char; +} +template +void init_char_bitvector(sd_vector & char_bv, + std::map const & D) +{ + auto largest_symbol = (--D.end())->first; + sd_vector_builder builder(largest_symbol + 1, D.size()); + for (auto const & x : D) + { + builder.set(x.first); + } + char_bv = std::move(sd_vector(builder)); +} +class plain_byte_alphabet +{ +public: + class mapping_wrapper; + typedef int_vector<>::size_type size_type; + typedef mapping_wrapper char2comp_type; + typedef mapping_wrapper comp2char_type; + typedef int_vector<64> C_type; + typedef uint16_t sigma_type; + typedef uint8_t char_type; + typedef 
uint8_t comp_char_type; + typedef std::string string_type; + typedef byte_alphabet_tag alphabet_category; + enum + { + int_width = 8 + }; + class mapping_wrapper + { + public: + mapping_wrapper() = default; + constexpr char_type operator[](char_type const c) const noexcept + { + return c; + } + }; + const char2comp_type char2comp{}; + const comp2char_type comp2char{}; + C_type const & C; + sigma_type const & sigma; +private: + C_type m_C; + sigma_type m_sigma; +public: + plain_byte_alphabet() : C(m_C), sigma(m_sigma), m_sigma(0) + {} + plain_byte_alphabet(int_vector_buffer<8> & text_buf, int_vector_size_type len) : C(m_C), sigma(m_sigma) + { + m_sigma = 0; + if (0 == len || 0 == text_buf.size()) + return; + assert(len <= text_buf.size()); + m_C = int_vector<64>(257, 0); + for (size_type i = 0; i < len; ++i) + ++m_C[text_buf[i]]; + assert(1 == m_C[0]); + m_sigma = 255; + for (int i = 0; i < 256; ++i) + { + if (m_C[i]) + { + m_sigma = i + 1; + } + } + for (int i = (int)256; i > 0; --i) + m_C[i] = m_C[i - 1]; + m_C[0] = 0; + for (int i = 1; i <= (int)256; ++i) + m_C[i] += m_C[i - 1]; + assert(C[sigma] == len); + } + plain_byte_alphabet(plain_byte_alphabet const & strat) : + C(m_C), + sigma(m_sigma), + m_C(strat.m_C), + m_sigma(strat.m_sigma) + {} + plain_byte_alphabet(plain_byte_alphabet && strat) noexcept : + C(m_C), + sigma(m_sigma), + m_C(std::move(strat.m_C)), + m_sigma(strat.m_sigma) + {} + plain_byte_alphabet & operator=(plain_byte_alphabet const & strat) + { + if (this != &strat) + { + plain_byte_alphabet tmp(strat); + *this = std::move(tmp); + } + return *this; + } + plain_byte_alphabet & operator=(plain_byte_alphabet && strat) noexcept + { + if (this != &strat) + { + m_C = std::move(strat.m_C); + m_sigma = strat.m_sigma; + } + return *this; + } + size_type serialize(std::ostream & out, structure_tree_node * v, std::string const & name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type 
written_bytes = 0; + written_bytes += m_C.serialize(out, child, "m_C"); + written_bytes += write_member(m_sigma, out, child, "m_sigma"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + m_C.load(in); + read_member(m_sigma, in); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_C)); + ar(CEREAL_NVP(m_sigma)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_C)); + ar(CEREAL_NVP(m_sigma)); + } + bool operator==(plain_byte_alphabet const & other) const noexcept + { + return (m_C == other.m_C) && (m_sigma == other.m_sigma); + } + bool operator!=(plain_byte_alphabet const & other) const noexcept + { + return !(*this == other); + } +}; +template +class int_alphabet +{ +public: + class char2comp_wrapper; + class comp2char_wrapper; + friend class char2comp_wrapper; + friend class comp2char_wrapper; + typedef int_vector<>::size_type size_type; + typedef char2comp_wrapper char2comp_type; + typedef comp2char_wrapper comp2char_type; + typedef C_array_type C_type; + typedef uint64_t sigma_type; + typedef uint64_t char_type; + typedef uint64_t comp_char_type; + typedef std::vector string_type; + typedef int_alphabet_tag alphabet_category; + enum + { + int_width = 0 + }; + class char2comp_wrapper + { + private: + int_alphabet const * m_strat; + public: + char2comp_wrapper(int_alphabet const * strat) : m_strat(strat) + {} + comp_char_type operator[](char_type c) const + { + if (m_strat->m_char.size() > 0) + { + if (c >= m_strat->m_char.size() or !m_strat->m_char[c]) + return (comp_char_type)0; + return (comp_char_type)m_strat->m_char_rank((size_type)c); + } + else + { + if (c >= m_strat->m_sigma) + return 0; + return (comp_char_type)c; + } + return 0; + } + }; + class comp2char_wrapper + { + private: + int_alphabet const * m_strat; + public: + comp2char_wrapper(int_alphabet const * strat) : m_strat(strat) + {} + char_type 
operator[](comp_char_type c) const + { + if (m_strat->m_char.size() > 0) + { + return (char_type)m_strat->m_char_select(((size_type)c) + 1); + } + else + { + return (char_type)c; + } + } + }; + const char2comp_type char2comp; + const comp2char_type comp2char; + C_type const & C; + sigma_type const & sigma; +private: + bit_vector_type m_char; + rank_support_type m_char_rank; + select_support_type m_char_select; + C_type m_C; + sigma_type m_sigma; + bool is_continuous_alphabet(std::map & D) + { + if (D.size() == 0) + { + return true; + } + else + { + return ((--D.end())->first + 1) == D.size(); + } + } +public: + int_alphabet() : char2comp(this), comp2char(this), C(m_C), sigma(m_sigma), m_sigma(0) + {} + int_alphabet(int_vector_buffer<0> & text_buf, int_vector_size_type len) : + char2comp(this), + comp2char(this), + C(m_C), + sigma(m_sigma) + { + m_sigma = 0; + if (0 == len or 0 == text_buf.size()) + return; + assert(len <= text_buf.size()); + std::map D; + for (size_type i = 0; i < len; ++i) + { + D[text_buf[i]]++; + } + m_sigma = D.size(); + if (is_continuous_alphabet(D)) + { + } + else + { + init_char_bitvector(m_char, D); + } + assert(D.find(0) != D.end() and 1 == D[0]); + m_C = C_type(m_sigma + 1, 0, bits::hi(len) + 1); + size_type sum = 0, idx = 0; + for (std::map::const_iterator it = D.begin(), end = D.end(); it != end; ++it) + { + m_C[idx++] = sum; + sum += it->second; + } + m_C[idx] = sum; + } + int_alphabet(int_alphabet const & strat) : + char2comp(this), + comp2char(this), + C(m_C), + sigma(m_sigma), + m_char(strat.m_char), + m_char_rank(strat.m_char_rank), + m_char_select(strat.m_char_select), + m_C(strat.m_C), + m_sigma(strat.m_sigma) + { + m_char_rank.set_vector(&m_char); + m_char_select.set_vector(&m_char); + } + int_alphabet(int_alphabet && strat) : + char2comp(this), + comp2char(this), + C(m_C), + sigma(m_sigma), + m_char(std::move(strat.m_char)), + m_char_rank(std::move(strat.m_char_rank)), + m_char_select(std::move(strat.m_char_select)), + 
m_C(std::move(strat.m_C)), + m_sigma(std::move(strat.m_sigma)) + { + m_char_rank.set_vector(&m_char); + m_char_select.set_vector(&m_char); + } + int_alphabet & operator=(int_alphabet const & strat) + { + if (this != &strat) + { + int_alphabet tmp(strat); + *this = std::move(tmp); + } + return *this; + } + int_alphabet & operator=(int_alphabet && strat) + { + if (this != &strat) + { + m_char = std::move(strat.m_char); + m_char_rank = std::move(strat.m_char_rank); + m_char_rank.set_vector(&m_char); + m_char_select = std::move(strat.m_char_select); + m_char_select.set_vector(&m_char); + m_C = std::move(strat.m_C); + m_sigma = std::move(strat.m_sigma); + } + return *this; + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += m_char.serialize(out, child, "m_char"); + written_bytes += m_char_rank.serialize(out, child, "m_char_rank"); + written_bytes += m_char_select.serialize(out, child, "m_char_select"); + written_bytes += m_C.serialize(out, child, "m_C"); + written_bytes += write_member(m_sigma, out, child, "m_sigma"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + m_char.load(in); + m_char_rank.load(in); + m_char_rank.set_vector(&m_char); + m_char_select.load(in); + m_char_select.set_vector(&m_char); + m_C.load(in); + read_member(m_sigma, in); + } + bool operator==(int_alphabet const & other) const noexcept + { + return (m_char == other.m_char) && (m_char_rank == other.m_char_rank) && (m_char_select == other.m_char_select) + && (m_C == other.m_C) && (m_sigma == other.m_sigma); + } + bool operator!=(int_alphabet const & other) const noexcept + { + return !(*this == other); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_char)); + ar(CEREAL_NVP(m_char_rank)); + 
ar(CEREAL_NVP(m_char_select)); + ar(CEREAL_NVP(m_C)); + ar(CEREAL_NVP(m_sigma)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_char)); + ar(CEREAL_NVP(m_char_rank)); + m_char_rank.set_vector(&m_char); + ar(CEREAL_NVP(m_char_select)); + m_char_select.set_vector(&m_char); + ar(CEREAL_NVP(m_C)); + ar(CEREAL_NVP(m_sigma)); + } +}; +} +#endif +#ifndef INCLUDED_SDSL_CSA_SADA +#define INCLUDED_SDSL_CSA_SADA +#include +#include +#include +#include +#include +#include +#include +#ifndef INCLUDED_CSA_SAMPLING_STRATEGY +#define INCLUDED_CSA_SAMPLING_STRATEGY +#include +#include +#include +#include +#include +#include +#include +#ifndef INCLUDED_SDSL_CONSTRUCT +#define INCLUDED_SDSL_CONSTRUCT +#include +#include +#include +#include +#include +#ifndef INCLUDED_SDSL_CONSTRUCT_BWT +#define INCLUDED_SDSL_CONSTRUCT_BWT +#include +#include +#include +namespace sdsl +{ +template +void construct_bwt(cache_config & config) +{ + static_assert(t_width == 0 or t_width == 8, + "construct_bwt: width must be `0` for integer alphabet and `8` for byte alphabet"); + typedef int_vector<>::size_type size_type; + char const * KEY_TEXT = key_text_trait::KEY_TEXT; + char const * KEY_BWT = key_bwt_trait::KEY_BWT; + read_only_mapper text(KEY_TEXT, config); + size_type n = text.size(); + uint8_t bwt_width = text.width(); + std::string bwt_file = cache_file_name(KEY_BWT, config); + auto gen_bwt = [&n](auto & bwt, auto & text, auto & sa) + { + size_type to_add[2] = {(size_type)-1, n - 1}; + for (size_type i = 0; i < n; ++i) + { + bwt[i] = text[sa[i] + to_add[sa[i] == 0]]; + } + }; + if (is_ram_file(bwt_file)) + { + int_vector_mapper<> sa(conf::KEY_SA, config); + auto bwt = write_out_mapper::create(bwt_file, n, bwt_width); + gen_bwt(bwt, text, sa); + } + else + { + size_type buffer_size = 1000000; + std::string sa_file = cache_file_name(conf::KEY_SA, config); + int_vector_buffer<> sa_buf(sa_file, std::ios::in, buffer_size); + auto bwt = 
write_out_mapper::create(bwt_file, n, bwt_width); + gen_bwt(bwt, text, sa_buf); + } + register_cache_file(KEY_BWT, config); +} +} +#endif +#ifndef INCLUDED_SDSL_CONSTRUCT_LCP +#define INCLUDED_SDSL_CONSTRUCT_LCP +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef INCLUDED_SDSL_CONSTRUCT_ISA +#define INCLUDED_SDSL_CONSTRUCT_ISA +#include +#include +namespace sdsl +{ +inline void construct_isa(cache_config & config) +{ + typedef int_vector<>::size_type size_type; + if (!cache_file_exists(conf::KEY_ISA, config)) + { + int_vector_buffer<> sa_buf(cache_file_name(conf::KEY_SA, config)); + if (!sa_buf.is_open()) + { + throw std::ios_base::failure("cst_construct: Cannot load SA from file system!"); + } + int_vector<> isa(sa_buf.size()); + for (size_type i = 0; i < isa.size(); ++i) + { + isa[sa_buf[i]] = i; + } + store_to_cache(isa, conf::KEY_ISA, config); + } +} +} +#endif +#ifndef INCLUDED_SDSL_CONSTRUCT_LCP_HELPER +#define INCLUDED_SDSL_CONSTRUCT_LCP_HELPER +#include +#include +#include +#include +#include +#include +#include +namespace sdsl +{ +inline void insert_lcp_values(int_vector<> & partial_lcp, + bit_vector & index_done, + std::string lcp_file, + uint64_t max_lcp_value, + uint64_t lcp_value_offset) +{ + std::string tmp_lcp_file = lcp_file + "_TMP"; + const uint64_t buffer_size = 1000000; + typedef int_vector<>::size_type size_type; + int_vector_buffer<> lcp_buffer(lcp_file, std::ios::in, buffer_size); + uint64_t n = lcp_buffer.size(); + uint8_t int_width = bits::hi(max_lcp_value) + 1; + int_vector_buffer<> out_buf(tmp_lcp_file, std::ios::out, buffer_size, int_width); + for (size_type i = 0, calc_idx = 0; i < n; ++i) + { + if (index_done[i]) + { + out_buf[i] = lcp_buffer[i]; + } + else + { + if (partial_lcp[calc_idx]) + { + out_buf[i] = partial_lcp[calc_idx] + lcp_value_offset; + index_done[i] = true; + } + ++calc_idx; + } + } + lcp_buffer.close(); + out_buf.close(); + sdsl::rename(tmp_lcp_file, lcp_file); +} 
+template +void create_C_array(std::vector & C, tWT const & wt) +{ + uint64_t quantity; + std::vector cs(wt.sigma); + std::vector rank_c_i(wt.sigma); + std::vector rank_c_j(wt.sigma); + C = std::vector(257, 0); + interval_symbols(wt, 0, wt.size(), quantity, cs, rank_c_i, rank_c_j); + for (uint64_t i = 0; i < quantity; ++i) + { + unsigned char c = cs[i]; + C[c + 1] = rank_c_j[i]; + } + for (uint64_t i = 1; i < C.size() - 1; ++i) + { + C[i + 1] += C[i]; + } +} +class buffered_char_queue +{ + typedef bit_vector::size_type size_type; + typedef std::queue tQ; +private: + static const uint32_t m_buffer_size = 10000; + uint8_t m_write_buf[m_buffer_size]; + uint8_t m_read_buf[m_buffer_size]; + size_type m_widx; + size_type m_ridx; + bool m_sync; + size_type m_disk_buffered_blocks; + char m_c; + size_type m_rb; + size_type m_wb; + std::string m_file_name; + std::fstream m_stream; +public: + buffered_char_queue() : m_widx(0), m_ridx(0), m_sync(true), m_disk_buffered_blocks(0), m_c('?'), m_rb(0), m_wb(0) + {} + void init(std::string const & dir, char c) + { + m_c = c; + m_file_name = dir + "buffered_char_queue_" + util::to_string(util::pid()); + } + ~buffered_char_queue() + { + m_stream.close(); + sdsl::remove(m_file_name); + } + void push_back(uint8_t x) + { + m_write_buf[m_widx] = x; + if (m_sync) + { + m_read_buf[m_widx] = x; + } + ++m_widx; + if (m_widx == m_buffer_size) + { + if (!m_sync) + { + if (!m_stream.is_open()) + { + m_stream.open(m_file_name, std::ios::in | std::ios::out | std::ios::binary | std::ios::trunc); + } + m_stream.seekp(m_buffer_size * (m_wb++), std::ios::beg); + m_stream.write((char *)m_write_buf, m_buffer_size); + ++m_disk_buffered_blocks; + } + m_sync = 0; + m_widx = 0; + } + } + uint8_t pop_front() + { + uint8_t x = m_read_buf[m_ridx]; + ++m_ridx; + if (m_ridx == m_buffer_size) + { + if (m_disk_buffered_blocks > 0) + { + m_stream.seekg(m_buffer_size * (m_rb++), std::ios::beg); + m_stream.read((char *)m_read_buf, m_buffer_size); + 
--m_disk_buffered_blocks; + } + else + { + m_sync = 1; + memcpy(m_read_buf, m_write_buf, m_widx + 1); + } + m_ridx = 0; + } + return x; + } +}; +typedef std::list::size_type> tLI; +typedef std::vector::size_type> tVI; +template +void push_front_m_index(size_type_class i, + uint8_t c, + tLI (&m_list)[256], + uint8_t (&m_chars)[256], + size_type_class & m_char_count) +{ + if (m_list[c].empty()) + { + m_chars[m_char_count++] = c; + } + m_list[c].push_front(i); +} +template +void push_back_m_index(size_type_class i, + uint8_t c, + tLI (&m_list)[256], + uint8_t (&m_chars)[256], + size_type_class & m_char_count) +{ + if (m_list[c].empty()) + { + m_chars[m_char_count++] = c; + } + m_list[c].push_back(i); +} +} +#endif +#ifndef INCLUDED_SDSL_WT_ALGORITHM +#define INCLUDED_SDSL_WT_ALGORITHM +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef INCLUDED_SDSL_WT_HELPER +#define INCLUDED_SDSL_WT_HELPER +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace sdsl +{ +typedef std::array::size_type, 2> range_type; +typedef std::vector range_vec_type; +bool empty(range_type const & r); +int_vector<>::size_type size(range_type const & r); +template +void calculate_character_occurences(t_it begin, t_it end, t_rac & C) +{ + C = t_rac(); + for (auto it = begin; it != end; ++it) + { + uint64_t c = *it; + if (c >= C.size()) + { + C.resize(c + 1, 0); + } + ++C[c]; + } +} +template +void calculate_effective_alphabet_size(t_rac const & C, sigma_type & sigma) +{ + sigma = std::count_if(begin(C), + end(C), + [](decltype(*begin(C)) & x) + { + return x > 0; + }); +} +struct pc_node +{ + uint64_t freq; + uint64_t sym; + uint64_t parent; + uint64_t child[2]; + enum : uint64_t + { + undef = 0xFFFFFFFFFFFFFFFFULL + }; + pc_node(uint64_t freq = 0, + uint64_t sym = 0, + uint64_t parent = undef, + uint64_t child_left = undef, + uint64_t child_right = undef); +}; +template +struct _node +{ + using 
node_type = typename t_tree_strat_fat::node_type; + typedef uint64_t size_type; + uint64_t bv_pos = 0; + uint64_t bv_pos_rank = 0; + node_type parent = t_tree_strat_fat::undef; + node_type child[2] = {t_tree_strat_fat::undef, t_tree_strat_fat::undef}; + _node(uint64_t bv_pos = 0, + uint64_t bv_pos_rank = 0, + node_type parent = t_tree_strat_fat::undef, + node_type child_left = t_tree_strat_fat::undef, + node_type child_right = t_tree_strat_fat::undef) : + bv_pos(bv_pos), + bv_pos_rank(bv_pos_rank), + parent(parent) + { + child[0] = child_left; + child[1] = child_right; + } + _node(_node const &) = default; + _node & operator=(_node const & v) + { + if (this != &v) + { + bv_pos = v.bv_pos; + bv_pos_rank = v.bv_pos_rank; + parent = v.parent; + child[0] = v.child[0]; + child[1] = v.child[1]; + } + return *this; + } + _node & operator=(pc_node const & v) + { + bv_pos = v.freq; + bv_pos_rank = v.sym; + parent = v.parent; + child[0] = v.child[0]; + child[1] = v.child[1]; + return *this; + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * st_child = structure_tree::add_child(v, name, util::class_name(*this)); + uint64_t written_bytes = 0; + written_bytes += write_member(bv_pos, out); + written_bytes += write_member(bv_pos_rank, out); + written_bytes += write_member(parent, out); + out.write((char *)child, 2 * sizeof(child[0])); + written_bytes += 2 * sizeof(child[0]); + structure_tree::add_size(st_child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + read_member(bv_pos, in); + read_member(bv_pos_rank, in); + read_member(parent, in); + in.read((char *)child, 2 * sizeof(child[0])); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(bv_pos)); + ar(CEREAL_NVP(bv_pos_rank)); + ar(CEREAL_NVP(parent)); + ar(CEREAL_NVP(child[0])); + ar(CEREAL_NVP(child[1])); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + 
{ + ar(CEREAL_NVP(bv_pos)); + ar(CEREAL_NVP(bv_pos_rank)); + ar(CEREAL_NVP(parent)); + ar(CEREAL_NVP(child[0])); + ar(CEREAL_NVP(child[1])); + } + bool operator==(_node const & other) const noexcept + { + return (bv_pos == other.bv_pos) && (bv_pos_rank == other.bv_pos_rank) && (parent == other.parent) + && (child[0] == other.child[0]) && (child[1] == other.child[1]); + } + bool operator!=(_node const & other) const noexcept + { + return !(*this == other); + } +}; +template +struct _byte_tree +{ + using alphabet_category = byte_alphabet_tag; + using value_type = uint8_t; + using node_type = uint16_t; + using data_node = _node<_byte_tree>; + enum : uint16_t + { + undef = 0xFFFF + }; + enum : uint32_t + { + fixed_sigma = 256 + }; + enum : uint8_t + { + int_width = 8 + }; + std::vector m_nodes; + node_type m_c_to_leaf[fixed_sigma]; + uint64_t m_path[fixed_sigma]; + _byte_tree() + {} + _byte_tree(std::vector const & temp_nodes, uint64_t & bv_size, t_wt const *) + { + m_nodes.resize(temp_nodes.size()); + m_nodes[0] = temp_nodes.back(); + bv_size = 0; + size_t node_cnt = 1; + node_type last_parent = undef; + std::deque q; + q.push_back(0); + while (!q.empty()) + { + node_type idx; + if (!t_dfs_shape) + { + idx = q.front(); + q.pop_front(); + } + else + { + idx = q.back(); + q.pop_back(); + } + uint64_t frq = m_nodes[idx].bv_pos; + m_nodes[idx].bv_pos = bv_size; + if (m_nodes[idx].child[0] != undef) + bv_size += frq; + if (idx > 0) + { + if (last_parent != m_nodes[idx].parent) + m_nodes[m_nodes[idx].parent].child[0] = idx; + else + m_nodes[m_nodes[idx].parent].child[1] = idx; + last_parent = m_nodes[idx].parent; + } + if (m_nodes[idx].child[0] != undef) + { + for (uint32_t k = 0; k < 2; ++k) + { + m_nodes[node_cnt] = temp_nodes[m_nodes[idx].child[k]]; + m_nodes[node_cnt].parent = idx; + q.push_back(node_cnt); + m_nodes[idx].child[k] = node_cnt++; + } + } + } + for (uint32_t i = 0; i < fixed_sigma; ++i) + m_c_to_leaf[i] = undef; + for (node_type v = 0; v < m_nodes.size(); 
++v) + { + if (m_nodes[v].child[0] == undef) + m_c_to_leaf[(uint8_t)m_nodes[v].bv_pos_rank] = v; + } + for (uint32_t c = 0, prev_c = 0; c < fixed_sigma; ++c) + { + if (m_c_to_leaf[c] != undef) + { + node_type v = m_c_to_leaf[c]; + uint64_t pw = 0; + uint64_t pl = 0; + while (v != root()) + { + pw <<= 1; + if (m_nodes[m_nodes[v].parent].child[1] == v) + pw |= 1ULL; + ++pl; + v = m_nodes[v].parent; + } + if (pl > 56) + { + throw std::logic_error("Code depth greater than 56!!!"); + } + m_path[c] = pw | (pl << 56); + prev_c = c; + } + else + { + uint64_t pl = 0; + m_path[c] = prev_c | (pl << 56); + } + } + } + template + void init_node_ranks(t_rank_type const & rank) + { + for (uint64_t i = 0; i < m_nodes.size(); ++i) + { + if (m_nodes[i].child[0] != undef) + m_nodes[i].bv_pos_rank = rank.rank(m_nodes[i].bv_pos); + } + } + _byte_tree(_byte_tree const & bt) : m_nodes(bt.m_nodes) + { + for (uint32_t i = 0; i < fixed_sigma; ++i) + m_c_to_leaf[i] = bt.m_c_to_leaf[i]; + for (uint32_t i = 0; i < fixed_sigma; ++i) + m_path[i] = bt.m_path[i]; + } + _byte_tree & operator=(_byte_tree const & bt) + { + if (this != &bt) + { + _byte_tree tmp(bt); + *this = std::move(tmp); + } + return *this; + } + _byte_tree & operator=(_byte_tree && bt) + { + if (this != &bt) + { + m_nodes = std::move(bt.m_nodes); + for (uint32_t i = 0; i < fixed_sigma; ++i) + m_c_to_leaf[i] = bt.m_c_to_leaf[i]; + for (uint32_t i = 0; i < fixed_sigma; ++i) + m_path[i] = bt.m_path[i]; + } + return *this; + } + uint64_t serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + uint64_t written_bytes = 0; + uint64_t m_nodes_size = m_nodes.size(); + written_bytes += write_member(m_nodes_size, out, child, "m_nodes.size()"); + written_bytes += serialize_vector(m_nodes, out, child, "m_nodes"); + out.write((char *)m_c_to_leaf, fixed_sigma * sizeof(m_c_to_leaf[0])); + written_bytes += 
fixed_sigma * sizeof(m_c_to_leaf[0]); + out.write((char *)m_path, fixed_sigma * sizeof(m_path[0])); + written_bytes += fixed_sigma * sizeof(m_path[0]); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + uint64_t m_nodes_size = 0; + read_member(m_nodes_size, in); + m_nodes = std::vector(m_nodes_size); + load_vector(m_nodes, in); + in.read((char *)m_c_to_leaf, fixed_sigma * sizeof(m_c_to_leaf[0])); + in.read((char *)m_path, fixed_sigma * sizeof(m_path[0])); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_nodes)); + ar(CEREAL_NVP(m_c_to_leaf)); + ar(CEREAL_NVP(m_path)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_nodes)); + ar(CEREAL_NVP(m_c_to_leaf)); + ar(CEREAL_NVP(m_path)); + } + bool operator==(_byte_tree const & other) const noexcept + { + return (m_nodes == other.m_nodes) + ; + } + bool operator!=(_byte_tree const & other) const noexcept + { + return !(*this == other); + } + inline node_type c_to_leaf(value_type c) const + { + return m_c_to_leaf[c]; + } + static inline node_type root() + { + return 0; + } + uint64_t size() const + { + return m_nodes.size(); + } + inline node_type parent(node_type v) const + { + return m_nodes[v].parent; + } + inline node_type child(node_type v, uint8_t i) const + { + return m_nodes[v].child[i]; + } + inline bool is_leaf(node_type v) const + { + return m_nodes[v].child[0] == undef; + } + inline uint64_t size(node_type v) const + { + auto next_v = t_dfs_shape ? 
m_nodes[v].child[0] : v + 1; + return bv_pos(next_v) - bv_pos(v); + } + inline uint64_t bit_path(value_type c) const + { + return m_path[c]; + } + inline uint64_t bv_pos(node_type v) const + { + return m_nodes[v].bv_pos; + } + inline uint64_t bv_pos_rank(node_type v) const + { + return m_nodes[v].bv_pos_rank; + } + inline bool is_valid(node_type v) const + { + return v != undef; + } + inline std::pair symbol_gte(value_type c) const + { + for (uint32_t i = c; i < fixed_sigma; i++) + { + if (m_c_to_leaf[i] != undef) + { + return {true, i}; + } + } + return {false, 0}; + } + inline std::pair symbol_lte(value_type c) const + { + for (uint32_t i = c; i > 0; i--) + { + if (m_c_to_leaf[i] != undef) + { + return {true, i}; + } + } + if (m_c_to_leaf[0] != undef) + return {true, 0}; + return {false, 0}; + } +}; +template +struct byte_tree +{ + template + using type = _byte_tree; +}; +template +struct _int_tree +{ + using alphabet_category = int_alphabet_tag; + using value_type = uint64_t; + using node_type = uint64_t; + using data_node = _node<_int_tree>; + enum : uint64_t + { + undef = 0xFFFFFFFFFFFFFFFFULL + }; + enum : uint8_t + { + int_width = 0 + }; + std::vector m_nodes; + std::vector m_c_to_leaf; + std::vector m_path; + _int_tree() = default; + _int_tree(std::vector const & temp_nodes, uint64_t & bv_size, t_wt const *) + { + m_nodes.resize(temp_nodes.size()); + m_nodes[0] = temp_nodes.back(); + bv_size = 0; + size_t node_cnt = 1; + node_type last_parent = undef; + std::deque q; + q.push_back(0); + uint64_t max_c = 0; + while (!q.empty()) + { + node_type idx; + if (!t_dfs_shape) + { + idx = q.front(); + q.pop_front(); + } + else + { + idx = q.back(); + q.pop_back(); + } + uint64_t frq = m_nodes[idx].bv_pos; + m_nodes[idx].bv_pos = bv_size; + if (m_nodes[idx].child[0] != undef) + { + bv_size += frq; + } + else if (max_c < m_nodes[idx].bv_pos_rank) + { + max_c = m_nodes[idx].bv_pos_rank; + } + if (idx > 0) + { + if (last_parent != m_nodes[idx].parent) + 
m_nodes[m_nodes[idx].parent].child[0] = idx; + else + m_nodes[m_nodes[idx].parent].child[1] = idx; + last_parent = m_nodes[idx].parent; + } + if (m_nodes[idx].child[0] != undef) + { + for (uint32_t k = 0; k < 2; ++k) + { + m_nodes[node_cnt] = temp_nodes[m_nodes[idx].child[k]]; + m_nodes[node_cnt].parent = idx; + q.push_back(node_cnt); + m_nodes[idx].child[k] = node_cnt++; + } + } + } + m_c_to_leaf.resize(max_c + 1, undef); + for (node_type v = 0; v < m_nodes.size(); ++v) + { + if (m_nodes[v].child[0] == undef) + { + uint64_t c = m_nodes[v].bv_pos_rank; + m_c_to_leaf[c] = v; + if (c > max_c) + max_c = c; + } + } + m_path = std::vector(m_c_to_leaf.size(), 0); + for (value_type c = 0, prev_c = 0; c < m_c_to_leaf.size(); ++c) + { + if (m_c_to_leaf[c] != undef) + { + node_type v = m_c_to_leaf[c]; + uint64_t w = 0; + uint64_t l = 0; + while (v != root()) + { + w <<= 1; + if (m_nodes[m_nodes[v].parent].child[1] == v) + w |= 1ULL; + ++l; + v = m_nodes[v].parent; + } + if (l > 56) + { + throw std::logic_error("Code depth greater than 56!!!"); + } + m_path[c] = w | (l << 56); + prev_c = c; + } + else + { + uint64_t pl = 0; + m_path[c] = prev_c | (pl << 56); + } + } + } + template + void init_node_ranks(t_rank_type const & rank) + { + for (uint64_t i = 0; i < m_nodes.size(); ++i) + { + if (m_nodes[i].child[0] != undef) + m_nodes[i].bv_pos_rank = rank.rank(m_nodes[i].bv_pos); + } + } + _int_tree(_int_tree const & bt) = default; + _int_tree(_int_tree && bt) = default; + _int_tree & operator=(_int_tree const & bt) = default; + _int_tree & operator=(_int_tree && bt) = default; + uint64_t serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + uint64_t written_bytes = 0; + uint64_t m_nodes_size = m_nodes.size(); + written_bytes += write_member(m_nodes_size, out, child, "m_nodes.size()"); + written_bytes += serialize_vector(m_nodes, out, child, 
"m_nodes"); + uint64_t m_c_to_leaf_size = m_c_to_leaf.size(); + written_bytes += write_member(m_c_to_leaf_size, out, child, "m_c_to_leaf.size()"); + written_bytes += serialize_vector(m_c_to_leaf, out, child, "m_c_to_leaf"); + uint64_t m_path_size = m_path.size(); + written_bytes += write_member(m_path_size, out, child, "m_path.size()"); + written_bytes += serialize_vector(m_path, out, child, "m_path"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + uint64_t m_nodes_size = 0; + read_member(m_nodes_size, in); + m_nodes = std::vector(m_nodes_size); + load_vector(m_nodes, in); + uint64_t m_c_to_leaf_size = 0; + read_member(m_c_to_leaf_size, in); + m_c_to_leaf = std::vector(m_c_to_leaf_size); + load_vector(m_c_to_leaf, in); + uint64_t m_path_size = 0; + read_member(m_path_size, in); + m_path = std::vector(m_path_size); + load_vector(m_path, in); + } + bool operator==(_int_tree const & other) const noexcept + { + return (m_nodes == other.m_nodes) && (m_c_to_leaf == other.m_c_to_leaf) && (m_path == other.m_path); + } + bool operator!=(_int_tree const & other) const noexcept + { + return !(*this == other); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_nodes)); + ar(CEREAL_NVP(m_c_to_leaf)); + ar(CEREAL_NVP(m_path)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_nodes)); + ar(CEREAL_NVP(m_c_to_leaf)); + ar(CEREAL_NVP(m_path)); + } + inline node_type c_to_leaf(value_type c) const + { + if (c >= m_c_to_leaf.size()) + return undef; + else + return m_c_to_leaf[c]; + } + static inline node_type root() + { + return 0; + } + uint64_t size() const + { + return m_nodes.size(); + } + inline node_type parent(node_type v) const + { + return m_nodes[v].parent; + } + inline node_type child(node_type v, uint8_t i) const + { + return m_nodes[v].child[i]; + } + inline bool is_leaf(node_type v) const + { + return m_nodes[v].child[0] == 
undef; + } + inline uint64_t size(node_type v) const + { + auto next_v = t_dfs_shape ? m_nodes[v].child[0] : v + 1; + return bv_pos(next_v) - bv_pos(v); + } + inline uint64_t bit_path(value_type c) const + { + if (c >= m_path.size()) + { + return m_path.size() - 1; + } + return m_path[c]; + } + inline uint64_t bv_pos(node_type v) const + { + return m_nodes[v].bv_pos; + } + inline uint64_t bv_pos_rank(node_type v) const + { + return m_nodes[v].bv_pos_rank; + } + inline bool is_valid(node_type v) const + { + return v != undef; + } + inline std::pair symbol_gte(value_type c) const + { + if (c >= m_c_to_leaf.size()) + { + return {false, 0}; + } + for (value_type i = c; i < m_c_to_leaf.size(); i++) + { + if (m_c_to_leaf[i] != undef) + { + return {true, i}; + } + } + return {false, 0}; + } + inline std::pair symbol_lte(value_type c) const + { + if (c >= m_c_to_leaf.size()) + { + c = m_c_to_leaf.size() - 1; + } + for (value_type i = c; i > 0; i--) + { + if (m_c_to_leaf[i] != undef) + { + return {true, i}; + } + } + if (m_c_to_leaf[0] != undef) + return {true, 0}; + return {false, 0}; + } +}; +template +struct int_tree +{ + template + using type = _int_tree; +}; +template +class node_bv_container +{ +public: + typedef typename t_bv::value_type value_type; + typedef typename t_bv::size_type size_type; + typedef typename t_bv::difference_type difference_type; + typedef typename t_bv::const_iterator iterator; +private: + iterator m_begin, m_end; +public: + node_bv_container(iterator b, iterator e) : m_begin(b), m_end(e) + {} + value_type operator[](size_type i) const + { + return *(m_begin + i); + } + size_type size() const + { + return m_end - m_begin; + } + iterator begin() const + { + return m_begin; + } + iterator end() const + { + return m_end; + } +}; +template +class node_seq_container +{ +public: + typedef typename t_bv::value_type value_type; + typedef typename t_bv::size_type size_type; + typedef typename t_bv::difference_type difference_type; + typedef typename 
t_bv::const_iterator iterator; +private: + iterator m_begin, m_end; +public: + node_seq_container(iterator b, iterator e) : m_begin(b), m_end(e) + {} + value_type operator[](size_type i) const + { + return *(m_begin + i); + } + size_type size() const + { + return m_end - m_begin; + } + iterator begin() const + { + return m_begin; + } + iterator end() const + { + return m_end; + } +}; +inline bool empty(range_type const & r) +{ + return std::get<0>(r) == (std::get<1>(r) + 1); +} +inline int_vector<>::size_type size(range_type const & r) +{ + return std::get<1>(r) - std::get<0>(r) + 1; +} +inline pc_node::pc_node(uint64_t freq, uint64_t sym, uint64_t parent, uint64_t child_left, uint64_t child_right) : + freq(freq), + sym(sym), + parent(parent) +{ + child[0] = child_left; + child[1] = child_right; +} +} +#endif +namespace sdsl +{ +template +struct has_interval_symbols; +template +struct _interval_symbols_wt; +template +struct has_expand; +template +std::vector> +intersect(t_wt const & wt, std::vector const & ranges, typename t_wt::size_type t = 0) +{ + using std::get; + using size_type = typename t_wt::size_type; + using value_type = typename t_wt::value_type; + using node_type = typename t_wt::node_type; + using pnvr_type = std::pair; + typedef std::stack stack_type; + static_assert(has_expand(node_type const &)>::value, + "intersect requires t_wt to have expand(const node_type&)"); + using p_t = std::pair; + std::vector res; + auto push_node = [&t](stack_type & s, node_type & child, range_vec_type & child_range) + { + auto end = std::remove_if(child_range.begin(), + child_range.end(), + [&](const range_type & x) + { + return empty(x); + }); + if (end > child_range.begin() + t - 1) + { + s.emplace(pnvr_type(child, range_vec_type(child_range.begin(), end))); + } + }; + if (ranges.empty()) + return res; + t = (t == 0) ? 
ranges.size() : t; + std::stack stack; + stack.emplace(pnvr_type(wt.root(), ranges)); + while (!stack.empty()) + { + pnvr_type x = stack.top(); + stack.pop(); + if (wt.is_leaf(x.first)) + { + auto const & iv = x.second; + if (t <= iv.size()) + { + auto freq = std::accumulate(iv.begin(), + iv.end(), + 0ULL, + [](size_type acc, range_type const & r) + { + return acc + (r[1] - r[0] + 1); + }); + res.emplace_back(wt.sym(x.first), freq); + } + } + else + { + auto child = wt.expand(x.first); + auto child_ranges = wt.expand(x.first, x.second); + push_node(stack, get<1>(child), get<1>(child_ranges)); + push_node(stack, get<0>(child), get<0>(child_ranges)); + } + } + return res; +} +template +std::pair +quantile_freq(t_wt const & wt, typename t_wt::size_type lb, typename t_wt::size_type rb, typename t_wt::size_type q) +{ + static_assert(t_wt::lex_ordered, "quantile_freq requires a lex_ordered WT"); + using std::get; + using node_type = typename t_wt::node_type; + static_assert(has_expand(node_type const &)>::value, + "quantile_freq requires t_wt to have expand(const node_type&)"); + node_type v = wt.root(); + range_type r{{lb, rb}}; + while (!wt.is_leaf(v)) + { + auto child = wt.expand(v); + auto child_ranges = wt.expand(v, r); + auto num_zeros = size(get<0>(child_ranges)); + if (q >= num_zeros) + { + q -= num_zeros; + v = get<1>(child); + r = get<1>(child_ranges); + } + else + { + v = get<0>(child); + r = get<0>(child_ranges); + } + } + return {wt.sym(v), size(r)}; +} +template +void _interval_symbols_rec(t_wt const & wt, + range_type r, + typename t_wt::size_type & k, + std::vector & cs, + std::vector & rank_c_i, + std::vector & rank_c_j, + const typename t_wt::node_type & v) +{ + using std::get; + if (wt.is_leaf(v)) + { + rank_c_i[k] = r[0]; + rank_c_j[k] = r[1] + 1; + cs[k++] = wt.sym(v); + } + else + { + auto child = wt.expand(v); + auto child_ranges = wt.expand(v, r); + if (!empty(get<0>(child_ranges))) + { + _interval_symbols_rec(wt, get<0>(child_ranges), k, cs, 
rank_c_i, rank_c_j, get<0>(child)); + } + if (!empty(get<1>(child_ranges))) + { + _interval_symbols_rec(wt, get<1>(child_ranges), k, cs, rank_c_i, rank_c_j, get<1>(child)); + } + } +} +template +void _interval_symbols(t_wt const & wt, + typename t_wt::size_type i, + typename t_wt::size_type j, + typename t_wt::size_type & k, + std::vector & cs, + std::vector & rank_c_i, + std::vector & rank_c_j) +{ + assert(i <= j and j <= wt.size()); + k = 0; + if ((i + 1) == j) + { + auto res = wt.inverse_select(i); + cs[0] = res.second; + rank_c_i[0] = res.first; + rank_c_j[0] = res.first + 1; + k = 1; + return; + } + else if (j > i) + { + _interval_symbols_rec(wt, range_type{{i, j - 1}}, k, cs, rank_c_i, rank_c_j, wt.root()); + } +} +template +void interval_symbols(t_wt const & wt, + typename t_wt::size_type i, + typename t_wt::size_type j, + typename t_wt::size_type & k, + std::vector & cs, + std::vector & rank_c_i, + std::vector & rank_c_j) +{ + constexpr bool has_own = has_interval_symbols::value; + if (has_own) + { + _interval_symbols_wt::call(wt, i, j, k, cs, rank_c_i, rank_c_j); + } + else + { + _interval_symbols(wt, i, j, k, cs, rank_c_i, rank_c_j); + } +} +template +struct has_interval_symbols +{ + template + static constexpr auto check(T *) -> typename std::is_same< + decltype(std::declval().interval_symbols(std::declval(), + std::declval(), + std::declval(), + std::declval &>(), + std::declval &>(), + std::declval &>())), + void>::type + { + return std::true_type(); + } + template + static constexpr std::false_type check(...) 
+ { + return std::false_type(); + } + typedef decltype(check(nullptr)) type; + static constexpr bool value = type::value; +}; +template +struct _interval_symbols_wt +{ + typedef typename t_wt::size_type size_type; + typedef typename t_wt::value_type value_type; + static void call(t_wt const & wt, + size_type i, + size_type j, + size_type & k, + std::vector & cs, + std::vector & rank_c_i, + std::vector & rank_c_j) + { + wt.interval_symbols(i, j, k, cs, rank_c_i, rank_c_j); + } +}; +template +struct _interval_symbols_wt +{ + typedef typename t_wt::size_type size_type; + typedef typename t_wt::value_type value_type; + static void call(t_wt const &, + size_type, + size_type, + size_type &, + std::vector &, + std::vector &, + std::vector &) + {} +}; +template +struct has_expand +{ + static_assert(std::integral_constant::value, "Second template parameter needs to be of function type."); +}; +template +struct has_expand +{ + template + static constexpr auto check(T *) -> + typename std::is_same().expand(std::declval()...)), t_ret>::type + { + return std::true_type(); + } + template + static constexpr std::false_type check(...) + { + return std::false_type(); + } + typedef decltype(check(nullptr)) type; + static constexpr bool value = type::value; +}; +template +struct has_range_search_2d +{ + template + static constexpr auto check(T *) -> typename std::is_same< + decltype(std::declval().range_search_2d( + std::declval(), + std::declval(), + std::declval(), + std::declval(), + false)), + std::pair>>>::type + { + return std::true_type(); + } + template + static constexpr std::false_type check(...) 
+ { + return std::false_type(); + } + typedef decltype(check(nullptr)) type; + static constexpr bool value = type::value; +}; +template +std::pair _symbol_lte(t_wt const & wt, typename t_wt::value_type c) +{ + if (((1ULL) << (wt.max_level)) <= c) + { + c = sdsl::bits::lo_set[wt.max_level]; + } + auto node = wt.root(); + auto predecessor_subtree = node; + uint64_t mask = (1ULL) << (wt.max_level - 1); + while (!wt.is_leaf(node)) + { + auto children = wt.expand(node); + auto left_child = std::get<0>(children); + auto right_child = std::get<1>(children); + if (c & (mask >> node.level)) + { + if (right_child.size) + { + node = right_child; + if (left_child.size) + { + predecessor_subtree = left_child; + } + } + else + { + node = left_child; + c = sdsl::bits::all_set; + } + } + else + { + if (left_child.size) + { + node = left_child; + } + else + { + if (predecessor_subtree == wt.root()) + { + return {false, 0}; + } + node = predecessor_subtree; + c = sdsl::bits::all_set; + } + } + } + return {true, node.sym}; +} +template +std::pair _symbol_gte(t_wt const & wt, typename t_wt::value_type c) +{ + if (((1ULL) << (wt.max_level)) <= c) + { + return {false, 0}; + } + auto node = wt.root(); + auto successor_subtree = node; + uint64_t mask = (1ULL) << (wt.max_level - 1); + while (!wt.is_leaf(node)) + { + auto children = wt.expand(node); + auto left_child = std::get<0>(children); + auto right_child = std::get<1>(children); + if (c & (mask >> node.level)) + { + if (right_child.size) + { + node = right_child; + } + else + { + if (successor_subtree == wt.root()) + { + return {false, 0}; + } + node = successor_subtree; + c = 0; + } + } + else + { + if (left_child.size) + { + node = left_child; + if (right_child.size) + { + successor_subtree = right_child; + } + } + else + { + node = right_child; + c = 0; + } + } + } + return {true, node.sym}; +} +template +struct _symbols_calls_wt +{ + typedef typename t_wt::value_type value_type; + static std::pair call_symbol_gte(t_wt const & wt, 
value_type c) + { + return wt.symbol_gte(c); + } + static std::pair call_symbol_lte(t_wt const & wt, value_type c) + { + return wt.symbol_lte(c); + } +}; +template +struct _symbols_calls_wt +{ + typedef typename t_wt::value_type value_type; + static std::pair call_symbol_gte(t_wt const & wt, value_type c) + { + return _symbol_gte(wt, c); + } + static std::pair call_symbol_lte(t_wt const & wt, value_type c) + { + return _symbol_lte(wt, c); + } +}; +template +struct has_symbols_wt +{ + template + static constexpr auto check(T *) -> + typename std::is_same().symbol_gte(std::declval())), + std::pair>::type + { + return std::true_type(); + } + template + static constexpr std::false_type check(...) + { + return std::false_type(); + } + typedef decltype(check(nullptr)) type; + static constexpr bool value = type::value; +}; +template +std::pair symbol_lte(t_wt const & wt, typename t_wt::value_type c) +{ + static_assert(t_wt::lex_ordered, "symbols_lte requires a lex_ordered WT"); + constexpr bool has_own = has_symbols_wt::value; + return _symbols_calls_wt::call_symbol_lte(wt, c); +} +template +std::pair symbol_gte(t_wt const & wt, typename t_wt::value_type c) +{ + static_assert(t_wt::lex_ordered, "symbols_gte requires a lex_ordered WT"); + constexpr bool has_own = has_symbols_wt::value; + return _symbols_calls_wt::call_symbol_gte(wt, c); +} +template +std::vector restricted_unique_range_values(t_wt const & wt, + typename t_wt::size_type x_i, + typename t_wt::size_type x_j, + typename t_wt::value_type y_i, + typename t_wt::value_type y_j) +{ + static_assert(t_wt::lex_ordered, "restricted_unique_range_values requires a lex_ordered WT"); + std::vector unique_values; + if (x_j > wt.size() - 1) + x_j = wt.size() - 1; + if ((x_i > x_j) || (y_i > y_j)) + { + return unique_values; + } + auto lower_y_bound = symbol_gte(wt, y_i); + auto upper_y_bound = symbol_lte(wt, y_j); + if (!lower_y_bound.first || !upper_y_bound.first || (lower_y_bound.second > upper_y_bound.second)) + { + 
return unique_values; + } + auto lower_y_bound_path = wt.path(lower_y_bound.second); + auto upper_y_bound_path = wt.path(upper_y_bound.second); + auto compare_path = [](uint64_t node_path, uint64_t node_path_len, std::pair bound_path) -> int + { + auto bound_path_len = bound_path.first; + auto bound_path_val = bound_path.second; + if (bound_path_len > node_path_len) + bound_path_val = bound_path_val >> (bound_path_len - node_path_len); + if (bound_path_len < node_path_len) + bound_path_val = bound_path_val << (node_path_len - bound_path_len); + if (node_path < bound_path_val) + return -1; + if (node_path > bound_path_val) + return 1; + return 0; + }; + std::stack> stack; + sdsl::range_type initial_range = {{x_i, x_j}}; + stack.emplace(wt.root(), initial_range, 0, 0); + while (!stack.empty()) + { + auto node_data = stack.top(); + stack.pop(); + auto node = std::get<0>(node_data); + auto range = std::get<1>(node_data); + auto node_path = std::get<2>(node_data); + auto node_level = std::get<3>(node_data); + if (wt.is_leaf(node)) + { + unique_values.emplace_back(wt.sym(node)); + } + else + { + auto children = wt.expand(node); + auto left_path = node_path << 1ULL; + auto right_path = (node_path << 1ULL) | 1ULL; + auto child_ranges = wt.expand(node, range); + if (compare_path(right_path, node_level + 1, upper_y_bound_path) < 1) + { + auto right_child = std::get<1>(children); + auto right_range = std::get<1>(child_ranges); + if (!sdsl::empty(right_range)) + stack.emplace(right_child, right_range, right_path, node_level + 1); + } + if (compare_path(left_path, node_level + 1, lower_y_bound_path) > -1) + { + auto left_child = std::get<0>(children); + auto left_range = std::get<0>(child_ranges); + if (!sdsl::empty(left_range)) + stack.emplace(left_child, left_range, left_path, node_level + 1); + } + } + } + return unique_values; +} +template +struct void_ +{ + typedef void type; +}; +template +struct has_node_type +{ + typedef std::false_type t_expr; + enum + { + value = 
t_expr::value + }; +}; +template +struct has_node_type::type> +{ + typedef std::true_type t_expr; + enum + { + value = t_expr::value + }; +}; +} +#endif +#ifndef INCLUDED_SDSL_WT_HUFF +#define INCLUDED_SDSL_WT_HUFF +#include +#include +#include +#include +#ifndef INCLUDED_SDSL_WT_PC +#define INCLUDED_SDSL_WT_PC +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace sdsl +{ +template > +class wt_pc +{ +public: + typedef typename t_tree_strat::template type tree_strat_type; + typedef int_vector<>::size_type size_type; + typedef typename tree_strat_type::value_type value_type; + typedef typename t_bitvector::difference_type difference_type; + typedef random_access_const_iterator const_iterator; + typedef const_iterator iterator; + typedef t_bitvector bit_vector_type; + typedef t_rank rank_1_type; + typedef t_select select_1_type; + typedef t_select_zero select_0_type; + typedef wt_tag index_category; + typedef typename tree_strat_type::alphabet_category alphabet_category; + typedef typename t_shape::template type shape_type; + enum + { + lex_ordered = shape_type::lex_ordered + }; + using node_type = typename tree_strat_type::node_type; +private: +#ifdef WT_PC_CACHE + mutable value_type m_last_access_answer; + mutable size_type m_last_access_i; + mutable size_type m_last_access_rl; +#endif + size_type m_size = 0; + size_type m_sigma = 0; + bit_vector_type m_bv; + rank_1_type m_bv_rank; + select_1_type m_bv_select1; + select_0_type m_bv_select0; + tree_strat_type m_tree; + void insert_char(value_type old_chr, std::vector & bv_node_pos, size_type times, bit_vector & bv) + { + uint64_t p = m_tree.bit_path(old_chr); + uint32_t path_len = p >> 56; + node_type v = m_tree.root(); + for (uint32_t l = 0; l < path_len; ++l, p >>= 1) + { + if (p & 1) + { + bv.set_int(bv_node_pos[v], 0xFFFFFFFFFFFFFFFFULL, times); + } + bv_node_pos[v] += times; + v = m_tree.child(v, p & 1); + } + } + size_type 
construct_tree_shape(std::vector const & C) + { + std::vector temp_nodes; + shape_type::construct_tree(C, temp_nodes); + size_type bv_size = 0; + m_tree = tree_strat_type(temp_nodes, bv_size, this); + return bv_size; + } + void construct_init_rank_select() + { + util::init_support(m_bv_rank, &m_bv); + util::init_support(m_bv_select0, &m_bv); + util::init_support(m_bv_select1, &m_bv); + } + void _interval_symbols(size_type i, + size_type j, + size_type & k, + std::vector & cs, + std::vector & rank_c_i, + std::vector & rank_c_j, + node_type v) const + { + size_type i_new = (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v)); + size_type j_new = (m_bv_rank(m_tree.bv_pos(v) + j) - m_tree.bv_pos_rank(v)); + i -= i_new; + j -= j_new; + if (i != j) + { + node_type v_new = m_tree.child(v, 0); + if (!m_tree.is_leaf(v_new)) + { + _interval_symbols(i, j, k, cs, rank_c_i, rank_c_j, v_new); + } + else + { + rank_c_i[k] = i; + rank_c_j[k] = j; + cs[k++] = m_tree.bv_pos_rank(v_new); + } + } + if (i_new != j_new) + { + node_type v_new = m_tree.child(v, 1); + if (!m_tree.is_leaf(v_new)) + { + _interval_symbols(i_new, j_new, k, cs, rank_c_i, rank_c_j, v_new); + } + else + { + rank_c_i[k] = i_new; + rank_c_j[k] = j_new; + cs[k++] = m_tree.bv_pos_rank(v_new); + } + } + } +public: + size_type const & sigma = m_sigma; + bit_vector_type const & bv = m_bv; + wt_pc(){}; + template + wt_pc(t_it begin, t_it end) : m_size(std::distance(begin, end)) + { + if (0 == m_size) + return; + std::vector C; + calculate_character_occurences(begin, end, C); + calculate_effective_alphabet_size(C, m_sigma); + size_type tree_size = construct_tree_shape(C); + bit_vector temp_bv(tree_size, 0); + std::vector bv_node_pos(m_tree.size(), 0); + for (size_type v = 0; v < m_tree.size(); ++v) + { + bv_node_pos[v] = m_tree.bv_pos(v); + } + value_type old_chr = *begin; + uint32_t times = 0; + for (auto it = begin; it != end; ++it) + { + value_type chr = *it; + if (chr != old_chr) + { + insert_char(old_chr, 
bv_node_pos, times, temp_bv); + times = 1; + old_chr = chr; + } + else + { + ++times; + if (times == 64) + { + insert_char(old_chr, bv_node_pos, times, temp_bv); + times = 0; + } + } + } + if (times > 0) + { + insert_char(old_chr, bv_node_pos, times, temp_bv); + } + m_bv = bit_vector_type(std::move(temp_bv)); + construct_init_rank_select(); + m_tree.init_node_ranks(m_bv_rank); + } + template + wt_pc(t_it begin, t_it end, std::string) : wt_pc(begin, end) + {} + wt_pc(wt_pc const & wt) : + m_size(wt.m_size), + m_sigma(wt.m_sigma), + m_bv(wt.m_bv), + m_bv_rank(wt.m_bv_rank), + m_bv_select1(wt.m_bv_select1), + m_bv_select0(wt.m_bv_select0), + m_tree(wt.m_tree) + { + m_bv_rank.set_vector(&m_bv); + m_bv_select1.set_vector(&m_bv); + m_bv_select0.set_vector(&m_bv); + } + wt_pc(wt_pc && wt) : + m_size(wt.m_size), + m_sigma(wt.m_sigma), + m_bv(std::move(wt.m_bv)), + m_bv_rank(std::move(wt.m_bv_rank)), + m_bv_select1(std::move(wt.m_bv_select1)), + m_bv_select0(std::move(wt.m_bv_select0)), + m_tree(std::move(wt.m_tree)) + { + m_bv_rank.set_vector(&m_bv); + m_bv_select1.set_vector(&m_bv); + m_bv_select0.set_vector(&m_bv); + } + wt_pc & operator=(wt_pc const & wt) + { + if (this != &wt) + { + wt_pc tmp(wt); + *this = std::move(tmp); + } + return *this; + } + wt_pc & operator=(wt_pc && wt) + { + if (this != &wt) + { + m_size = wt.m_size; + m_sigma = wt.m_sigma; + m_bv = std::move(wt.m_bv); + m_bv_rank = std::move(wt.m_bv_rank); + m_bv_rank.set_vector(&m_bv); + m_bv_select1 = std::move(wt.m_bv_select1); + m_bv_select1.set_vector(&m_bv); + m_bv_select0 = std::move(wt.m_bv_select0); + m_bv_select0.set_vector(&m_bv); + m_tree = std::move(wt.m_tree); + } + return *this; + } + size_type size() const + { + return m_size; + } + bool empty() const + { + return m_size == 0; + } + value_type operator[](size_type i) const + { + assert(i < size()); + node_type v = m_tree.root(); + while (!m_tree.is_leaf(v)) + { + if (m_bv[m_tree.bv_pos(v) + i]) + { + i = m_bv_rank(m_tree.bv_pos(v) + i) - 
m_tree.bv_pos_rank(v); + v = m_tree.child(v, 1); + } + else + { + i -= (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v)); + v = m_tree.child(v, 0); + } + } + return m_tree.bv_pos_rank(v); + }; + size_type rank(size_type i, value_type c) const + { + assert(i <= size()); + if (!m_tree.is_valid(m_tree.c_to_leaf(c))) + { + return 0; + } + if (m_sigma == 1) + { + return i; + } + uint64_t p = m_tree.bit_path(c); + uint32_t path_len = (p >> 56); + size_type result = i; + node_type v = m_tree.root(); + for (uint32_t l = 0; l < path_len and result; ++l, p >>= 1) + { + if (p & 1) + { + result = (m_bv_rank(m_tree.bv_pos(v) + result) - m_tree.bv_pos_rank(v)); + } + else + { + result -= (m_bv_rank(m_tree.bv_pos(v) + result) - m_tree.bv_pos_rank(v)); + } + v = m_tree.child(v, p & 1); + } + return result; + }; + std::pair inverse_select(size_type i) const + { + assert(i < size()); + node_type v = m_tree.root(); + while (!m_tree.is_leaf(v)) + { + if (m_bv[m_tree.bv_pos(v) + i]) + { + i = (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v)); + v = m_tree.child(v, 1); + } + else + { + i -= (m_bv_rank(m_tree.bv_pos(v) + i) - m_tree.bv_pos_rank(v)); + v = m_tree.child(v, 0); + } + } + return std::make_pair(i, (value_type)m_tree.bv_pos_rank(v)); + } + size_type select(size_type i, value_type c) const + { + assert(1 <= i and i <= rank(size(), c)); + node_type v = m_tree.c_to_leaf(c); + if (!m_tree.is_valid(v)) + { + return m_size; + } + if (m_sigma == 1) + { + return std::min(i - 1, m_size); + } + size_type result = i - 1; + uint64_t p = m_tree.bit_path(c); + uint32_t path_len = (p >> 56); + p <<= (64 - path_len); + for (uint32_t l = 0; l < path_len; ++l, p <<= 1) + { + if ((p & 0x8000000000000000ULL) == 0) + { + v = m_tree.parent(v); + result = m_bv_select0(m_tree.bv_pos(v) - m_tree.bv_pos_rank(v) + result + 1) - m_tree.bv_pos(v); + } + else + { + v = m_tree.parent(v); + result = m_bv_select1(m_tree.bv_pos_rank(v) + result + 1) - m_tree.bv_pos(v); + } + } + return result; 
+ }; + void interval_symbols(size_type i, + size_type j, + size_type & k, + std::vector & cs, + std::vector & rank_c_i, + std::vector & rank_c_j) const + { + assert(i <= j and j <= size()); + if (i == j) + { + k = 0; + } + else if (1 == m_sigma) + { + k = 1; + cs[0] = m_tree.bv_pos_rank(m_tree.root()); + rank_c_i[0] = std::min(i, m_size); + rank_c_j[0] = std::min(j, m_size); + } + else if ((j - i) == 1) + { + k = 1; + auto rc = inverse_select(i); + rank_c_i[0] = rc.first; + cs[0] = rc.second; + rank_c_j[0] = rank_c_i[0] + 1; + } + else if ((j - i) == 2) + { + auto rc = inverse_select(i); + rank_c_i[0] = rc.first; + cs[0] = rc.second; + rc = inverse_select(i + 1); + rank_c_i[1] = rc.first; + cs[1] = rc.second; + if (cs[0] == cs[1]) + { + k = 1; + rank_c_j[0] = rank_c_i[0] + 2; + } + else + { + k = 2; + if (lex_ordered and cs[0] > cs[1]) + { + std::swap(cs[0], cs[1]); + std::swap(rank_c_i[0], rank_c_i[1]); + } + rank_c_j[0] = rank_c_i[0] + 1; + rank_c_j[1] = rank_c_i[1] + 1; + } + } + else + { + k = 0; + _interval_symbols(i, j, k, cs, rank_c_i, rank_c_j, 0); + } + } + template > + typename std::enable_if::type + lex_count(size_type i, size_type j, value_type c) const + { + assert(i <= j and j <= size()); + if (1 == m_sigma) + { + value_type _c = m_tree.bv_pos_rank(m_tree.root()); + if (c == _c) + { + return t_ret_type{i, 0, 0}; + } + else if (c < _c) + { + return t_ret_type{0, 0, j - i}; + } + else + { + return t_ret_type{0, j - i, 0}; + } + } + if (i == j) + { + return t_ret_type{rank(i, c), 0, 0}; + } + uint64_t p = m_tree.bit_path(c); + uint32_t path_len = p >> 56; + if (path_len == 0) + { + value_type _c = (value_type)p; + if (c == _c) + { + return t_ret_type{0, 0, j - i}; + } + auto res = lex_count(i, j, _c); + return t_ret_type{0, j - i - std::get<2>(res), std::get<2>(res)}; + } + size_type smaller = 0, greater = 0; + node_type v = m_tree.root(); + for (uint32_t l = 0; l < path_len; ++l, p >>= 1) + { + size_type r1_1 = (m_bv_rank(m_tree.bv_pos(v) + i) - 
m_tree.bv_pos_rank(v)); + size_type r1_2 = (m_bv_rank(m_tree.bv_pos(v) + j) - m_tree.bv_pos_rank(v)); + if (p & 1) + { + smaller += j - r1_2 - i + r1_1; + i = r1_1; + j = r1_2; + } + else + { + greater += r1_2 - r1_1; + i -= r1_1; + j -= r1_2; + } + v = m_tree.child(v, p & 1); + } + return t_ret_type{i, smaller, greater}; + } + template > + typename std::enable_if::type lex_smaller_count(size_type i, + value_type c) const + { + assert(i <= size()); + if (1 == m_sigma) + { + value_type _c = m_tree.bv_pos_rank(m_tree.root()); + if (c == _c) + { + return t_ret_type{i, 0}; + } + else if (c < _c) + { + return t_ret_type{0, 0}; + } + else + { + return t_ret_type{0, i}; + } + } + uint64_t p = m_tree.bit_path(c); + uint32_t path_len = p >> 56; + if (path_len == 0) + { + value_type _c = (value_type)p; + if (c == _c) + { + return t_ret_type{0, 0}; + } + auto res = lex_smaller_count(i, _c); + return t_ret_type{0, std::get<0>(res) + std::get<1>(res)}; + } + size_type result = 0; + size_type all = i; + node_type v = m_tree.root(); + for (uint32_t l = 0; l < path_len and all; ++l, p >>= 1) + { + size_type ones = (m_bv_rank(m_tree.bv_pos(v) + all) - m_tree.bv_pos_rank(v)); + if (p & 1) + { + result += all - ones; + all = ones; + } + else + { + all -= ones; + } + v = m_tree.child(v, p & 1); + } + return t_ret_type{all, result}; + } + const_iterator begin() const + { + return const_iterator(this, 0); + } + const_iterator end() const + { + return const_iterator(this, size()); + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += write_member(m_size, out, child, "size"); + written_bytes += write_member(m_sigma, out, child, "sigma"); + written_bytes += m_bv.serialize(out, child, "bv"); + written_bytes += m_bv_rank.serialize(out, child, "bv_rank"); + written_bytes += 
m_bv_select1.serialize(out, child, "bv_select_1"); + written_bytes += m_bv_select0.serialize(out, child, "bv_select_0"); + written_bytes += m_tree.serialize(out, child, "tree"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + read_member(m_size, in); + read_member(m_sigma, in); + m_bv.load(in); + m_bv_rank.load(in, &m_bv); + m_bv_select1.load(in, &m_bv); + m_bv_select0.load(in, &m_bv); + m_tree.load(in); + } + bool operator==(wt_pc const & other) const noexcept + { + return (m_size == other.m_size) && (m_sigma == other.m_sigma) && (m_bv == other.m_bv) + && (m_bv_rank == other.m_bv_rank) && (m_bv_select1 == other.m_bv_select1) + && (m_bv_select0 == other.m_bv_select0) && (m_tree == other.m_tree); + } + bool operator!=(wt_pc const & other) const noexcept + { + return !(*this == other); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_sigma)); + ar(CEREAL_NVP(m_bv)); + ar(CEREAL_NVP(m_bv_rank)); + ar(CEREAL_NVP(m_bv_select1)); + ar(CEREAL_NVP(m_bv_select0)); + ar(CEREAL_NVP(m_tree)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_sigma)); + ar(CEREAL_NVP(m_bv)); + ar(CEREAL_NVP(m_bv_rank)); + m_bv_rank.set_vector(&m_bv); + ar(CEREAL_NVP(m_bv_select1)); + m_bv_select1.set_vector(&m_bv); + ar(CEREAL_NVP(m_bv_select0)); + m_bv_select0.set_vector(&m_bv); + ar(CEREAL_NVP(m_tree)); + } + auto bit_vec(node_type const & v) const -> node_bv_container + { + return node_bv_container(begin(v), end(v)); + } + auto seq(node_type const & v) const -> random_access_container> + { + return random_access_container>( + [&v, this](size_type i) + { + node_type vv = v; + while (!is_leaf(vv)) + { + auto vs = expand(vv); + auto rs = expand(vv, range_type{{0, i}}); + bool bit = *(begin(vv) + i); + i = std::get<1>(rs[bit]); + vv = vs[bit]; + } + return sym(vv); + }, + size(v)); + } + bool 
is_leaf(node_type const & v) const + { + return m_tree.is_leaf(v); + } + value_type sym(node_type const & v) const + { + return m_tree.bv_pos_rank(v); + } + bool empty(node_type const & v) const + { + return size(v) == 0; + } + auto size(node_type const & v) const -> decltype(m_tree.size(v)) + { + if (is_leaf(v)) + { + if (v == root()) + return size(); + else + { + auto parent = m_tree.parent(v); + auto rs = expand(parent, range_type{{0, size(parent) - 1}}); + if (m_tree.child(parent, 0) == v) + return std::get<1>(std::get<0>(rs)) - std::get<0>((std::get<0>(rs))) + 1; + else + return std::get<1>(std::get<1>(rs)) - std::get<0>((std::get<1>(rs))) + 1; + } + } + else + { + return m_tree.size(v); + } + } + node_type root() const + { + return m_tree.root(); + } + std::array expand(node_type const & v) const + { + return {{m_tree.child(v, 0), m_tree.child(v, 1)}}; + } + std::array expand(node_type const & v, range_vec_type const & ranges) const + { + auto ranges_copy = ranges; + return expand(v, std::move(ranges_copy)); + } + std::array expand(node_type const & v, range_vec_type && ranges) const + { + auto v_sp_rank = m_tree.bv_pos_rank(v); + range_vec_type res(ranges.size()); + size_t i = 0; + for (auto & r : ranges) + { + auto sp_rank = m_bv_rank(m_tree.bv_pos(v) + r[0]); + auto right_size = m_bv_rank(m_tree.bv_pos(v) + r[1] + 1) - sp_rank; + auto left_size = (r[1] - r[0] + 1) - right_size; + auto right_sp = sp_rank - v_sp_rank; + auto left_sp = r[0] - right_sp; + r = {{left_sp, left_sp + left_size - 1}}; + res[i++] = {{right_sp, right_sp + right_size - 1}}; + } + return {{ranges, std::move(res)}}; + } + std::array expand(node_type const & v, range_type const & r) const + { + auto v_sp_rank = m_tree.bv_pos_rank(v); + auto sp_rank = m_bv_rank(m_tree.bv_pos(v) + r[0]); + auto right_size = m_bv_rank(m_tree.bv_pos(v) + r[1] + 1) - sp_rank; + auto left_size = (r[1] - r[0] + 1) - right_size; + auto right_sp = sp_rank - v_sp_rank; + auto left_sp = r[0] - right_sp; + return 
{{{{left_sp, left_sp + left_size - 1}}, {{right_sp, right_sp + right_size - 1}}}}; + } + std::pair path(value_type c) const + { + uint64_t path = m_tree.bit_path(c); + uint64_t path_len = path >> 56; + path = bits::rev(path); + path = path >> (64 - path_len); + return {path_len, path}; + } + std::pair symbol_gte(value_type c) const + { + return m_tree.symbol_gte(c); + } + std::pair symbol_lte(value_type c) const + { + return m_tree.symbol_lte(c); + } +private: + auto begin(node_type const & v) const -> decltype(m_bv.begin() + m_tree.bv_pos(v)) + { + return m_bv.begin() + m_tree.bv_pos(v); + } + auto end(node_type const & v) const -> decltype(m_bv.begin() + m_tree.bv_pos(v) + m_tree.size(v)) + { + return m_bv.begin() + m_tree.bv_pos(v) + m_tree.size(v); + } +}; +} +#endif +namespace sdsl +{ +struct huff_shape; +template > +using wt_huff = wt_pc; +template +struct _huff_shape +{ + typedef typename t_wt::size_type size_type; + typedef std::pair tPII; + typedef std::priority_queue, + std::greater> tMPQPII; + enum + { + lex_ordered = 0 + }; + template + static void construct_tree(t_rac & C, std::vector & temp_nodes) + { + tMPQPII pq; + size_type i = 0; + std::for_each(std::begin(C), + std::end(C), + [&](decltype(*std::begin(C)) & freq) + { + if (freq > 0) + { + pq.push(tPII(freq, temp_nodes.size())); + temp_nodes.emplace_back(pc_node(freq, i)); + } + ++i; + }); + while (pq.size() > 1) + { + tPII v1, v2; + v1 = pq.top(); + pq.pop(); + v2 = pq.top(); + pq.pop(); + temp_nodes[v1.second].parent = temp_nodes.size(); + temp_nodes[v2.second].parent = temp_nodes.size(); + size_type frq_sum = v1.first + v2.first; + pq.push(tPII(frq_sum, temp_nodes.size())); + temp_nodes.emplace_back(pc_node(frq_sum, 0, pc_node::undef, v1.second, v2.second)); + } + } +}; +struct huff_shape +{ + template + using type = _huff_shape; +}; +} +#endif +namespace sdsl +{ +template +void construct_lcp_kasai(cache_config & config) +{ + static_assert(t_width == 0 or t_width == 8, + "construct_lcp_kasai: 
width must be `0` for integer alphabet and `8` for byte alphabet"); + int_vector<> lcp; + typedef int_vector<>::size_type size_type; + construct_isa(config); + { + int_vector text; + if (!load_from_cache(text, key_text_trait::KEY_TEXT, config)) + { + return; + } + int_vector_buffer<> isa_buf(cache_file_name(conf::KEY_ISA, config), + std::ios::in, + 1000000); + int_vector<> sa; + if (!load_from_cache(sa, conf::KEY_SA, config)) + { + return; + } + for (size_type i = 0, j = 0, sa_1 = 0, l = 0, n = isa_buf.size(); i < n; ++i) + { + sa_1 = isa_buf[i]; + if (sa_1) + { + j = sa[sa_1 - 1]; + if (l) + --l; + assert(i != j); + while (text[i + l] == text[j + l]) + { + ++l; + } + sa[sa_1 - 1] = l; + } + else + { + l = 0; + sa[n - 1] = 0; + } + } + for (size_type i = sa.size(); i > 1; --i) + { + sa[i - 1] = sa[i - 2]; + } + sa[0] = 0; + lcp = std::move(sa); + } + store_to_cache(lcp, conf::KEY_LCP, config); +} +template +void construct_lcp_PHI(cache_config & config) +{ + static_assert(t_width == 0 or t_width == 8, + "construct_lcp_PHI: width must be `0` for integer alphabet and `8` for byte alphabet"); + typedef int_vector<>::size_type size_type; + typedef int_vector text_type; + char const * KEY_TEXT = key_text_trait::KEY_TEXT; + int_vector_buffer<> sa_buf(cache_file_name(conf::KEY_SA, config)); + size_type n = sa_buf.size(); + assert(n > 0); + if (1 == n) + { + int_vector<> lcp(1, 0); + store_to_cache(lcp, conf::KEY_LCP, config); + return; + } + int_vector<> plcp(n, 0, sa_buf.width()); + for (size_type i = 0, sai_1 = 0; i < n; ++i) + { + size_type sai = sa_buf[i]; + plcp[sai] = sai_1; + sai_1 = sai; + } + text_type text; + load_from_cache(text, KEY_TEXT, config); + size_type max_l = 0; + for (size_type i = 0, l = 0; i < n - 1; ++i) + { + size_type phii = plcp[i]; + while (text[i + l] == text[phii + l]) + { + ++l; + } + plcp[i] = l; + if (l) + { + max_l = std::max(max_l, l); + --l; + } + } + util::clear(text); + uint8_t lcp_width = bits::hi(max_l) + 1; + std::string lcp_file = 
cache_file_name(conf::KEY_LCP, config); + size_type buffer_size = 1000000; + int_vector_buffer<> lcp_buf(lcp_file, std::ios::out, buffer_size, lcp_width); + lcp_buf[0] = 0; + sa_buf.buffersize(buffer_size); + for (size_type i = 1; i < n; ++i) + { + size_type sai = sa_buf[i]; + lcp_buf[i] = plcp[sai]; + } + lcp_buf.close(); + register_cache_file(conf::KEY_LCP, config); +} +inline void construct_lcp_semi_extern_PHI(cache_config & config) +{ + typedef int_vector<>::size_type size_type; + int_vector_buffer<> sa_buf(cache_file_name(conf::KEY_SA, config)); + size_type n = sa_buf.size(); + if (1 == n) + { + int_vector<> lcp(1, 0); + store_to_cache(lcp, conf::KEY_LCP, config); + return; + } + const uint8_t log_q = 6; + const uint32_t q = 1 << log_q; + const uint64_t modq = bits::lo_set[log_q]; + int_vector<64> plcp((n - 1 + q) >> log_q); + for (size_type i = 0, sai_1 = 0; i < n; ++i) + { + size_type sai = sa_buf[i]; + if ((sai & modq) == 0) + { + if ((sai >> log_q) >= plcp.size()) + { + } + plcp[sai >> log_q] = sai_1; + } + sai_1 = sai; + } + int_vector<8> text; + load_from_cache(text, conf::KEY_TEXT, config); + for (size_type i = 0, j, k, l = 0; i < plcp.size(); ++i) + { + j = i << log_q; + k = plcp[i]; + while (text[j + l] == text[k + l]) + ++l; + plcp[i] = l; + if (l >= q) + { + l -= q; + } + else + { + l = 0; + } + } + size_type buffer_size = 4000000; + sa_buf.buffersize(buffer_size); + int_vector_buffer<> lcp_out_buf(cache_file_name(conf::KEY_LCP, config), + std::ios::out, + buffer_size, + sa_buf.width()); + for (size_type i = 0, sai_1 = 0, l = 0, sai = 0, iq = 0; i < n; ++i) + { + sai = sa_buf[i]; + if ((sai & modq) == 0) + { + lcp_out_buf[i] = l = plcp[sai >> log_q]; + } + else + { + iq = sai & bits::lo_unset[log_q]; + l = plcp[sai >> log_q]; + if (l > (sai - iq)) + l -= (sai - iq); + else + l = 0; + while (text[sai + l] == text[sai_1 + l]) + ++l; + lcp_out_buf[i] = l; + } +#ifdef CHECK_LCP + size_type j = 0; + for (j = 0; j < l; ++j) + { + if (text[sai + j] != 
text[sai_1 + j]) + { + std::cout << "lcp[" << i << "]=" << l << " is two big! " << j << " is right!" + << " sai=" << sai << std::endl; + if ((sai & modq) != 0) + std::cout << " plcp[sai>>log_q]=" << plcp[sai >> log_q] << " sai-iq=" << sai - iq << " sai=" << sai + << " sai-iq=" << sai - iq << std::endl; + break; + } + } +#endif + sai_1 = sai; + } + lcp_out_buf.close(); + register_cache_file(conf::KEY_LCP, config); + return; +} +inline void construct_lcp_go(cache_config & config) +{ + typedef int_vector<>::size_type size_type; +#ifdef STUDY_INFORMATIONS + size_type racs = 0; + size_type matches = 0; + size_type comps2 = 0; +#endif + int_vector<8> text; + load_from_cache(text, conf::KEY_TEXT, config); + int_vector_buffer<> sa_buf(cache_file_name(conf::KEY_SA, config)); + const size_type n = sa_buf.size(); + const size_type m = 254; + if (1 == n) + { + int_vector<> lcp(1, 0); + store_to_cache(lcp, conf::KEY_LCP, config); + return; + } + size_type cnt_c[257] = {0}; + size_type cnt_cc[257] = {0}; + size_type cnt_cc2[257] = {0}; + size_type omitted_c[257] = {0}; + size_type prev_occ_in_bwt[256] = {0}; + for (size_type i = 0; i < 256; ++i) + prev_occ_in_bwt[i] = (size_type)-1; + unsigned char alphabet[257] = {0}; + uint8_t sigma = 0; + tLI m_list[2][256]; + size_type m_char_count[2] = {0}; + uint8_t m_chars[2][256] = {{0}, {0}}; + size_type nn = 0; + { + int_vector<8> lcp_sml(n, + 0); + for (size_type i = 0; i < n; ++i) + { + ++cnt_c[text[i] + 1]; + } + for (int i = 1; i < 257; ++i) + { + if (cnt_c[i] > 0) + { + alphabet[sigma++] = (unsigned char)(i - 1); + } + cnt_cc[i] = cnt_c[i] + cnt_cc[i - 1]; + } + alphabet[sigma] = '\0'; + { + int_vector_buffer<8> bwt_buf(cache_file_name(conf::KEY_BWT, config)); + size_type sai_1 = sa_buf[0]; + uint8_t bwti_1 = bwt_buf[0]; + lcp_sml[cnt_cc[bwti_1]++] = 0; + prev_occ_in_bwt[bwti_1] = 0; + ++omitted_c[alphabet[0]]; + int_vector<64> rmq_stack(2 * (m + 10)); + rmq_stack[0] = 0; + rmq_stack[1] = 0; + rmq_stack[2] = 1; + rmq_stack[3] = 0; 
+ size_type rmq_end = 3; + const size_type m_mod2 = m % 2; + uint8_t cur_c = alphabet[1]; + size_type big_val = 0; + for (size_type i = 1, sai, cur_c_idx = 1, cur_c_cnt = cnt_c[alphabet[1] + 1]; i < n; ++i, --cur_c_cnt) + { + uint8_t bwti = bwt_buf[i]; + sai = sa_buf[i]; + size_type lf = cnt_cc[bwti]; + if (!cur_c_cnt) + { + if (cur_c_cnt < sigma) + { + cur_c_cnt = cnt_c[(cur_c = alphabet[++cur_c_idx]) + 1]; + } + } + size_type l = 0; + if (i >= cnt_cc[cur_c]) + { + if (bwti == bwti_1 and lf < i) + { + l = lcp_sml[lf] ? lcp_sml[lf] - 1 : 0; + if (l == m) + { + l += (text[sai_1 + m] == text[sai + m]); +#ifdef STUDY_INFORMATIONS + if ((sai_1 ^ sai) >> 6) + ++racs; +#endif + } + lcp_sml[i] = l; + } + else + { + if (lf < i) + l = lcp_sml[lf] ? lcp_sml[lf] - 1 : 0; +#ifdef STUDY_INFORMATIONS + if ((sai_1 ^ sai) >> 6) + ++racs; +#endif + while (text[sai_1 + l] == text[sai + l] and l < m + 1) + { + ++l; +#ifdef STUDY_INFORMATIONS + ++matches; +#endif + } + lcp_sml[i] = l; + } + } + else + { + l = lcp_sml[i]; + } + if (l > m) + { + ++big_val; + if (i > 10000 and i < 10500 and big_val > 3000) + { + util::clear(text); + util::clear(lcp_sml); + construct_lcp_PHI<8>(config); + return; + } + } + size_type x = l + 1; + size_type j = rmq_end; + while (x <= rmq_stack[j]) + j -= 2; + rmq_stack[++j] = i + 1; + rmq_stack[++j] = x; + rmq_end = j; + if (lf > i) + { + size_type x_pos = prev_occ_in_bwt[bwti] + 2; + j = rmq_end - 3; + while (x_pos <= rmq_stack[j]) + j -= 2; + lcp_sml[lf] = + rmq_stack[j + 3] - (rmq_stack[j + 3] == m + 2); + } + if (l >= m) + { + if (l == m) + push_front_m_index(nn, cur_c, m_list[m_mod2], m_chars[m_mod2], m_char_count[m_mod2]); + ++nn; + } + else + ++omitted_c[cur_c]; + prev_occ_in_bwt[bwti] = i; + ++cnt_cc[bwti]; + sai_1 = sai; + bwti_1 = bwti; + } + } + util::clear(text); + if (n > 1000 and nn > 5 * (n / 6)) + { + util::clear(lcp_sml); + construct_lcp_PHI<8>(config); + return; + } + store_to_cache(lcp_sml, "lcp_sml", config); + } +#ifdef 
STUDY_INFORMATIONS + std::cout << "# n=" << n << " nn=" << nn << " nn/n=" << ((double)nn) / n << std::endl; +#endif + { + int_vector<> lcp_big(nn, + 0, + bits::hi(n - 1) + 1); + { + bit_vector todo(n, 0); + { + int_vector_buffer<8> lcp_sml_buf(cache_file_name("lcp_sml", config)); + for (size_type i = 0; i < n; ++i) + { + if (lcp_sml_buf[i] >= m) + { + todo[i] = 1; + } + } + } + cnt_cc2[0] = cnt_cc[0] = 0; + for (size_type i = 1, omitted_sum = 0; i < 257; ++i) + { + cnt_cc[i] = cnt_c[i] + cnt_cc[i - 1]; + omitted_sum += omitted_c[i - 1]; + cnt_cc2[i] = cnt_cc[i] - omitted_sum; + } + int_vector_buffer<8> bwt_buf(cache_file_name(conf::KEY_BWT, config)); + for (size_type i = 0, i2 = 0; i < n; ++i) + { + uint8_t b = bwt_buf[i]; + size_type lf_i = cnt_cc[b]; + if (todo[i]) + { + if (todo[lf_i]) + { + lcp_big[i2] = cnt_cc2[b]; + } + ++i2; + } + if (todo[lf_i]) + { + ++cnt_cc2[b]; + } + ++cnt_cc[b]; + } + } + int_vector<8> bwt2(nn), + shift_bwt2(nn); + bit_vector run2(nn + 1); + run2[nn] = 0; + { + int_vector_buffer<8> lcp_sml_buf(cache_file_name("lcp_sml", config)); + int_vector_buffer<8> bwt_buf(cache_file_name(conf::KEY_BWT, config)); + uint8_t b_1 = '\0'; + bool is_run = false; + for (size_type i = 0, i2 = 0; i < n; ++i) + { + uint8_t b = bwt_buf[i]; + if (lcp_sml_buf[i] >= m) + { + bwt2[i2] = b; + shift_bwt2[i2] = b_1; + run2[i2] = is_run; + is_run = true; + ++i2; + } + else + { + is_run = false; + } + b_1 = b; + } + } + bit_vector todo2(nn + 1, 1); + todo2[nn] = 0; + { + size_type m2 = m; + size_type char_ex[256]; + for (size_type i = 0; i < 256; ++i) + char_ex[i] = nn; + size_type char_occ = 0; + size_type m_mod2 = m2 % 2, mm1_mod2 = (m2 + 1) % 2; + while (m_char_count[m_mod2] > 0) + { + ++m2; + mm1_mod2 = (m2 + 1) % 2, m_mod2 = m2 % 2; + m_char_count[m_mod2] = 0; + std::sort(m_chars[mm1_mod2], + m_chars[mm1_mod2] + m_char_count[mm1_mod2]); + for (size_type mc = 0; mc < m_char_count[mm1_mod2]; ++mc) + { + tLI & mm1_mc_list = 
m_list[mm1_mod2][m_chars[mm1_mod2][m_char_count[mm1_mod2] - 1 - mc]]; + while (!mm1_mc_list.empty()) + { + size_type i = mm1_mc_list.front(); + mm1_mc_list.pop_front(); + for (size_type k = i; todo2[k]; --k) + { +#ifdef STUDY_INFORMATIONS + ++comps2; +#endif + uint8_t b = shift_bwt2[k]; + if (char_ex[b] != i) + { + char_ex[b] = i; + ++char_occ; + } + if (!run2[k]) + break; + } + for (size_type k = i; todo2[k] and char_occ; ++k) + { +#ifdef STUDY_INFORMATIONS + ++comps2; +#endif + uint8_t b = bwt2[k]; + if (char_ex[b] == i) + { + size_type p = lcp_big[k]; + push_back_m_index(p, b, m_list[m_mod2], m_chars[m_mod2], m_char_count[m_mod2]); + char_ex[b] = nn; + --char_occ; + } + if (!run2[k + 1]) + break; + } + lcp_big[i] = m2 - 1; + todo2[i] = 0; + } + } + } + } + store_to_cache(lcp_big, "lcp_big", config); + } + { + const size_type buffer_size = 1000000; + int_vector_buffer<> lcp_big_buf(cache_file_name("lcp_big", + config)); + int_vector_buffer<8> lcp_sml_buf(cache_file_name("lcp_sml", config), + std::ios::in, + buffer_size); + int_vector_buffer<> lcp_buf(cache_file_name(conf::KEY_LCP, config), + std::ios::out, + buffer_size, + lcp_big_buf.width()); + for (size_type i = 0, i2 = 0; i < n; ++i) + { + size_type l = lcp_sml_buf[i]; + if (l >= m) + { + l = lcp_big_buf[i2]; + ++i2; + } + lcp_buf[i] = l; + } + lcp_buf.close(); + } + register_cache_file(conf::KEY_LCP, config); +#ifdef STUDY_INFORMATIONS + std::cout << "# racs: " << racs << std::endl; + std::cout << "# matches: " << matches << std::endl; + std::cout << "# comps2: " << comps2 << std::endl; +#endif + return; +} +inline void construct_lcp_goPHI(cache_config & config) +{ + typedef int_vector<>::size_type size_type; + int_vector<8> text; + load_from_cache(text, conf::KEY_TEXT, config); + int_vector_buffer<> sa_buf(cache_file_name(conf::KEY_SA, config)); + const size_type n = sa_buf.size(); + const size_type m = 254; + if (1 == n) + { + int_vector<> lcp(1, 0); + store_to_cache(lcp, conf::KEY_LCP, config); + return; 
+ } + size_type cnt_c[257] = {0}; + size_type cnt_cc[257] = {0}; + size_type omitted_c[257] = {0}; + size_type prev_occ_in_bwt[256] = {0}; + for (size_type i = 0; i < 256; ++i) + prev_occ_in_bwt[i] = (size_type)-1; + unsigned char alphabet[257] = {0}; + uint8_t sigma = 0; + size_type nn = 0; + { + int_vector<8> lcp_sml(n, + 0); + for (size_type i = 0; i < n; ++i) + { + ++cnt_c[text[i] + 1]; + } + for (int i = 1; i < 257; ++i) + { + if (cnt_c[i] > 0) + { + alphabet[sigma++] = (unsigned char)(i - 1); + } + cnt_cc[i] = cnt_c[i] + cnt_cc[i - 1]; + } + alphabet[sigma] = '\0'; + { + int_vector_buffer<8> bwt_buf(cache_file_name(conf::KEY_BWT, config)); + size_type sai_1 = sa_buf[0]; + uint8_t bwti_1 = bwt_buf[0]; + lcp_sml[cnt_cc[bwti_1]++] = 0; + prev_occ_in_bwt[bwti_1] = 0; + ++omitted_c[alphabet[0]]; + int_vector<64> rmq_stack(2 * (m + 10)); + rmq_stack[0] = 0; + rmq_stack[1] = 0; + rmq_stack[2] = 1; + rmq_stack[3] = 0; + size_type rmq_end = 3; + uint8_t cur_c = alphabet[1]; + for (size_type i = 1, sai, cur_c_idx = 1, cur_c_cnt = cnt_c[alphabet[1] + 1]; i < n; ++i, --cur_c_cnt) + { + uint8_t bwti = bwt_buf[i]; + sai = sa_buf[i]; + size_type lf = cnt_cc[bwti]; + if (!cur_c_cnt) + { + if (cur_c_cnt < sigma) + { + cur_c_cnt = cnt_c[(cur_c = alphabet[++cur_c_idx]) + 1]; + } + } + size_type l = 0; + if (i >= cnt_cc[cur_c]) + { + if (bwti == bwti_1 and lf < i) + { + l = lcp_sml[lf] ? lcp_sml[lf] - 1 : 0; + if (l == m) + { + l += (text[sai_1 + m] == text[sai + m]); + } + lcp_sml[i] = l; + } + else + { + if (lf < i) + l = lcp_sml[lf] ? 
lcp_sml[lf] - 1 : 0; + while (text[sai_1 + l] == text[sai + l] and l < m + 1) + { + ++l; + } + lcp_sml[i] = l; + } + } + else + { + l = lcp_sml[i]; + } + size_type x = l + 1; + size_type j = rmq_end; + while (x <= rmq_stack[j]) + j -= 2; + rmq_stack[++j] = i + 1; + rmq_stack[++j] = x; + rmq_end = j; + if (lf > i) + { + size_type x_pos = prev_occ_in_bwt[bwti] + 2; + j = rmq_end - 3; + while (x_pos <= rmq_stack[j]) + j -= 2; + lcp_sml[lf] = + rmq_stack[j + 3] - (rmq_stack[j + 3] == m + 2); + } + if (l > m) + { + ++nn; + } + else + ++omitted_c[cur_c]; + prev_occ_in_bwt[bwti] = i; + ++cnt_cc[bwti]; + sai_1 = sai; + bwti_1 = bwti; + } + } + store_to_cache(lcp_sml, "lcp_sml", config); + } + { + int_vector<> lcp_big(0, 0, bits::hi(n - 1) + 1); + { + memory_monitor::event("lcp-init-phi-begin"); + size_type sa_n_1 = 0; + bit_vector todo(n, 0); + { + int_vector_buffer<8> lcp_sml_buf(cache_file_name("lcp_sml", config)); + for (size_type i = 0; i < n; ++i) + { + if (lcp_sml_buf[i] > m) + { + todo[sa_buf[i]] = 1; + } + } + sa_n_1 = sa_buf[n - 1]; + } + rank_support_v<> todo_rank(&todo); + const size_type bot = sa_n_1; + int_vector<> phi(nn, bot, bits::hi(n - 1) + 1); + int_vector_buffer<8> bwt_buf(cache_file_name(conf::KEY_BWT, config)); + int_vector_buffer<8> lcp_sml_buf(cache_file_name("lcp_sml", config)); + uint8_t b_1 = 0; + for (size_type i = 0, sai_1 = 0; i < n; ++i) + { + uint8_t b = bwt_buf[i]; + size_type sai = sa_buf[i]; + if (lcp_sml_buf[i] > m and b != b_1) + { + phi[todo_rank(sai)] = sai_1; + } + b_1 = b; + sai_1 = sai; + } + memory_monitor::event("lcp-init-phi-end"); + memory_monitor::event("lcp-calc-plcp-begin"); + for (size_type i = 0, ii = 0, l = m + 1, p = 0; i < n and ii < nn; ++i) + { + if (todo[i]) + { + if (i > 0 and todo[i - 1]) + l = l - 1; + else + l = m + 1; + if ((p = phi[ii]) != bot) + { + while (text[i + l] == text[p + l]) + ++l; + } + phi[ii++] = l; + } + } + memory_monitor::event("lcp-calc-plcp-end"); + util::clear(text); + 
memory_monitor::event("lcp-calc-lcp-begin"); + lcp_big.resize(nn); + for (size_type i = 0, ii = 0; i < n and ii < nn; ++i) + { + if (lcp_sml_buf[i] > m) + { + lcp_big[ii++] = phi[todo_rank(sa_buf[i])]; + } + } + memory_monitor::event("lcp-calc-lcp-end"); + } + store_to_cache(lcp_big, "lcp_big", config); + } + { + const size_type buffer_size = 1000000; + int_vector_buffer<> lcp_big_buf(cache_file_name("lcp_big", + config)); + int_vector_buffer<8> lcp_sml_buf(cache_file_name("lcp_sml", config), + std::ios::in, + buffer_size); + int_vector_buffer<> lcp_buf(cache_file_name(conf::KEY_LCP, config), + std::ios::out, + buffer_size, + lcp_big_buf.width()); + for (size_type i = 0, i2 = 0; i < n; ++i) + { + size_type l = lcp_sml_buf[i]; + if (l > m) + { + l = lcp_big_buf[i2]; + ++i2; + } + lcp_buf[i] = l; + } + lcp_big_buf.close(true); + lcp_sml_buf.close(true); + } + register_cache_file(conf::KEY_LCP, config); + return; +} +template , select_support_scan<1>, select_support_scan<0>>> +inline void construct_lcp_bwt_based(cache_config & config) +{ + typedef int_vector<>::size_type size_type; + std::string lcp_file = cache_file_name(conf::KEY_LCP, config); + memory_monitor::event("lcp-bwt-create-wt-huff-begin"); + t_wt wt_bwt; + construct(wt_bwt, cache_file_name(conf::KEY_BWT, config)); + uint64_t n = wt_bwt.size(); + memory_monitor::event("lcp-bwt-create-wt-huff-end"); + memory_monitor::event("lcp-bwt-init-begin"); + size_type lcp_value = 0; + size_type lcp_value_offset = 0; + size_type phase = 0; + size_type intervals = 0; + size_type intervals_new = 0; + std::queue q; + std::vector dict(2); + size_type source = 0, target = 1; + bool queue_used = true; + size_type use_queue_and_wt = n / 2048; + size_type quantity; + std::vector cs(wt_bwt.sigma); + std::vector rank_c_i(wt_bwt.sigma); + std::vector rank_c_j(wt_bwt.sigma); + size_type bb = (n * 20 - size_in_bytes(wt_bwt) * 8 * 1.25 - 5 * n) + / n; + if (n * 20 < size_in_bytes(wt_bwt) * 8 * 1.25 + 5 * n) + { + bb = 6; + } + bb = 
std::min(bb, (size_type)8); + size_type lcp_value_max = (1ULL << bb) - 1; + size_type space_in_bit_for_lcp = n * bb; +#ifdef STUDY_INFORMATIONS + std::cout << "# l=" << n << " b=" << (int)bb << " lcp_value_max=" << lcp_value_max + << " size_in_bytes(wt_bwt)=" << size_in_bytes(wt_bwt) << std::endl; +#endif + int_vector<> partial_lcp(n, 0, bb); + bit_vector index_done(n + 1, false); + rank_support_v<> ds_rank_support; + std::vector C; + create_C_array(C, wt_bwt); + memory_monitor::event("lcp-bwt-init-begin-end"); + memory_monitor::event("lcp-bwt-calc-values-begin"); + partial_lcp[0] = 0; + index_done[0] = true; + interval_symbols(wt_bwt, 0, n, quantity, cs, rank_c_i, rank_c_j); + for (size_type i = 0; i < quantity; ++i) + { + unsigned char c = cs[i]; + size_type a_new = C[c] + rank_c_i[i]; + size_type b_new = C[c] + rank_c_j[i]; + if (!index_done[b_new]) + { + if (b_new < n) + partial_lcp[b_new] = lcp_value; + index_done[b_new] = true; + q.push(a_new); + q.push(b_new); + ++intervals; + } + } + ++lcp_value; + while (intervals) + { + if (intervals < use_queue_and_wt && !queue_used) + { + memory_monitor::event("lcp-bwt-bitvec2queue-begin"); + util::clear(dict[target]); + size_type a2 = util::next_bit(dict[source], 0); + size_type b2 = util::next_bit(dict[source], a2 + 1); + while (b2 < dict[source].size()) + { + q.push((a2 - 1) >> 1); + q.push(b2 >> 1); + a2 = util::next_bit(dict[source], b2 + 1); + b2 = util::next_bit(dict[source], a2 + 1); + } + util::clear(dict[source]); + memory_monitor::event("lcp-bwt-bitvec2queue-end"); + } + if (intervals >= use_queue_and_wt && queue_used) + { + memory_monitor::event("lcp-bwt-queue2bitvec-begin"); + dict[source].resize(2 * (n + 1)); + util::set_to_value(dict[source], 0); + while (!q.empty()) + { + dict[source][(q.front() << 1) + 1] = 1; + q.pop(); + dict[source][(q.front() << 1)] = 1; + q.pop(); + } + dict[target].resize(2 * (n + 1)); + util::set_to_value(dict[target], 0); + memory_monitor::event("lcp-bwt-queue2bitvec-end"); + } 
+ if (intervals < use_queue_and_wt) + { + queue_used = true; + intervals_new = 0; + while (intervals) + { + size_type a = q.front(); + q.pop(); + size_type b = q.front(); + q.pop(); + --intervals; + interval_symbols(wt_bwt, a, b, quantity, cs, rank_c_i, rank_c_j); + for (size_type i = 0; i < quantity; ++i) + { + unsigned char c = cs[i]; + size_type a_new = C[c] + rank_c_i[i]; + size_type b_new = C[c] + rank_c_j[i]; + if (!index_done[b_new] and phase == 0) + { + partial_lcp[b_new] = lcp_value; + index_done[b_new] = true; + q.push(a_new); + q.push(b_new); + ++intervals_new; + } + else if (!index_done[b_new]) + { + size_type insert_pos = b_new - ds_rank_support.rank(b_new); + if (!partial_lcp[insert_pos]) + { + partial_lcp[insert_pos] = lcp_value - lcp_value_offset; + q.push(a_new); + q.push(b_new); + ++intervals_new; + } + } + } + } + intervals = intervals_new; + } + else + { + queue_used = false; + intervals = 0; + size_type a2 = util::next_bit(dict[source], 0); + size_type b2 = util::next_bit(dict[source], a2 + 1); + while (b2 < dict[source].size()) + { + interval_symbols(wt_bwt, ((a2 - 1) >> 1), (b2 >> 1), quantity, cs, rank_c_i, rank_c_j); + for (size_type i = 0; i < quantity; ++i) + { + unsigned char c = cs[i]; + size_type a_new = C[c] + rank_c_i[i]; + size_type b_new = C[c] + rank_c_j[i]; + if (!index_done[b_new] and phase == 0) + { + partial_lcp[b_new] = lcp_value; + index_done[b_new] = true; + dict[target][(a_new << 1) + 1] = 1; + dict[target][(b_new << 1)] = 1; + ++intervals; + } + else if (!index_done[b_new]) + { + size_type insert_pos = b_new - ds_rank_support.rank(b_new); + if (!partial_lcp[insert_pos]) + { + partial_lcp[insert_pos] = lcp_value - lcp_value_offset; + dict[target][(a_new << 1) + 1] = 1; + dict[target][(b_new << 1)] = 1; + ++intervals; + } + } + } + a2 = util::next_bit(dict[source], b2 + 1); + b2 = util::next_bit(dict[source], a2 + 1); + } + std::swap(source, target); + util::set_to_value(dict[target], 0); + } + ++lcp_value; + if (lcp_value 
>= lcp_value_max) + { + memory_monitor::event("lcp-bwt-write-to-file-begin"); + if (phase) + { + insert_lcp_values(partial_lcp, index_done, lcp_file, lcp_value, lcp_value_offset); + } + else + { + store_to_file(partial_lcp, lcp_file); + } + memory_monitor::event("lcp-bwt-write-to-file-end"); + memory_monitor::event("lcp-bwt-resize-variables-begin"); + util::init_support(ds_rank_support, &index_done); + lcp_value_offset = lcp_value_max - 1; + size_type remaining_lcp_values = index_done.size() - ds_rank_support.rank(index_done.size()); + uint8_t int_width_new = + std::min(space_in_bit_for_lcp / remaining_lcp_values, (size_type)bits::hi(n - 1) + 1); + lcp_value_max = lcp_value_offset + (1ULL << int_width_new); +#ifdef STUDY_INFORMATIONS + std::cout << "# l=" << remaining_lcp_values << " b=" << (int)int_width_new + << " lcp_value_max=" << lcp_value_max << std::endl; +#endif + partial_lcp.width(int_width_new); + partial_lcp.resize(remaining_lcp_values); + partial_lcp.shrink_to_fit(); + util::set_to_value(partial_lcp, 0); + ++phase; + memory_monitor::event("lcp-bwt-resize-variables-end"); + } + } + memory_monitor::event("lcp-bwt-calc-values-end"); + memory_monitor::event("lcp-bwt-merge-to-file-begin"); + if (phase) + { + insert_lcp_values(partial_lcp, index_done, lcp_file, lcp_value, lcp_value_offset); + } + else + { + store_to_file(partial_lcp, lcp_file); + } + register_cache_file(conf::KEY_LCP, config); + memory_monitor::event("lcp-bwt-merge-to-file-end"); +} +template , select_support_scan<1>, select_support_scan<0>>> +void construct_lcp_bwt_based2(cache_config & config) +{ + typedef int_vector<>::size_type size_type; + uint64_t n; + size_type buffer_size = 1000000; + size_type lcp_value = 0; + std::string tmp_lcp_file = cache_file_name(conf::KEY_LCP, config) + "_tmp"; + { + memory_monitor::event("lcp-bwt2-create-wt-huff-begin"); + t_wt wt_bwt; + construct(wt_bwt, cache_file_name(conf::KEY_BWT, config)); + n = wt_bwt.size(); + 
memory_monitor::event("lcp-bwt2-create-wt-huff-begin"); + memory_monitor::event("lcp-bwt2-init-begin"); + size_type intervals = 0; + size_type intervals_new = 0; + std::queue q; + std::vector dict(2); + size_type source = 0, target = 1; + bool queue_used = true; + size_type use_queue_and_wt = n / 2048; + size_type quantity; + std::vector cs(wt_bwt.sigma); + std::vector rank_c_i(wt_bwt.sigma); + std::vector rank_c_j(wt_bwt.sigma); + bool new_lcp_value = false; + uint8_t int_width = bits::hi(n) + 2; + int_vector_buffer<> lcp_positions_buf(tmp_lcp_file, + std::ios::out, + buffer_size, + int_width); + size_type idx_out_buf = 0; + bit_vector index_done(n + 1, 0); + std::vector C; + create_C_array(C, wt_bwt); + memory_monitor::event("lcp-bwt2-init-end"); + memory_monitor::event("lcp-bwt2-calc-values-begin"); + lcp_positions_buf[idx_out_buf++] = 0; + if (new_lcp_value) + { + lcp_positions_buf[idx_out_buf - 1] = lcp_positions_buf[idx_out_buf - 1] + n; + new_lcp_value = false; + } + index_done[0] = true; + interval_symbols(wt_bwt, 0, n, quantity, cs, rank_c_i, rank_c_j); + for (size_type i = 0; i < quantity; ++i) + { + unsigned char c = cs[i]; + size_type a_new = C[c] + rank_c_i[i]; + size_type b_new = C[c] + rank_c_j[i]; + if (!index_done[b_new]) + { + if (b_new < n) + { + lcp_positions_buf[idx_out_buf++] = b_new; + } + index_done[b_new] = true; + q.push(a_new); + q.push(b_new); + ++intervals; + } + } + ++lcp_value; + new_lcp_value = true; + while (intervals) + { + if (intervals < use_queue_and_wt && !queue_used) + { + memory_monitor::event("lcp-bwt2-bitvec2queue-begin"); + util::clear(dict[target]); + size_type a2 = util::next_bit(dict[source], 0); + size_type b2 = util::next_bit(dict[source], a2 + 1); + while (b2 < dict[source].size()) + { + q.push((a2 - 1) >> 1); + q.push(b2 >> 1); + a2 = util::next_bit(dict[source], b2 + 1); + b2 = util::next_bit(dict[source], a2 + 1); + } + util::clear(dict[source]); + memory_monitor::event("lcp-bwt2-bitvec2queue-end"); + } + if 
(intervals >= use_queue_and_wt && queue_used) + { + memory_monitor::event("lcp-bwt2-queue2bitvec-begin"); + dict[source].resize(2 * (n + 1)); + util::set_to_value(dict[source], 0); + while (!q.empty()) + { + dict[source][(q.front() << 1) + 1] = 1; + q.pop(); + dict[source][(q.front() << 1)] = 1; + q.pop(); + } + dict[target].resize(2 * (n + 1)); + util::set_to_value(dict[target], 0); + memory_monitor::event("lcp-bwt2-queue2bitvec-end"); + } + if (intervals < use_queue_and_wt) + { + queue_used = true; + intervals_new = 0; + while (intervals) + { + size_type a = q.front(); + q.pop(); + size_type b = q.front(); + q.pop(); + --intervals; + interval_symbols(wt_bwt, a, b, quantity, cs, rank_c_i, rank_c_j); + for (size_type i = 0; i < quantity; ++i) + { + unsigned char c = cs[i]; + size_type a_new = C[c] + rank_c_i[i]; + size_type b_new = C[c] + rank_c_j[i]; + if (!index_done[b_new]) + { + lcp_positions_buf[idx_out_buf++] = b_new; + if (new_lcp_value) + { + lcp_positions_buf[idx_out_buf - 1] = lcp_positions_buf[idx_out_buf - 1] + n; + new_lcp_value = false; + } + index_done[b_new] = true; + q.push(a_new); + q.push(b_new); + ++intervals_new; + } + } + } + intervals = intervals_new; + } + else + { + queue_used = false; + intervals = 0; + size_type a2 = util::next_bit(dict[source], 0); + size_type b2 = util::next_bit(dict[source], a2 + 1); + while (b2 < dict[source].size()) + { + interval_symbols(wt_bwt, ((a2 - 1) >> 1), (b2 >> 1), quantity, cs, rank_c_i, rank_c_j); + for (size_type i = 0; i < quantity; ++i) + { + unsigned char c = cs[i]; + size_type a_new = C[c] + rank_c_i[i]; + size_type b_new = C[c] + rank_c_j[i]; + if (!index_done[b_new]) + { + lcp_positions_buf[idx_out_buf++] = b_new; + if (new_lcp_value) + { + lcp_positions_buf[idx_out_buf - 1] = lcp_positions_buf[idx_out_buf - 1] + n; + new_lcp_value = false; + } + index_done[b_new] = true; + dict[target][(a_new << 1) + 1] = 1; + dict[target][(b_new << 1)] = 1; + ++intervals; + } + } + a2 = 
util::next_bit(dict[source], b2 + 1); + b2 = util::next_bit(dict[source], a2 + 1); + } + std::swap(source, target); + util::set_to_value(dict[target], 0); + } + ++lcp_value; + new_lcp_value = true; + } + memory_monitor::event("lcp-bwt2-calc-values-end"); + lcp_positions_buf.close(); + } + { + memory_monitor::event("lcp-bwt2-reordering-begin"); + int_vector_buffer<> lcp_positions(tmp_lcp_file, std::ios::in, buffer_size); + uint8_t int_width = bits::hi(lcp_value + 1) + 1; + size_type number_of_values = ((n / ((int_width - 1ULL) / 8 + 1) + 16) & (~(0x7ULL))); + std::string lcp_file = cache_file_name(conf::KEY_LCP, config); + int_vector_buffer<> lcp_array(lcp_file, + std::ios::out, + number_of_values * int_width / 8, + int_width); + number_of_values = lcp_array.buffersize() * 8 / int_width; + for (size_type position_begin = 0, position_end = number_of_values; position_begin < n and number_of_values > 0; + position_begin = position_end, position_end += number_of_values) + { +#ifdef STUDY_INFORMATIONS + std::cout << "# number_of_values=" << number_of_values << " fill lcp_values with " << position_begin + << " <= position <" << position_end << ", each lcp-value has " << (int)int_width + << " bit, lcp_value_max=" << lcp_value << " n=" << n << std::endl; +#endif + lcp_value = 0; + for (size_type i = 0; i < n; ++i) + { + size_type position = lcp_positions[i]; + if (position > n) + { + position -= n; + ++lcp_value; + } + if (position_begin <= position and position < position_end) + { + lcp_array[position] = lcp_value; + } + } + } + lcp_array.close(); + register_cache_file(conf::KEY_LCP, config); + lcp_positions.close(true); + memory_monitor::event("lcp-bwt2-reordering-end"); + } +} +} +#endif +#ifndef INCLUDED_SDSL_CONSTRUCT_SA +#define INCLUDED_SDSL_CONSTRUCT_SA +#include +#include +#include +#include +#ifndef INCLUDED_SDSL_CONSTRUCT_CONFIG +#define INCLUDED_SDSL_CONSTRUCT_CONFIG +namespace sdsl +{ +struct construct_config_data +{ + byte_sa_algo_type byte_algo_sa = 
LIBDIVSUFSORT; +}; +extern inline construct_config_data & construct_config() +{ + static construct_config_data data; + return data; +} +} +#endif +#ifndef SDSL_CONSTRUCT_SA_SE +#define SDSL_CONSTRUCT_SA_SE +#include +#include +#include +#include +#include +#include +#include +#include +namespace sdsl +{ +template +uint64_t _get_next_lms_position(int_vector_type & text, uint64_t i) +{ + if (i >= text.size() - 3) + { + return text.size() - 1; + } + uint64_t ci = text[i], cip1 = text[i + 1]; + while (ci <= cip1) + { + ++i; + ci = cip1; + cip1 = text[i + 1]; + } + uint64_t candidate = i + 1; + while (ci >= cip1) + { + if (ci > cip1) + { + if (i + 1 == text.size() - 1) + { + return text.size() - 1; + } + candidate = i + 1; + } + ++i; + ci = cip1; + cip1 = text[i + 1]; + } + return candidate; +} +inline void _construct_sa_IS(int_vector<> & text, + int_vector<> & sa, + std::string & filename_sa, + size_t n, + size_t text_offset, + size_t sigma, + uint64_t recursion) +{ + uint64_t buffersize = 1024 * 1024 / 8; + size_t name = 0; + size_t number_of_lms_strings = 0; + std::string filename_c_array = tmp_file(filename_sa, "_c_array" + util::to_string(recursion)); + { + std::vector bkt(sigma, 0); + for (size_t i = 0; i < n; ++i) + { + ++bkt[text[text_offset + i]]; + } + int_vector_buffer<> c_array(filename_c_array, std::ios::out, buffersize, 64); + for (size_t c = 0; c < sigma; ++c) + { + c_array[c] = bkt[c]; + } + bkt[0] = 0; + for (size_t c = 1; c < sigma; ++c) + { + bkt[c] = bkt[c - 1] + bkt[c]; + } + for (size_t i = n - 2, was_s_typ = 1; i < n; --i) + { + if (text[text_offset + i] > text[text_offset + i + 1]) + { + if (was_s_typ) + { + sa[bkt[text[text_offset + i + 1]]--] = i + 1; + ++number_of_lms_strings; + was_s_typ = 0; + } + } + else if (text[text_offset + i] < text[text_offset + i + 1]) + { + was_s_typ = 1; + } + } + bkt[0] = 0; + for (size_t c = 1; c < sigma; ++c) + { + bkt[c] = bkt[c - 1] + c_array[c - 1]; + } + for (size_t i = 0; i < n; ++i) + { + if (sa[i] > 0 and 
text[text_offset + sa[i]] <= text[text_offset + sa[i] - 1]) + { + sa[bkt[text[text_offset + sa[i] - 1]]++] = sa[i] - 1; + sa[i] = 0; + } + } + bkt[0] = 0; + for (size_t c = 1; c < sigma; ++c) + { + bkt[c] = bkt[c - 1] + c_array[c]; + } + c_array.close(); + for (size_t i = n - 1, endpointer = n; i < n; --i) + { + if (sa[i] > 0) + { + if (text[text_offset + sa[i] - 1] <= text[text_offset + sa[i]]) + { + sa[bkt[text[text_offset + sa[i] - 1]]--] = sa[i] - 1; + } + else + { + sa[--endpointer] = sa[i]; + } + sa[i] = 0; + } + } + for (size_t i = n - 2, end = n - 2, was_s_typ = 1; i < n; --i) + { + if (text[text_offset + i] > text[text_offset + i + 1]) + { + if (was_s_typ) + { + sa[(i + 1) >> 1] = end - i; + end = i + 1; + was_s_typ = 0; + } + } + else if (text[text_offset + i] < text[text_offset + i + 1]) + { + was_s_typ = 1; + } + } + for (size_t i = n - number_of_lms_strings + 1, cur_pos = 0, cur_len = 0, last_pos = n - 1, last_len = 1; i < n; + ++i) + { + cur_pos = sa[i]; + cur_len = sa[(cur_pos >> 1)]; + if (cur_len == last_len) + { + size_t l = 0; + while (l < cur_len and text[text_offset + cur_pos + l] == text[text_offset + last_pos + l]) + { + ++l; + } + if (l >= cur_len) + { + --name; + } + } + sa[(cur_pos >> 1)] = ++name; + last_pos = cur_pos; + last_len = cur_len; + } + } + if (name + 1 < number_of_lms_strings) + { + for (size_t i = 0, t = n - number_of_lms_strings; i < (n >> 1); ++i) + { + if (sa[i] > 0) + { + sa[t++] = sa[i]; + sa[i] = 0; + } + } + sa[n - 1] = 0; + std::string filename_sa_rec = tmp_file(filename_sa, "_sa_rec" + util::to_string(recursion + 1)); + _construct_sa_IS(sa, + sa, + filename_sa_rec, + number_of_lms_strings, + n - number_of_lms_strings, + name + 1, + recursion + 1); + for (size_t i = n - 2, endpointer = n - 1, was_s_typ = 1; i < n; --i) + { + if (text[text_offset + i] > text[text_offset + i + 1]) + { + if (was_s_typ) + { + sa[endpointer--] = i + 1; + was_s_typ = 0; + } + } + else if (text[text_offset + i] < text[text_offset + i + 1]) + 
{ + was_s_typ = 1; + } + } + for (size_t i = 0; i < number_of_lms_strings; ++i) + { + size_t pos = sa[i]; + sa[i] = sa[n - number_of_lms_strings + pos]; + sa[n - number_of_lms_strings + pos] = 0; + } + } + else + { + sa[0] = n - 1; + for (size_t i = 1; i < number_of_lms_strings; ++i) + { + sa[i] = sa[n - number_of_lms_strings + i]; + sa[n - number_of_lms_strings + i] = 0; + } + for (size_t i = number_of_lms_strings; i < (n >> 1); ++i) + { + sa[i] = 0; + } + } + { + int_vector_buffer<> c_array(filename_c_array, std::ios::in, buffersize, 64); + std::vector bkt(sigma, 0); + for (size_t c = 1; c < sigma; ++c) + { + bkt[c] = bkt[c - 1] + c_array[c]; + } + for (size_t i = number_of_lms_strings - 1; i < n; --i) + { + size_t pos = sa[i]; + sa[i] = 0; + sa[bkt[text[text_offset + pos]]--] = pos; + } + bkt[0] = 0; + for (size_t c = 1; c < sigma; ++c) + { + bkt[c] = bkt[c - 1] + c_array[c - 1]; + } + for (size_t i = 0; i < n; ++i) + { + if (sa[i] > 0 and text[text_offset + sa[i]] <= text[text_offset + sa[i] - 1]) + { + sa[bkt[text[text_offset + sa[i] - 1]]++] = sa[i] - 1; + } + } + bkt[0] = 0; + for (size_t c = 1; c < sigma; ++c) + { + bkt[c] = bkt[c - 1] + c_array[c]; + } + for (size_t i = n - 1; i < n; --i) + { + if (sa[i] > 0 and text[text_offset + sa[i] - 1] <= text[text_offset + sa[i]]) + { + sa[bkt[text[text_offset + sa[i] - 1]]--] = sa[i] - 1; + } + } + c_array.close(true); + } +} +template +void _construct_sa_se(int_vector_type & text, std::string filename_sa, uint64_t sigma, uint64_t recursion) +{ + std::string filename_text = tmp_file(filename_sa, "_text_rec" + util::to_string(recursion)); + store_to_file(text, filename_text); + uint64_t n = text.size(); + uint64_t nsize = bits::hi(n) + 1; + uint8_t int_width = bits::hi(n - 1) + 1; + uint64_t buffersize = 1024 * 1024 / 8; + size_t first_lms_pos = 0; + size_t number_of_lms_strings = 0; + size_t bkt_s_last = 0, bkt_s_sum = 0, bound_s = 0, bkt_l_sum = 0; + int_vector<> C(sigma, 0, int_width); + int_vector<> 
bkt_lms(sigma, 0, int_width); + int_vector<> bkt_s(sigma, 0, int_width); + int_vector<> bkt_l(sigma, 0, int_width); + std::string filename_lms_pos_b = tmp_file(filename_sa, "_lms_pos_b" + util::to_string(recursion)); + size_t parts = 10; + { + int_vector_buffer<1> lms_pos_b(filename_lms_pos_b, std::ios::out, buffersize, 1); + uint64_t ci = text[n - 1]; + ++C[ci]; + bool was_s_typ = 1; + for (size_t i = n - 2; i < n; --i) + { + uint64_t cip1 = ci; + ci = text[i]; + ++C[ci]; + if (was_s_typ) + { + ++bkt_s[text[i + 1]]; + if (ci > cip1) + { + ++bkt_lms[cip1]; + lms_pos_b[i + 1] = 1; + ++number_of_lms_strings; + first_lms_pos = i + 1; + was_s_typ = 0; + } + } + else if (ci < cip1) + { + was_s_typ = 1; + } + } + if (was_s_typ) + { + ++bkt_s[ci]; + } + bkt_l[0] = C[0] - bkt_s[0]; + for (size_t i = 1; i < C.size(); ++i) + { + bkt_l[i] = C[i] - bkt_s[i]; + C[i] = C[i] + C[i - 1]; + } + lms_pos_b.close(); + } + int_vector_buffer<> right(tmp_file(filename_sa, "_right" + util::to_string(recursion)), + std::ios::out, + buffersize, + nsize); + size_t right_pointer = 0; + int_vector_buffer<> left(tmp_file(filename_sa, "_left" + util::to_string(recursion)), + std::ios::out, + buffersize, + nsize); + size_t left_pointer = 0; + { + for (size_t i = 0, tmp2 = 0, tmp = 0; i < sigma; ++i) + { + tmp += bkt_lms[i]; + bkt_lms[i] = tmp2; + tmp2 = tmp; + } + int_vector_buffer<> lms_positions(tmp_file(filename_sa, "_lms_positions" + util::to_string(recursion)), + std::ios::out, + buffersize, + nsize); + for (size_t i = n - 2, was_s_typ = 1, ci = text[n - 1]; i < n; --i) + { + uint64_t cip1 = ci; + ci = text[i]; + if (ci > cip1) + { + if (was_s_typ) + { + lms_positions.push_back(bkt_lms[cip1]); + lms_positions.push_back(i + 1); + ++bkt_lms[cip1]; + was_s_typ = 0; + } + } + else if (ci < cip1) + { + was_s_typ = 1; + } + } + util::clear(text); + { + int_vector<> lms_strings(number_of_lms_strings, 0, int_width); + for (size_t i = 0; i < lms_positions.size();) + { + size_t idx = 
lms_positions[i++]; + size_t val = lms_positions[i++]; + lms_strings[idx] = val; + } + lms_positions.close(true); + left_pointer = 0; + for (size_t i = 0; i < number_of_lms_strings; ++i) + { + left[left_pointer++] = lms_strings[number_of_lms_strings - i - 1]; + } + } + load_from_file(text, filename_text); + } + left_pointer--; + { + for (size_t i = 0, tmp = 0; i < sigma; ++i) + { + tmp = bkt_l[i]; + bkt_l[i] = bkt_l_sum; + bkt_l_sum += tmp; + bkt_lms[i] = bkt_l[i]; + } + size_t partsize = bkt_l_sum / parts + 1; + int_vector<> array(partsize, 0, int_width); + std::vector> cached_array(parts - 1); + for (size_t i = 0; i < cached_array.size(); ++i) + { + cached_array[i] = int_vector_buffer<>( + tmp_file(filename_sa, "_rightbuffer" + util::to_string(i) + "_" + util::to_string(recursion)), + std::ios::out, + buffersize, + nsize); + } + for (size_t c = 0, pos = 0, offset = 0; c < sigma; ++c) + { + for (; pos < bkt_l[c]; ++pos) + { + if (pos - offset >= partsize) + { + offset += partsize; + for (size_t i = 0, cur_part = pos / partsize - 1; i < cached_array[cur_part].size();) + { + size_t src = cached_array[cur_part][i++]; + size_t val = cached_array[cur_part][i++]; + array[src - offset] = val; + } + cached_array[pos / partsize - 1].reset(); + } + size_t idx = array[pos - offset]; + if (idx == 0) + { + right[right_pointer++] = idx; + } + else + { + size_t symbol = text[idx - 1]; + if (symbol >= c) + { + size_t val = idx - 1; + size_t src = bkt_l[symbol]; + bkt_l[symbol] = bkt_l[symbol] + 1; + if ((src - offset) / partsize == 0) + { + array[src - offset] = val; + } + else + { + size_t part = src / partsize - 1; + cached_array[part].push_back(src); + cached_array[part].push_back(val); + } + } + else + { + right[right_pointer++] = idx; + } + } + } + while (left_pointer < number_of_lms_strings and text[left[left_pointer]] == c) + { + size_t idx = left[left_pointer--]; + --idx; + size_t symbol = text[idx]; + size_t val = idx; + size_t src = bkt_l[symbol]; + bkt_l[symbol] = 
bkt_l[symbol] + 1; + if ((src - offset) / partsize == 0) + { + array[src - offset] = val; + } + else + { + size_t part = src / partsize - 1; + cached_array[part].push_back(src); + cached_array[part].push_back(val); + } + } + } + for (size_t i = 0; i < cached_array.size(); ++i) + { + cached_array[i].close(true); + } + for (size_t i = 0; i < sigma; ++i) + { + bkt_l[i] = bkt_lms[i]; + } + } + right_pointer--; + left_pointer = 0; + left.reset(); + { + bkt_s_last = 0, bkt_s_sum = 0; + for (size_t i = 0; i < sigma; ++i) + { + bkt_s_sum += bkt_s[i]; + if (bkt_s[i]) + { + bkt_s[i] = bkt_s_sum; + bkt_s_last = bkt_s_sum; + } + else + { + bkt_s[i] = bkt_s_sum; + } + bkt_lms[i] = bkt_s[i]; + } + bound_s = bkt_s_sum; + for (size_t i = 0; i < sigma; ++i) + { + if (bkt_s[i] > bkt_s_sum / 2) + { + bkt_s_sum = bkt_s[i]; + break; + } + } + size_t partsize = bound_s / parts + 1; + int_vector<> array(partsize, 0, int_width); + std::vector> cached_array(parts - 1); + for (size_t i = 0; i < cached_array.size(); ++i) + { + cached_array[i] = int_vector_buffer<>( + tmp_file(filename_sa, "_leftbuffer" + util::to_string(i) + "_" + util::to_string(recursion)), + std::ios::out, + buffersize, + nsize); + } + for (size_t c = sigma - 1, pos = bkt_s_last - 1, offset = partsize * (parts - 1); c < sigma; --c) + { + for (; pos + 1 > bkt_s[c]; --pos) + { + while (pos < offset) + { + offset -= partsize; + for (size_t i = 0, cur_part = offset / partsize; i < cached_array[cur_part].size();) + { + size_t src = cached_array[cur_part][i++]; + size_t val = cached_array[cur_part][i++]; + array[src - offset] = val; + } + cached_array[offset / partsize].reset(); + } + size_t idx = array[pos - offset]; + if (idx == 0) + { + idx = n; + } + --idx; + size_t symbol = text[idx]; + if (symbol <= c) + { + bkt_s[symbol] = bkt_s[symbol] - 1; + size_t val = idx; + size_t src = bkt_s[symbol]; + if (src >= offset) + { + array[src - offset] = val; + } + else + { + size_t part = src / partsize; + 
cached_array[part].push_back(src); + cached_array[part].push_back(val); + } + } + else + { + left[left_pointer++] = array[pos - offset]; + } + } + while (right_pointer < number_of_lms_strings and text[right[right_pointer]] == c) + { + size_t idx = right[right_pointer--]; + if (idx == 0) + { + idx = n; + } + --idx; + size_t symbol = text[idx]; + bkt_s[symbol] = bkt_s[symbol] - 1; + size_t val = idx; + size_t src = bkt_s[symbol]; + if (src >= offset) + { + array[src - offset] = val; + } + else + { + size_t part = src / partsize; + cached_array[part].push_back(src); + cached_array[part].push_back(val); + } + } + } + for (size_t i = 0; i < cached_array.size(); ++i) + { + cached_array[i].close(true); + } + for (size_t i = 0; i < sigma; ++i) + { + bkt_s[i] = bkt_lms[i]; + } + } + right.buffersize(0); + right.reset(); + right_pointer = 0; + --left_pointer; + int_vector<1> same_lms(number_of_lms_strings, false); + size_t last_end_pos = first_lms_pos, order = number_of_lms_strings - 1; + same_lms[number_of_lms_strings - 1] = true; + for (size_t i = number_of_lms_strings - 2, a = 0, b = 0, last_a = left[number_of_lms_strings - 1]; + i < number_of_lms_strings; + --i) + { + b = last_a; + a = left[i]; + last_a = a; + size_t end_pos = _get_next_lms_position(text, a); + if (end_pos - a == last_end_pos - b) + { + while (a < end_pos and text[a] == text[b]) + { + ++a; + ++b; + } + if (text[a] == text[b]) + { + same_lms[i] = true; + --order; + } + } + last_end_pos = end_pos; + } + util::clear(text); + int_vector<> text_rec; + if (recursion == 0) + { + text_rec.width((bits::hi(order + 1) + 1)); + } + else + { + text_rec.width((bits::hi(number_of_lms_strings + 1) + 1)); + } + text_rec.resize(number_of_lms_strings); + util::_set_zero_bits(text_rec); + { + if (recursion == 0 and n / 2 * text_rec.width() > 8 * n) + { + size_t size_of_part = n / 4 + 3; + text_rec.resize(size_of_part); + util::_set_zero_bits(text_rec); + order = 0; + for (size_t i = number_of_lms_strings - 1; i < 
number_of_lms_strings; --i) + { + if (!same_lms[i]) + { + ++order; + } + if (left[i] / 2 >= size_of_part) + { + text_rec[(left[i] / 2) - size_of_part] = order; + } + } + std::string filename_text_rec_part2 = tmp_file(filename_sa, "_text_rec_part2" + util::to_string(recursion)); + size_t pos = 0; + for (size_t i = 0; i < size_of_part; ++i) + { + if (text_rec[i] > 0) + { + text_rec[pos++] = text_rec[i]; + } + } + text_rec.resize(pos); + store_to_file(text_rec, filename_text_rec_part2); + text_rec.resize(size_of_part); + util::_set_zero_bits(text_rec); + order = 0; + for (size_t i = number_of_lms_strings - 1; i < number_of_lms_strings; --i) + { + if (!same_lms[i]) + { + ++order; + } + if (left[i] / 2 < size_of_part) + { + text_rec[left[i] / 2] = order; + } + } + pos = 0; + for (size_t i = 0; i < size_of_part; ++i) + { + if (text_rec[i] > 0) + { + text_rec[pos++] = text_rec[i]; + } + } + text_rec.resize(number_of_lms_strings); + int_vector_buffer<> buf(filename_text_rec_part2, std::ios::in, 1024 * 1024); + for (size_t i = 0; i < buf.size(); ++i) + { + text_rec[pos++] = buf[i]; + } + buf.close(true); + text_rec[number_of_lms_strings - 1] = 0; + } + else + { + text_rec.resize(n / 2 + 1); + util::_set_zero_bits(text_rec); + order = 0; + for (size_t i = number_of_lms_strings - 1; i < number_of_lms_strings; --i) + { + if (!same_lms[i]) + { + ++order; + } + text_rec[left[left_pointer--] / 2] = order; + } + for (size_t i = 0, pos = 0; i < text_rec.size(); ++i) + { + if (text_rec[i] > 0) + { + text_rec[pos++] = text_rec[i]; + } + } + text_rec[number_of_lms_strings - 1] = 0; + text_rec.resize(number_of_lms_strings); + } + } + util::clear(same_lms); + left.buffersize(0); + left.reset(); + int_vector<> isa_rec; + std::string filename_sa_rec = tmp_file(filename_sa, "_sa_rec" + util::to_string(recursion + 1)); + if (text_rec.size() > order + 1) + { + if (recursion == 0) + { + memory_monitor::event("begin _construct_sa"); + _construct_sa_se>(text_rec, filename_sa_rec, order + 1, 
recursion + 1); + memory_monitor::event("end _construct_sa"); + } + else + { + text_rec.resize(text_rec.size() * 2); + for (size_t i = 0; i < number_of_lms_strings; ++i) + { + text_rec[number_of_lms_strings + i] = text_rec[i]; + text_rec[i] = 0; + } + memory_monitor::event("begin sa_simple"); + _construct_sa_IS(text_rec, + text_rec, + filename_sa_rec, + number_of_lms_strings, + number_of_lms_strings, + order + 1, + recursion + 1); + memory_monitor::event("end sa_simple"); + text_rec.resize(number_of_lms_strings); + store_to_file(text_rec, filename_sa_rec); + } + } + else + { + isa_rec = std::move(text_rec); + } + if (isa_rec.size() > 0) + { + bit_vector lms_pos_b(n); + load_from_file(lms_pos_b, filename_lms_pos_b); + sdsl::remove(filename_lms_pos_b); + select_support_mcl<> lms_select_support; + util::init_support(lms_select_support, &lms_pos_b); + int_vector<> tmp_left(number_of_lms_strings, 0, int_width); + for (size_t i = number_of_lms_strings - 1; i < number_of_lms_strings; --i) + { + size_t idx = isa_rec[i]; + size_t val = lms_select_support.select(i + 1); + tmp_left[idx] = val; + } + util::clear(lms_select_support); + util::clear(lms_pos_b); + util::clear(isa_rec); + left.buffersize(buffersize); + left_pointer = 0; + for (; left_pointer < number_of_lms_strings; ++left_pointer) + { + left[left_pointer] = tmp_left[number_of_lms_strings - left_pointer - 1]; + } + left_pointer--; + util::clear(tmp_left); + } + else + { + left.buffersize(buffersize); + left_pointer = 0; + { + bit_vector lms_pos_b(n); + load_from_file(lms_pos_b, filename_lms_pos_b); + sdsl::remove(filename_lms_pos_b); + select_support_mcl<> lms_select_support; + util::init_support(lms_select_support, &lms_pos_b); + int_vector_buffer<> sa_rec_buf(filename_sa_rec, std::ios::in, buffersize, nsize); + for (uint64_t i = 0; i < sa_rec_buf.size(); ++i) + { + uint64_t pos = lms_select_support.select(sa_rec_buf[i] + 1); + left[number_of_lms_strings - 1 - left_pointer++] = pos; + } + sa_rec_buf.close(true); + 
left_pointer--; + } + } + load_from_file(text, filename_text); + sdsl::remove(filename_text); + right.buffersize(buffersize); + right_pointer = 0; + int_vector_buffer<> cached_sa(filename_sa, std::ios::out, buffersize, nsize); + size_t sa_pointer = 0; + { + size_t partsize = bkt_l_sum / parts + 1; + int_vector<> array(partsize, 0, int_width); + std::vector> cached_array(parts - 1); + for (size_t i = 0; i < cached_array.size(); ++i) + { + cached_array[i] = int_vector_buffer<>( + tmp_file(filename_sa, "_rightbuffer" + util::to_string(i) + "_" + util::to_string(recursion)), + std::ios::out, + buffersize, + nsize); + } + for (size_t c = 0, pos = 0, offset = 0; c < sigma; ++c) + { + for (; pos < bkt_l[c]; ++pos) + { + if (pos - offset >= partsize) + { + offset += partsize; + for (size_t i = 0, cur_part = pos / partsize - 1; i < cached_array[cur_part].size();) + { + size_t src = cached_array[cur_part][i++]; + size_t val = cached_array[cur_part][i++]; + array[src - offset] = val; + } + cached_array[pos / partsize - 1].reset(); + } + size_t idx = array[pos - offset]; + if (idx == 0) + { + cached_sa[sa_pointer++] = idx; + right[right_pointer++] = idx; + } + else + { + size_t symbol = text[idx - 1]; + cached_sa[sa_pointer++] = idx; + if (symbol >= c) + { + size_t val = idx - 1; + size_t src = bkt_l[symbol]; + bkt_l[symbol] = bkt_l[symbol] + 1; + if ((src - offset) / partsize == 0) + { + array[src - offset] = val; + } + else + { + size_t part = src / partsize - 1; + cached_array[part].push_back(src); + cached_array[part].push_back(val); + } + } + else + { + right[right_pointer++] = idx; + } + } + } + sa_pointer = C[c]; + while (left_pointer < number_of_lms_strings and text[left[left_pointer]] == c) + { + size_t idx = left[left_pointer--]; + if (idx == 0) + { + idx = n; + } + --idx; + size_t symbol = text[idx]; + size_t val = idx; + size_t src = bkt_l[symbol]; + bkt_l[symbol] = bkt_l[symbol] + 1; + if ((src - offset) / partsize == 0) + { + array[src - offset] = val; + } + else 
+ { + size_t part = src / partsize - 1; + cached_array[part].push_back(src); + cached_array[part].push_back(val); + } + } + } + for (size_t i = 0; i < cached_array.size(); ++i) + { + cached_array[i].close(true); + } + } + left.close(true); + right_pointer--; + { + size_t partsize = bound_s / parts + 1; + int_vector<> array(partsize, 0, int_width); + std::vector> cached_array(parts - 1); + for (size_t i = 0; i < cached_array.size(); ++i) + { + cached_array[i] = int_vector_buffer<>( + tmp_file(filename_sa, "_leftbuffer" + util::to_string(i) + "_" + util::to_string(recursion)), + std::ios::out, + buffersize, + nsize); + } + for (size_t c = sigma - 1, pos = bkt_s_last - 1, offset = partsize * (parts - 1); c < sigma; --c) + { + assert(c < C.size()); + sa_pointer = C[c] - 1; + for (; pos + 1 > bkt_s[c]; --pos) + { + while (pos < offset) + { + offset -= partsize; + for (size_t i = 0, cur_part = offset / partsize; i < cached_array[cur_part].size();) + { + size_t src = cached_array[cur_part][i++]; + size_t val = cached_array[cur_part][i++]; + assert((src - offset) < array.size()); + array[src - offset] = val; + } + assert((offset / partsize) < parts - 1); + cached_array[offset / partsize].reset(); + } + assert((pos - offset) < array.size()); + size_t idx = array[pos - offset]; + if (idx == 0) + { + idx = n; + } + --idx; + assert((idx) < text.size()); + size_t symbol = text[idx]; + if (symbol <= c) + { + if (idx == n - 1) + { + cached_sa[sa_pointer--] = 0; + } + else + { + cached_sa[sa_pointer--] = idx + 1; + } + assert((symbol) < bkt_s.size()); + bkt_s[symbol] = bkt_s[symbol] - 1; + size_t val = idx; + size_t src = bkt_s[symbol]; + if (src >= offset) + { + assert((src - offset) < array.size()); + array[src - offset] = val; + } + else + { + size_t part = src / partsize; + assert(part < parts - 1); + cached_array[part].push_back(src); + cached_array[part].push_back(val); + } + } + else + { + if (idx == n - 1) + { + cached_sa[sa_pointer--] = 0; + } + else + { + 
cached_sa[sa_pointer--] = idx + 1; + } + } + } + while (right_pointer < number_of_lms_strings and text[right[right_pointer]] == c) + { + size_t idx = right[right_pointer--]; + if (idx == 0) + { + idx = n; + } + --idx; + size_t symbol = text[idx]; + assert((symbol) < bkt_s.size()); + bkt_s[symbol] = bkt_s[symbol] - 1; + size_t val = idx; + size_t src = bkt_s[symbol]; + if (src >= offset) + { + assert((src - offset) < array.size()); + array[src - offset] = val; + } + else + { + size_t part = src / partsize; + assert((part) < parts - 1); + cached_array[part].push_back(src); + cached_array[part].push_back(val); + } + } + } + for (size_t i = 0; i < cached_array.size(); ++i) + { + cached_array[i].close(true); + } + } + right.close(true); + cached_sa.close(); + return; +} +} +#endif +#ifndef INCLUDED_SDSL_DIVSUFSORT +#define INCLUDED_SDSL_DIVSUFSORT +#include +#include +#include +#include +#include +#include +#ifdef _OPENMP +# include +#endif +namespace sdsl +{ +#if !defined(UINT8_MAX) +#define UINT8_MAX (255) +#endif +#define ALPHABET_SIZE (256) +#define BUCKET_A_SIZE (ALPHABET_SIZE) +#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE) +#define SS_INSERTIONSORT_THRESHOLD (8) +#define SS_BLOCKSIZE (1024) +#define SS_MISORT_STACKSIZE (16) +#define TR_INSERTIONSORT_THRESHOLD (8) +template +struct libdivsufsort_config; +template <> +struct libdivsufsort_config +{ + static constexpr uint64_t TR_STACKSIZE = 64; + static constexpr uint64_t SS_SMERGE_STACKSIZE = 32; +}; +template <> +struct libdivsufsort_config +{ + static constexpr uint64_t TR_STACKSIZE = 96; + static constexpr uint64_t SS_SMERGE_STACKSIZE = 64; +}; +#define BUCKET_A(_c0) bucket_A[(_c0)] +#if ALPHABET_SIZE == 256 +#define BUCKET_B(_c0,_c1) (bucket_B[((_c1) << 8) | (_c0)]) +#define BUCKET_BSTAR(_c0,_c1) (bucket_B[((_c0) << 8) | (_c1)]) +#else +#define BUCKET_B(_c0,_c1) (bucket_B[(_c1)*ALPHABET_SIZE + (_c0)]) +#define BUCKET_BSTAR(_c0,_c1) (bucket_B[(_c0)*ALPHABET_SIZE + (_c1)]) +#endif +#define 
STACK_PUSH(_a,_b,_c,_d) \ + do \ + { \ + stack[ssize].a = (_a), stack[ssize].b = (_b), stack[ssize].c = (_c), stack[ssize++].d = (_d); \ + } \ + while (0) +#define STACK_PUSH5(_a,_b,_c,_d,_e) \ + do \ + { \ + stack[ssize].a = (_a), stack[ssize].b = (_b), stack[ssize].c = (_c), stack[ssize].d = (_d), \ + stack[ssize++].e = (_e); \ + } \ + while (0) +#define STACK_POP(_a,_b,_c,_d) \ + do \ + { \ + assert(0 <= ssize); \ + if (ssize == 0) \ + { \ + return; \ + } \ + (_a) = stack[--ssize].a, (_b) = stack[ssize].b, (_c) = stack[ssize].c, (_d) = stack[ssize].d; \ + } \ + while (0) +#define STACK_POP5(_a,_b,_c,_d,_e) \ + do \ + { \ + assert(0 <= ssize); \ + if (ssize == 0) \ + { \ + return; \ + } \ + (_a) = stack[--ssize].a, (_b) = stack[ssize].b, (_c) = stack[ssize].c, (_d) = stack[ssize].d, \ + (_e) = stack[ssize].e; \ + } \ + while (0) +static const int32_t lg_table[256] = { + -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}; +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) +inline int32_t ss_ilg(int32_t n) +{ +# if SS_BLOCKSIZE == 0 + return (n & 0xffff0000) ? ((n & 0xff000000) ? 24 + lg_table[(n >> 24) & 0xff] : 16 + lg_table[(n >> 16) & 0xff]) + : ((n & 0x0000ff00) ? 
8 + lg_table[(n >> 8) & 0xff] : 0 + lg_table[(n >> 0) & 0xff]); +# elif SS_BLOCKSIZE < 256 + return lg_table[n]; +# else + return (n & 0xff00) ? 8 + lg_table[(n >> 8) & 0xff] : 0 + lg_table[(n >> 0) & 0xff]; +# endif +} +inline int32_t ss_ilg(int64_t n) +{ +# if SS_BLOCKSIZE == 0 + return (n >> 32) ? ((n >> 48) ? ((n >> 56) ? 56 + lg_table[(n >> 56) & 0xff] : 48 + lg_table[(n >> 48) & 0xff]) + : ((n >> 40) ? 40 + lg_table[(n >> 40) & 0xff] : 32 + lg_table[(n >> 32) & 0xff])) + : ((n & 0xffff0000) + ? ((n & 0xff000000) ? 24 + lg_table[(n >> 24) & 0xff] : 16 + lg_table[(n >> 16) & 0xff]) + : ((n & 0x0000ff00) ? 8 + lg_table[(n >> 8) & 0xff] : 0 + lg_table[(n >> 0) & 0xff])); +# elif SS_BLOCKSIZE < 256 + return lg_table[n]; +# else + return (n & 0xff00) ? 8 + lg_table[(n >> 8) & 0xff] : 0 + lg_table[(n >> 0) & 0xff]; +# endif +} +#endif +#if SS_BLOCKSIZE != 0 +static const int32_t sqq_table[256] = { + 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, 64, 65, 67, 69, 71, 73, + 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, + 106, 107, 108, 109, 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 128, 128, + 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 144, 145, 146, 147, 148, 149, + 150, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, + 167, 168, 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, 181, 181, 182, 183, + 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, + 198, 199, 199, 200, 201, 201, 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, + 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, 221, 222, 222, 223, 224, 224, + 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, + 237, 
237, 238, 238, 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, 247, 248, + 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255}; +template +inline saidx_t ss_isqrt(saidx_t x) +{ + saidx_t y, e; + if (x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) + { + return SS_BLOCKSIZE; + } + e = (x & 0xffff0000) ? ((x & 0xff000000) ? 24 + lg_table[(x >> 24) & 0xff] : 16 + lg_table[(x >> 16) & 0xff]) + : ((x & 0x0000ff00) ? 8 + lg_table[(x >> 8) & 0xff] : 0 + lg_table[(x >> 0) & 0xff]); + if (e >= 16) + { + y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); + if (e >= 24) + { + y = (y + 1 + x / y) >> 1; + } + y = (y + 1 + x / y) >> 1; + } + else if (e >= 8) + { + y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; + } + else + { + return sqq_table[x] >> 4; + } + return (x < (y * y)) ? y - 1 : y; +} +#endif +template +inline int32_t ss_compare(uint8_t const * T, saidx_t const * p1, saidx_t const * p2, saidx_t depth) +{ + uint8_t const *U1, *U2, *U1n, *U2n; + for (U1 = T + depth + *p1, U2 = T + depth + *p2, U1n = T + *(p1 + 1) + 2, U2n = T + *(p2 + 1) + 2; + (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); + ++U1, ++U2) + {} + return U1 < U1n ? (U2 < U2n ? *U1 - *U2 : 1) : (U2 < U2n ? 
-1 : 0); +} +#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) +template +inline void ss_insertionsort(uint8_t const * T, saidx_t const * PA, saidx_t * first, saidx_t * last, saidx_t depth) +{ + saidx_t *i, *j; + saidx_t t; + int32_t r; + for (i = last - 2; first <= i; --i) + { + for (t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) + { + do + { + *(j - 1) = *j; + } + while ((++j < last) && (*j < 0)); + if (last <= j) + { + break; + } + } + if (r == 0) + { + *j = ~*j; + } + *(j - 1) = t; + } +} +#endif +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) +template +inline void ss_fixdown(uint8_t const * Td, saidx_t const * PA, saidx_t * SA, saidx_t i, saidx_t size) +{ + saidx_t j, k; + saidx_t v; + int32_t c, d, e; + for (v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) + { + d = Td[PA[SA[k = j++]]]; + if (d < (e = Td[PA[SA[j]]])) + { + k = j; + d = e; + } + if (d <= c) + { + break; + } + } + SA[i] = v; +} +template +inline void ss_heapsort(uint8_t const * Td, saidx_t const * PA, saidx_t * SA, saidx_t size) +{ + saidx_t i, m; + saidx_t t; + m = size; + if ((size % 2) == 0) + { + m--; + if (Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) + { + std::swap(SA[m], SA[m / 2]); + } + } + for (i = m / 2 - 1; 0 <= i; --i) + { + ss_fixdown(Td, PA, SA, i, m); + } + if ((size % 2) == 0) + { + std::swap(SA[0], SA[m]); + ss_fixdown(Td, PA, SA, (saidx_t)0, m); + } + for (i = m - 1; 0 < i; --i) + { + t = SA[0], SA[0] = SA[i]; + ss_fixdown(Td, PA, SA, (saidx_t)0, i); + SA[i] = t; + } +} +template +inline saidx_t * ss_median3(uint8_t const * Td, saidx_t const * PA, saidx_t * v1, saidx_t * v2, saidx_t * v3) +{ + if (Td[PA[*v1]] > Td[PA[*v2]]) + { + std::swap(v1, v2); + } + if (Td[PA[*v2]] > Td[PA[*v3]]) + { + if (Td[PA[*v1]] > Td[PA[*v3]]) + { + return v1; + } + else + { + return v3; + } + } + return v2; +} +template +inline saidx_t * +ss_median5(uint8_t const * Td, saidx_t const * PA, saidx_t * v1, saidx_t * v2, saidx_t * 
v3, saidx_t * v4, saidx_t * v5) +{ + if (Td[PA[*v2]] > Td[PA[*v3]]) + { + std::swap(v2, v3); + } + if (Td[PA[*v4]] > Td[PA[*v5]]) + { + std::swap(v4, v5); + } + if (Td[PA[*v2]] > Td[PA[*v4]]) + { + std::swap(v2, v4); + std::swap(v3, v5); + } + if (Td[PA[*v1]] > Td[PA[*v3]]) + { + std::swap(v1, v3); + } + if (Td[PA[*v1]] > Td[PA[*v4]]) + { + std::swap(v1, v4); + std::swap(v3, v5); + } + if (Td[PA[*v3]] > Td[PA[*v4]]) + { + return v4; + } + return v3; +} +template +inline saidx_t * ss_pivot(uint8_t const * Td, saidx_t const * PA, saidx_t * first, saidx_t * last) +{ + saidx_t * middle; + saidx_t t; + t = last - first; + middle = first + t / 2; + if (t <= 512) + { + if (t <= 32) + { + return ss_median3(Td, PA, first, middle, last - 1); + } + else + { + t >>= 2; + return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = ss_median3(Td, PA, first, first + t, first + (t << 1)); + middle = ss_median3(Td, PA, middle - t, middle, middle + t); + last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1); + return ss_median3(Td, PA, first, middle, last); +} +template +inline saidx_t * ss_partition(saidx_t const * PA, saidx_t * first, saidx_t * last, saidx_t depth) +{ + saidx_t *a, *b; + saidx_t t; + for (a = first - 1, b = last;;) + { + for (; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) + { + *a = ~*a; + } + for (; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) + {} + if (b <= a) + { + break; + } + t = ~*b; + *b = *a; + *a = t; + } + if (first < a) + { + *first = ~*first; + } + return a; +} +template +inline void ss_mintrosort(uint8_t const * T, saidx_t const * PA, saidx_t * first, saidx_t * last, saidx_t depth) +{ + struct + { + saidx_t *a, *b, c; + int32_t d; + } stack[SS_MISORT_STACKSIZE]; + uint8_t const * Td; + saidx_t *a, *b, *c, *d, *e, *f; + saidx_t s, t; + int32_t ssize; + int32_t limit; + int32_t v, x = 0; + for (ssize = 0, limit = ss_ilg((saidx_t)(last - first));;) + { + if ((last - first) 
<= SS_INSERTIONSORT_THRESHOLD) + { +# if 1 < SS_INSERTIONSORT_THRESHOLD + if (1 < (last - first)) + { + ss_insertionsort(T, PA, first, last, depth); + } +# endif + STACK_POP(first, last, depth, limit); + continue; + } + Td = T + depth; + if (limit-- == 0) + { + ss_heapsort(Td, PA, first, (saidx_t)(last - first)); + } + if (limit < 0) + { + for (a = first + 1, v = Td[PA[*first]]; a < last; ++a) + { + if ((x = Td[PA[*a]]) != v) + { + if (1 < (a - first)) + { + break; + } + v = x; + first = a; + } + } + if (Td[PA[*first] - 1] < v) + { + first = ss_partition(PA, first, a, depth); + } + if ((a - first) <= (last - a)) + { + if (1 < (a - first)) + { + STACK_PUSH(a, last, depth, -1); + last = a, depth += 1, limit = ss_ilg((saidx_t)(a - first)); + } + else + { + first = a, limit = -1; + } + } + else + { + if (1 < (last - a)) + { + STACK_PUSH(first, a, depth + 1, ss_ilg((saidx_t)(a - first))); + first = a, limit = -1; + } + else + { + last = a, depth += 1, limit = ss_ilg((saidx_t)(a - first)); + } + } + continue; + } + a = ss_pivot(Td, PA, first, last); + v = Td[PA[*a]]; + std::swap(*first, *a); + for (b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) + {} + if (((a = b) < last) && (x < v)) + { + for (; (++b < last) && ((x = Td[PA[*b]]) <= v);) + { + if (x == v) + { + std::swap(*b, *a); + ++a; + } + } + } + for (c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) + {} + if ((b < (d = c)) && (x > v)) + { + for (; (b < --c) && ((x = Td[PA[*c]]) >= v);) + { + if (x == v) + { + std::swap(*c, *d); + --d; + } + } + } + for (; b < c;) + { + std::swap(*b, *c); + for (; (++b < c) && ((x = Td[PA[*b]]) <= v);) + { + if (x == v) + { + std::swap(*b, *a); + ++a; + } + } + for (; (b < --c) && ((x = Td[PA[*c]]) >= v);) + { + if (x == v) + { + std::swap(*c, *d); + --d; + } + } + } + if (a <= d) + { + c = b - 1; + if ((s = a - first) > (t = b - a)) + { + s = t; + } + for (e = first, f = b - s; 0 < s; --s, ++e, ++f) + { + std::swap(*e, *f); + } + if ((s = d - c) > (t = last - d - 1)) + { + s = 
t; + } + for (e = b, f = last - s; 0 < s; --s, ++e, ++f) + { + std::swap(*e, *f); + } + a = first + (b - a), c = last - (d - c); + b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth); + if ((a - first) <= (last - c)) + { + if ((last - c) <= (c - b)) + { + STACK_PUSH(b, c, depth + 1, ss_ilg((saidx_t)(c - b))); + STACK_PUSH(c, last, depth, limit); + last = a; + } + else if ((a - first) <= (c - b)) + { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg((saidx_t)(c - b))); + last = a; + } + else + { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(first, a, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg((saidx_t)(c - b)); + } + } + else + { + if ((a - first) <= (c - b)) + { + STACK_PUSH(b, c, depth + 1, ss_ilg((saidx_t)(c - b))); + STACK_PUSH(first, a, depth, limit); + first = c; + } + else if ((last - c) <= (c - b)) + { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg((saidx_t)(c - b))); + first = c; + } + else + { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(c, last, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg((saidx_t)(c - b)); + } + } + } + else + { + limit += 1; + if (Td[PA[*first] - 1] < v) + { + first = ss_partition(PA, first, last, depth); + limit = ss_ilg((saidx_t)(last - first)); + } + depth += 1; + } + } +} +#endif +#if SS_BLOCKSIZE != 0 +template +inline void ss_blockswap(saidx_t * a, saidx_t * b, saidx_t n) +{ + saidx_t t; + for (; 0 < n; --n, ++a, ++b) + { + t = *a, *a = *b, *b = t; + } +} +template +inline void ss_rotate(saidx_t * first, saidx_t * middle, saidx_t * last) +{ + saidx_t *a, *b, t; + saidx_t l, r; + l = middle - first, r = last - middle; + for (; (0 < l) && (0 < r);) + { + if (l == r) + { + ss_blockswap(first, middle, l); + break; + } + if (l < r) + { + a = last - 1, b = middle - 1; + t = *a; + do + { + *a-- = *b, *b-- = *a; + if (b < first) + { + *a = t; + last = a; + if ((r -= l + 1) <= l) + { + break; + } + a -= 1, b = middle - 1; + t 
= *a; + } + } + while (1); + } + else + { + a = first, b = middle; + t = *a; + do + { + *a++ = *b, *b++ = *a; + if (last <= b) + { + *a = t; + first = a + 1; + if ((l -= r + 1) <= r) + { + break; + } + a += 1, b = middle; + t = *a; + } + } + while (1); + } + } +} +template +inline void +ss_inplacemerge(uint8_t const * T, saidx_t const * PA, saidx_t * first, saidx_t * middle, saidx_t * last, saidx_t depth) +{ + saidx_t const * p; + saidx_t *a, *b; + saidx_t len, half; + int32_t q, r; + int32_t x; + for (;;) + { + if (*(last - 1) < 0) + { + x = 1; + p = PA + ~*(last - 1); + } + else + { + x = 0; + p = PA + *(last - 1); + } + for (a = first, len = middle - first, half = len >> 1, r = -1; 0 < len; len = half, half >>= 1) + { + b = a + half; + q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth); + if (q < 0) + { + a = b + 1; + half -= (len & 1) ^ 1; + } + else + { + r = q; + } + } + if (a < middle) + { + if (r == 0) + { + *a = ~*a; + } + ss_rotate(a, middle, last); + last -= middle - a; + middle = a; + if (first == middle) + { + break; + } + } + --last; + if (x != 0) + { + while (*--last < 0) + {} + } + if (middle == last) + { + break; + } + } +} +template +inline void ss_mergeforward(uint8_t const * T, + saidx_t const * PA, + saidx_t * first, + saidx_t * middle, + saidx_t * last, + saidx_t * buf, + saidx_t depth) +{ + saidx_t *a, *b, *c, *bufend; + saidx_t t; + int32_t r; + bufend = buf + (middle - first) - 1; + ss_blockswap(buf, first, (saidx_t)(middle - first)); + for (t = *(a = first), b = buf, c = middle;;) + { + r = ss_compare(T, PA + *b, PA + *c, depth); + if (r < 0) + { + do + { + *a++ = *b; + if (bufend <= b) + { + *bufend = t; + return; + } + *b++ = *a; + } + while (*b < 0); + } + else if (r > 0) + { + do + { + *a++ = *c, *c++ = *a; + if (last <= c) + { + while (b < bufend) + { + *a++ = *b, *b++ = *a; + } + *a = *b, *b = t; + return; + } + } + while (*c < 0); + } + else + { + *c = ~*c; + do + { + *a++ = *b; + if (bufend <= b) + { + *bufend = t; + return; 
+ } + *b++ = *a; + } + while (*b < 0); + do + { + *a++ = *c, *c++ = *a; + if (last <= c) + { + while (b < bufend) + { + *a++ = *b, *b++ = *a; + } + *a = *b, *b = t; + return; + } + } + while (*c < 0); + } + } +} +template +inline void ss_mergebackward(uint8_t const * T, + saidx_t const * PA, + saidx_t * first, + saidx_t * middle, + saidx_t * last, + saidx_t * buf, + saidx_t depth) +{ + saidx_t const *p1, *p2; + saidx_t *a, *b, *c, *bufend; + saidx_t t; + int32_t r; + int32_t x; + bufend = buf + (last - middle) - 1; + ss_blockswap(buf, middle, (saidx_t)(last - middle)); + x = 0; + if (*bufend < 0) + { + p1 = PA + ~*bufend; + x |= 1; + } + else + { + p1 = PA + *bufend; + } + if (*(middle - 1) < 0) + { + p2 = PA + ~*(middle - 1); + x |= 2; + } + else + { + p2 = PA + *(middle - 1); + } + for (t = *(a = last - 1), b = bufend, c = middle - 1;;) + { + r = ss_compare(T, p1, p2, depth); + if (0 < r) + { + if (x & 1) + { + do + { + *a-- = *b, *b-- = *a; + } + while (*b < 0); + x ^= 1; + } + *a-- = *b; + if (b <= buf) + { + *buf = t; + break; + } + *b-- = *a; + if (*b < 0) + { + p1 = PA + ~*b; + x |= 1; + } + else + { + p1 = PA + *b; + } + } + else if (r < 0) + { + if (x & 2) + { + do + { + *a-- = *c, *c-- = *a; + } + while (*c < 0); + x ^= 2; + } + *a-- = *c, *c-- = *a; + if (c < first) + { + while (buf < b) + { + *a-- = *b, *b-- = *a; + } + *a = *b, *b = t; + break; + } + if (*c < 0) + { + p2 = PA + ~*c; + x |= 2; + } + else + { + p2 = PA + *c; + } + } + else + { + if (x & 1) + { + do + { + *a-- = *b, *b-- = *a; + } + while (*b < 0); + x ^= 1; + } + *a-- = ~*b; + if (b <= buf) + { + *buf = t; + break; + } + *b-- = *a; + if (x & 2) + { + do + { + *a-- = *c, *c-- = *a; + } + while (*c < 0); + x ^= 2; + } + *a-- = *c, *c-- = *a; + if (c < first) + { + while (buf < b) + { + *a-- = *b, *b-- = *a; + } + *a = *b, *b = t; + break; + } + if (*b < 0) + { + p1 = PA + ~*b; + x |= 1; + } + else + { + p1 = PA + *b; + } + if (*c < 0) + { + p2 = PA + ~*c; + x |= 2; + } + else + { + p2 = PA 
+ *c; + } + } + } +} +template +inline void ss_swapmerge(uint8_t const * T, + saidx_t const * PA, + saidx_t * first, + saidx_t * middle, + saidx_t * last, + saidx_t * buf, + saidx_t bufsize, + saidx_t depth) +{ +#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a))) +#define MERGE_CHECK(a,b,c) \ + do \ + { \ + if (((c)&1) || (((c)&2) && (ss_compare(T, PA + GETIDX(*((a)-1)), PA + *(a), depth) == 0))) \ + { \ + *(a) = ~*(a); \ + } \ + if (((c)&4) && ((ss_compare(T, PA + GETIDX(*((b)-1)), PA + *(b), depth) == 0))) \ + { \ + *(b) = ~*(b); \ + } \ + } \ + while (0) + struct + { + saidx_t *a, *b, *c; + int32_t d; + } stack[libdivsufsort_config::SS_SMERGE_STACKSIZE]; + saidx_t *l, *r, *lm, *rm; + saidx_t m, len, half; + int32_t ssize; + int32_t check, next; + for (check = 0, ssize = 0;;) + { + if ((last - middle) <= bufsize) + { + if ((first < middle) && (middle < last)) + { + ss_mergebackward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + if ((middle - first) <= bufsize) + { + if (first < middle) + { + ss_mergeforward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + for (m = 0, len = std::min(middle - first, last - middle), half = len >> 1; 0 < len; len = half, half >>= 1) + { + if (ss_compare(T, PA + GETIDX(*(middle + m + half)), PA + GETIDX(*(middle - m - half - 1)), depth) < 0) + { + m += half + 1; + half -= (len & 1) ^ 1; + } + } + if (0 < m) + { + lm = middle - m, rm = middle + m; + ss_blockswap(lm, middle, m); + l = r = middle, next = 0; + if (rm < last) + { + if (*rm < 0) + { + *rm = ~*rm; + if (first < lm) + { + for (; *--l < 0;) + {} + next |= 4; + } + next |= 1; + } + else if (first < lm) + { + for (; *r < 0; ++r) + {} + next |= 2; + } + } + if ((l - first) <= (last - r)) + { + STACK_PUSH(r, rm, last, (next & 3) | (check & 4)); + middle = lm, last = l, check = (check & 3) | (next & 4); + } + 
else + { + if ((next & 2) && (r == middle)) + { + next ^= 6; + } + STACK_PUSH(first, lm, l, (check & 3) | (next & 4)); + first = r, middle = rm, check = (next & 3) | (check & 4); + } + } + else + { + if (ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) + { + *middle = ~*middle; + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + } + } +} +#endif +template +void sssort(uint8_t const * T, + saidx_t const * PA, + saidx_t * first, + saidx_t * last, + saidx_t * buf, + saidx_t bufsize, + saidx_t depth, + saidx_t n, + int32_t lastsuffix) +{ + saidx_t * a; +#if SS_BLOCKSIZE != 0 + saidx_t *b, *middle, *curbuf; + saidx_t j, k, curbufsize, limit; +#endif + saidx_t i; + if (lastsuffix != 0) + { + ++first; + } +#if SS_BLOCKSIZE == 0 + ss_mintrosort(T, PA, first, last, depth); +#else + if ((bufsize < SS_BLOCKSIZE) && (bufsize < (last - first)) && (bufsize < (limit = ss_isqrt(last - first)))) + { + if (SS_BLOCKSIZE < limit) + { + limit = SS_BLOCKSIZE; + } + buf = middle = last - limit, bufsize = limit; + } + else + { + middle = last, limit = 0; + } + for (a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) + { +# if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth); +# elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth); +# endif + curbufsize = last - (a + SS_BLOCKSIZE); + curbuf = a + SS_BLOCKSIZE; + if (curbufsize <= bufsize) + { + curbufsize = bufsize, curbuf = buf; + } + for (b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) + { + ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth); + } + } +# if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, middle, depth); +# elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, middle, depth); +# endif + for (k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) + { + if (i & 1) + { + ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth); + a -= k; + } + } + if 
(limit != 0) + { +# if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, middle, last, depth); +# elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, middle, last, depth); +# endif + ss_inplacemerge(T, PA, first, middle, last, depth); + } +#endif + if (lastsuffix != 0) + { + saidx_t PAi[2]; + PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; + for (a = first, i = *(first - 1); (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); + ++a) + { + *(a - 1) = *a; + } + *(a - 1) = i; + } +} +inline int32_t tr_ilg(int32_t n) +{ + return (n & 0xffff0000) ? ((n & 0xff000000) ? 24 + lg_table[(n >> 24) & 0xff] : 16 + lg_table[(n >> 16) & 0xff]) + : ((n & 0x0000ff00) ? 8 + lg_table[(n >> 8) & 0xff] : 0 + lg_table[(n >> 0) & 0xff]); +} +inline int32_t tr_ilg(int64_t n) +{ + return (n >> 32) ? ((n >> 48) ? ((n >> 56) ? 56 + lg_table[(n >> 56) & 0xff] : 48 + lg_table[(n >> 48) & 0xff]) + : ((n >> 40) ? 40 + lg_table[(n >> 40) & 0xff] : 32 + lg_table[(n >> 32) & 0xff])) + : ((n & 0xffff0000) + ? ((n & 0xff000000) ? 24 + lg_table[(n >> 24) & 0xff] : 16 + lg_table[(n >> 16) & 0xff]) + : ((n & 0x0000ff00) ? 
8 + lg_table[(n >> 8) & 0xff] : 0 + lg_table[(n >> 0) & 0xff])); +} +template +inline void tr_insertionsort(saidx_t const * ISAd, saidx_t * first, saidx_t * last) +{ + saidx_t *a, *b; + saidx_t t, r; + for (a = first + 1; a < last; ++a) + { + for (t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) + { + do + { + *(b + 1) = *b; + } + while ((first <= --b) && (*b < 0)); + if (b < first) + { + break; + } + } + if (r == 0) + { + *b = ~*b; + } + *(b + 1) = t; + } +} +template +inline void tr_fixdown(saidx_t const * ISAd, saidx_t * SA, saidx_t i, saidx_t size) +{ + saidx_t j, k; + saidx_t v; + saidx_t c, d, e; + for (v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) + { + d = ISAd[SA[k = j++]]; + if (d < (e = ISAd[SA[j]])) + { + k = j; + d = e; + } + if (d <= c) + { + break; + } + } + SA[i] = v; +} +template +inline void tr_heapsort(saidx_t const * ISAd, saidx_t * SA, saidx_t size) +{ + saidx_t i, m; + saidx_t t; + m = size; + if ((size % 2) == 0) + { + m--; + if (ISAd[SA[m / 2]] < ISAd[SA[m]]) + { + std::swap(SA[m], SA[m / 2]); + } + } + for (i = m / 2 - 1; 0 <= i; --i) + { + tr_fixdown(ISAd, SA, i, m); + } + if ((size % 2) == 0) + { + std::swap(SA[0], SA[m]); + tr_fixdown(ISAd, SA, (saidx_t)0, m); + } + for (i = m - 1; 0 < i; --i) + { + t = SA[0], SA[0] = SA[i]; + tr_fixdown(ISAd, SA, (saidx_t)0, i); + SA[i] = t; + } +} +template +inline saidx_t * tr_median3(saidx_t const * ISAd, saidx_t * v1, saidx_t * v2, saidx_t * v3) +{ + if (ISAd[*v1] > ISAd[*v2]) + { + std::swap(v1, v2); + } + if (ISAd[*v2] > ISAd[*v3]) + { + if (ISAd[*v1] > ISAd[*v3]) + { + return v1; + } + else + { + return v3; + } + } + return v2; +} +template +inline saidx_t * tr_median5(saidx_t const * ISAd, saidx_t * v1, saidx_t * v2, saidx_t * v3, saidx_t * v4, saidx_t * v5) +{ + if (ISAd[*v2] > ISAd[*v3]) + { + std::swap(v2, v3); + } + if (ISAd[*v4] > ISAd[*v5]) + { + std::swap(v4, v5); + } + if (ISAd[*v2] > ISAd[*v4]) + { + std::swap(v2, v4); + std::swap(v3, v5); + } + if (ISAd[*v1] > 
ISAd[*v3]) + { + std::swap(v1, v3); + } + if (ISAd[*v1] > ISAd[*v4]) + { + std::swap(v1, v4); + std::swap(v3, v5); + } + if (ISAd[*v3] > ISAd[*v4]) + { + return v4; + } + return v3; +} +template +inline saidx_t * tr_pivot(saidx_t const * ISAd, saidx_t * first, saidx_t * last) +{ + saidx_t * middle; + saidx_t t; + t = last - first; + middle = first + t / 2; + if (t <= 512) + { + if (t <= 32) + { + return tr_median3(ISAd, first, middle, last - 1); + } + else + { + t >>= 2; + return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = tr_median3(ISAd, first, first + t, first + (t << 1)); + middle = tr_median3(ISAd, middle - t, middle, middle + t); + last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1); + return tr_median3(ISAd, first, middle, last); +} +template +struct _trbudget_t +{ + saidx_t chance; + saidx_t remain; + saidx_t incval; + saidx_t count; +}; +template +using trbudget_t = struct _trbudget_t; +template +inline void trbudget_init(trbudget_t * budget, saidx_t chance, saidx_t incval) +{ + budget->chance = chance; + budget->remain = budget->incval = incval; +} +template +inline int32_t trbudget_check(trbudget_t * budget, saidx_t size) +{ + if (size <= budget->remain) + { + budget->remain -= size; + return 1; + } + if (budget->chance == 0) + { + budget->count += size; + return 0; + } + budget->remain += budget->incval - size; + budget->chance -= 1; + return 1; +} +template +inline void tr_partition(saidx_t const * ISAd, + saidx_t * first, + saidx_t * middle, + saidx_t * last, + saidx_t ** pa, + saidx_t ** pb, + saidx_t v) +{ + saidx_t *a, *b, *c, *d, *e, *f; + saidx_t t, s; + saidx_t x = 0; + for (b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) + {} + if (((a = b) < last) && (x < v)) + { + for (; (++b < last) && ((x = ISAd[*b]) <= v);) + { + if (x == v) + { + std::swap(*b, *a); + ++a; + } + } + } + for (c = last; (b < --c) && ((x = ISAd[*c]) == v);) + {} + if ((b < (d = c)) && (x > v)) + { + 
for (; (b < --c) && ((x = ISAd[*c]) >= v);) + { + if (x == v) + { + std::swap(*c, *d); + --d; + } + } + } + for (; b < c;) + { + std::swap(*b, *c); + for (; (++b < c) && ((x = ISAd[*b]) <= v);) + { + if (x == v) + { + std::swap(*b, *a); + ++a; + } + } + for (; (b < --c) && ((x = ISAd[*c]) >= v);) + { + if (x == v) + { + std::swap(*c, *d); + --d; + } + } + } + if (a <= d) + { + c = b - 1; + if ((s = a - first) > (t = b - a)) + { + s = t; + } + for (e = first, f = b - s; 0 < s; --s, ++e, ++f) + { + std::swap(*e, *f); + } + if ((s = d - c) > (t = last - d - 1)) + { + s = t; + } + for (e = b, f = last - s; 0 < s; --s, ++e, ++f) + { + std::swap(*e, *f); + } + first += (b - a), last -= (d - c); + } + *pa = first, *pb = last; +} +template +inline void +tr_copy(saidx_t * ISA, saidx_t const * SA, saidx_t * first, saidx_t * a, saidx_t * b, saidx_t * last, saidx_t depth) +{ + saidx_t *c, *d, *e; + saidx_t s, v; + v = b - SA - 1; + for (c = first, d = a - 1; c <= d; ++c) + { + if ((0 <= (s = *c - depth)) && (ISA[s] == v)) + { + *++d = s; + ISA[s] = d - SA; + } + } + for (c = last - 1, e = d + 1, d = b; e < d; --c) + { + if ((0 <= (s = *c - depth)) && (ISA[s] == v)) + { + *--d = s; + ISA[s] = d - SA; + } + } +} +template +inline void tr_partialcopy(saidx_t * ISA, + saidx_t const * SA, + saidx_t * first, + saidx_t * a, + saidx_t * b, + saidx_t * last, + saidx_t depth) +{ + saidx_t *c, *d, *e; + saidx_t s, v; + saidx_t rank, lastrank, newrank = -1; + v = b - SA - 1; + lastrank = -1; + for (c = first, d = a - 1; c <= d; ++c) + { + if ((0 <= (s = *c - depth)) && (ISA[s] == v)) + { + *++d = s; + rank = ISA[s + depth]; + if (lastrank != rank) + { + lastrank = rank; + newrank = d - SA; + } + ISA[s] = newrank; + } + } + lastrank = -1; + for (e = d; first <= e; --e) + { + rank = ISA[*e]; + if (lastrank != rank) + { + lastrank = rank; + newrank = e - SA; + } + if (newrank != rank) + { + ISA[*e] = newrank; + } + } + lastrank = -1; + for (c = last - 1, e = d + 1, d = b; e < d; --c) + { + 
if ((0 <= (s = *c - depth)) && (ISA[s] == v)) + { + *--d = s; + rank = ISA[s + depth]; + if (lastrank != rank) + { + lastrank = rank; + newrank = d - SA; + } + ISA[s] = newrank; + } + } +} +template +inline void tr_introsort(saidx_t * ISA, + saidx_t const * ISAd, + saidx_t * SA, + saidx_t * first, + saidx_t * last, + trbudget_t * budget) +{ + struct + { + saidx_t const * a; + saidx_t *b, *c; + int32_t d, e; + } stack[libdivsufsort_config::TR_STACKSIZE]; + saidx_t *a, *b, *c; + saidx_t v, x = 0; + saidx_t incr = ISAd - ISA; + int32_t limit, next; + int32_t ssize, trlink = -1; + for (ssize = 0, limit = tr_ilg((saidx_t)(last - first));;) + { + if (limit < 0) + { + if (limit == -1) + { + tr_partition(ISAd - incr, first, first, last, &a, &b, (saidx_t)(last - SA - 1)); + if (a < last) + { + for (c = first, v = a - SA - 1; c < a; ++c) + { + ISA[*c] = v; + } + } + if (b < last) + { + for (c = a, v = b - SA - 1; c < b; ++c) + { + ISA[*c] = v; + } + } + if (1 < (b - a)) + { + STACK_PUSH5(NULL, a, b, 0, 0); + STACK_PUSH5(ISAd - incr, first, last, -2, trlink); + trlink = ssize - 2; + } + if ((a - first) <= (last - b)) + { + if (1 < (a - first)) + { + STACK_PUSH5(ISAd, b, last, tr_ilg((saidx_t)(last - b)), trlink); + last = a, limit = tr_ilg((saidx_t)(a - first)); + } + else if (1 < (last - b)) + { + first = b, limit = tr_ilg((saidx_t)(last - b)); + } + else + { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + else + { + if (1 < (last - b)) + { + STACK_PUSH5(ISAd, first, a, tr_ilg((saidx_t)(a - first)), trlink); + first = b, limit = tr_ilg((saidx_t)(last - b)); + } + else if (1 < (a - first)) + { + last = a, limit = tr_ilg((saidx_t)(a - first)); + } + else + { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } + else if (limit == -2) + { + a = stack[--ssize].b, b = stack[ssize].c; + if (stack[ssize].d == 0) + { + tr_copy(ISA, SA, first, a, b, last, (saidx_t)(ISAd - ISA)); + } + else + { + if (0 <= trlink) + { + stack[trlink].d = -1; + } + tr_partialcopy(ISA, 
SA, first, a, b, last, (saidx_t)(ISAd - ISA)); + } + STACK_POP5(ISAd, first, last, limit, trlink); + } + else + { + if (0 <= *first) + { + a = first; + do + { + ISA[*a] = a - SA; + } + while ((++a < last) && (0 <= *a)); + first = a; + } + if (first < last) + { + a = first; + do + { + *a = ~*a; + } + while (*++a < 0); + next = (ISA[*a] != ISAd[*a]) ? tr_ilg((saidx_t)(a - first + 1)) : -1; + if (++a < last) + { + for (b = first, v = a - SA - 1; b < a; ++b) + { + ISA[*b] = v; + } + } + if (trbudget_check(budget, (saidx_t)(a - first))) + { + if ((a - first) <= (last - a)) + { + STACK_PUSH5(ISAd, a, last, -3, trlink); + ISAd += incr, last = a, limit = next; + } + else + { + if (1 < (last - a)) + { + STACK_PUSH5(ISAd + incr, first, a, next, trlink); + first = a, limit = -3; + } + else + { + ISAd += incr, last = a, limit = next; + } + } + } + else + { + if (0 <= trlink) + { + stack[trlink].d = -1; + } + if (1 < (last - a)) + { + first = a, limit = -3; + } + else + { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } + else + { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + continue; + } + if ((last - first) <= TR_INSERTIONSORT_THRESHOLD) + { + tr_insertionsort(ISAd, first, last); + limit = -3; + continue; + } + if (limit-- == 0) + { + tr_heapsort(ISAd, first, (saidx_t)(last - first)); + for (a = last - 1; first < a; a = b) + { + for (x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) + { + *b = ~*b; + } + } + limit = -3; + continue; + } + a = tr_pivot(ISAd, first, last); + std::swap(*first, *a); + v = ISAd[*first]; + tr_partition(ISAd, first, first + 1, last, &a, &b, v); + if ((last - first) != (b - a)) + { + next = (ISA[*a] != v) ? 
tr_ilg((saidx_t)(b - a)) : -1; + for (c = first, v = a - SA - 1; c < a; ++c) + { + ISA[*c] = v; + } + if (b < last) + { + for (c = a, v = b - SA - 1; c < b; ++c) + { + ISA[*c] = v; + } + } + if ((1 < (b - a)) && (trbudget_check(budget, (saidx_t)(b - a)))) + { + if ((a - first) <= (last - b)) + { + if ((last - b) <= (b - a)) + { + if (1 < (a - first)) + { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } + else if (1 < (last - b)) + { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } + else + { + ISAd += incr, first = a, last = b, limit = next; + } + } + else if ((a - first) <= (b - a)) + { + if (1 < (a - first)) + { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } + else + { + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } + else + { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } + else + { + if ((a - first) <= (b - a)) + { + if (1 < (last - b)) + { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } + else if (1 < (a - first)) + { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } + else + { + ISAd += incr, first = a, last = b, limit = next; + } + } + else if ((last - b) <= (b - a)) + { + if (1 < (last - b)) + { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } + else + { + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } + else + { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } + } + else + { + if ((1 < (b - a)) && (0 <= trlink)) + { + stack[trlink].d = -1; + } + if ((a - first) <= (last - 
b)) + { + if (1 < (a - first)) + { + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } + else if (1 < (last - b)) + { + first = b; + } + else + { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + else + { + if (1 < (last - b)) + { + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } + else if (1 < (a - first)) + { + last = a; + } + else + { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } + } + else + { + if (trbudget_check(budget, (saidx_t)(last - first))) + { + limit = tr_ilg((saidx_t)(last - first)), ISAd += incr; + } + else + { + if (0 <= trlink) + { + stack[trlink].d = -1; + } + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } +} +template +inline void trsort(saidx_t * ISA, saidx_t * SA, saidx_t n, saidx_t depth) +{ + saidx_t * ISAd; + saidx_t *first, *last; + trbudget_t budget; + saidx_t t, skip, unsorted; + trbudget_init(&budget, (saidx_t)(tr_ilg(n) * 2 / 3), n); + for (ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) + { + first = SA; + skip = 0; + unsorted = 0; + do + { + if ((t = *first) < 0) + { + first -= t; + skip += t; + } + else + { + if (skip != 0) + { + *(first + skip) = skip; + skip = 0; + } + last = SA + ISA[t] + 1; + if (1 < (last - first)) + { + budget.count = 0; + tr_introsort(ISA, ISAd, SA, first, last, &budget); + if (budget.count != 0) + { + unsorted += budget.count; + } + else + { + skip = first - last; + } + } + else if ((last - first) == 1) + { + skip = -1; + } + first = last; + } + } + while (first < (SA + n)); + if (skip != 0) + { + *(first + skip) = skip; + } + if (unsorted == 0) + { + break; + } + } +} +template +inline saidx_t sort_typeBstar(uint8_t const * T, saidx_t * SA, saidx_t * bucket_A, saidx_t * bucket_B, saidx_t n) +{ + saidx_t *PAb, *ISAb, *buf; +#ifdef _OPENMP + saidx_t * curbuf; + saidx_t l; +#endif + saidx_t i, j, k, t, m, bufsize; + int32_t c0, c1; +#ifdef _OPENMP + int32_t d0, d1; + int tmp; +#endif + for (i = 0; i < BUCKET_A_SIZE; ++i) + { + bucket_A[i] = 0; + } + for 
(i = 0; i < BUCKET_B_SIZE; ++i) + { + bucket_B[i] = 0; + } + for (i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) + { + do + { + ++BUCKET_A(c1 = c0); + } + while ((0 <= --i) && ((c0 = T[i]) >= c1)); + if (0 <= i) + { + ++BUCKET_BSTAR(c0, c1); + SA[--m] = i; + for (--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) + { + ++BUCKET_B(c0, c1); + } + } + } + m = n - m; + for (c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) + { + t = i + BUCKET_A(c0); + BUCKET_A(c0) = i + j; + i = t + BUCKET_B(c0, c0); + for (c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) + { + j += BUCKET_BSTAR(c0, c1); + BUCKET_BSTAR(c0, c1) = j; + i += BUCKET_B(c0, c1); + } + } + if (0 < m) + { + PAb = SA + n - m; + ISAb = SA + m; + for (i = m - 2; 0 <= i; --i) + { + t = PAb[i], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = i; + } + t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = m - 1; +#ifdef _OPENMP + tmp = omp_get_max_threads(); + buf = SA + m, bufsize = (n - (2 * m)) / tmp; + c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; +#pragma omp parallel default(shared) private(curbuf, k, l, d0, d1, tmp) + { + tmp = omp_get_thread_num(); + curbuf = buf + tmp * bufsize; + k = 0; + for (;;) + { +#pragma omp critical(sssort_lock) + { + if (0 < (l = j)) + { + d0 = c0, d1 = c1; + do + { + k = BUCKET_BSTAR(d0, d1); + if (--d1 <= d0) + { + d1 = ALPHABET_SIZE - 1; + if (--d0 < 0) + { + break; + } + } + } + while (((l - k) <= 1) && (0 < (l = k))); + c0 = d0, c1 = d1, j = k; + } + } + if (l == 0) + { + break; + } + sssort(T, PAb, SA + k, SA + l, curbuf, bufsize, (saidx_t)2, n, *(SA + k) == (m - 1)); + } + } +#else + buf = SA + m, bufsize = n - (2 * m); + for (c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) + { + for (c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) + { + i = BUCKET_BSTAR(c0, c1); + if (1 < (j - i)) + { + sssort(T, PAb, SA + i, SA + j, buf, bufsize, (saidx_t)2, n, *(SA + i) == (m - 1)); + } + } + } +#endif + for (i = m - 1; 0 <= i; --i) + { + if (0 <= SA[i]) + { + j = 
i; + do + { + ISAb[SA[i]] = i; + } + while ((0 <= --i) && (0 <= SA[i])); + SA[i + 1] = i - j; + if (i <= 0) + { + break; + } + } + j = i; + do + { + ISAb[SA[i] = ~SA[i]] = j; + } + while (SA[--i] < 0); + ISAb[SA[i]] = j; + } + trsort(ISAb, SA, m, (saidx_t)1); + for (i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) + { + for (--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) + {} + if (0 <= i) + { + t = i; + for (--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) + {} + SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; + } + } + BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; + for (c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) + { + i = BUCKET_A(c0 + 1) - 1; + for (c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) + { + t = i - BUCKET_B(c0, c1); + BUCKET_B(c0, c1) = i; + for (i = t, j = BUCKET_BSTAR(c0, c1); j <= k; --i, --k) + { + SA[i] = SA[k]; + } + } + BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; + BUCKET_B(c0, c0) = i; + } + } + return m; +} +template +inline void construct_SA(uint8_t const * T, saidx_t * SA, saidx_t * bucket_A, saidx_t * bucket_B, saidx_t n, saidx_t m) +{ + saidx_t *i, *j, *k; + saidx_t s; + int32_t c0, c1, c2; + if (0 < m) + { + for (c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) + { + for (i = SA + BUCKET_BSTAR(c1, c1 + 1), j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; i <= j; --j) + { + if (0 < (s = *j)) + { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + *j = ~s; + c0 = T[--s]; + if ((0 < s) && (T[s - 1] > c0)) + { + s = ~s; + } + if (c0 != c2) + { + if (0 <= c2) + { + BUCKET_B(c2, c1) = k - SA; + } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } + else + { + assert(((s == 0) && (T[s] == c1)) || (s < 0)); + *j = ~s; + } + } + } + } + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? 
~(n - 1) : (n - 1); + for (i = SA, j = SA + n; i < j; ++i) + { + if (0 < (s = *i)) + { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + if ((s == 0) || (T[s - 1] < c0)) + { + s = ~s; + } + if (c0 != c2) + { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } + else + { + assert(s < 0); + *i = ~s; + } + } +} +template +inline saidx_t +construct_BWT(uint8_t const * T, saidx_t * SA, saidx_t * bucket_A, saidx_t * bucket_B, saidx_t n, saidx_t m) +{ + saidx_t *i, *j, *k, *orig; + saidx_t s; + int32_t c0, c1, c2; + if (0 < m) + { + for (c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) + { + for (i = SA + BUCKET_BSTAR(c1, c1 + 1), j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; i <= j; --j) + { + if (0 < (s = *j)) + { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + c0 = T[--s]; + *j = ~((saidx_t)c0); + if ((0 < s) && (T[s - 1] > c0)) + { + s = ~s; + } + if (c0 != c2) + { + if (0 <= c2) + { + BUCKET_B(c2, c1) = k - SA; + } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); + *k-- = s; + } + else if (s != 0) + { + *j = ~s; +#ifndef NDEBUG + } + else + { + assert(T[s] == c1); +#endif + } + } + } + } + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? 
~((saidx_t)T[n - 2]) : (n - 1); + for (i = SA, j = SA + n, orig = SA; i < j; ++i) + { + if (0 < (s = *i)) + { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + *i = c0; + if ((0 < s) && (T[s - 1] < c0)) + { + s = ~((saidx_t)T[s - 1]); + } + if (c0 != c2) + { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } + else if (s != 0) + { + *i = ~s; + } + else + { + orig = i; + } + } + return orig - SA; +} +template +int32_t divsufsort(uint8_t const * T, saidx_t * SA, saidx_t n) +{ + saidx_t *bucket_A, *bucket_B; + saidx_t m; + int32_t err = 0; + if ((T == NULL) || (SA == NULL) || (n < 0)) + { + return -1; + } + else if (n == 0) + { + return 0; + } + else if (n == 1) + { + SA[0] = 0; + return 0; + } + else if (n == 2) + { + m = (T[0] < T[1]); + SA[m ^ 1] = 0, SA[m] = 1; + return 0; + } + bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t)); + bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); + if ((bucket_A != NULL) && (bucket_B != NULL)) + { + m = sort_typeBstar(T, SA, bucket_A, bucket_B, n); + construct_SA(T, SA, bucket_A, bucket_B, n, m); + } + else + { + err = -2; + } + free(bucket_B); + free(bucket_A); + return err; +} +inline int32_t divsufsort64(uint8_t const * T, int64_t * SA, int64_t n) +{ + return divsufsort(T, SA, n); +} +template +inline int _compare(uint8_t const * T, saidx_t Tsize, uint8_t const * P, saidx_t Psize, saidx_t suf, saidx_t * match) +{ + saidx_t i, j; + int32_t r; + for (i = suf + *match, j = *match, r = 0; (i < Tsize) && (j < Psize) && ((r = T[i] - P[j]) == 0); ++i, ++j) + {} + *match = j; + return (r == 0) ? 
-(j != Psize) : r; +} +} +#endif +#ifndef INCLUDED_SDSL_QSUFSORT +#define INCLUDED_SDSL_QSUFSORT +#define DBG_OUT \ + if (0) \ + std::cout +#include +#include +#include +#include +#include +#include +#include +namespace sdsl +{ +namespace qsufsort +{ +template > +class sorter; +template +void construct_sa(int_vector_type & sa, char const * file, uint8_t num_bytes) +{ + sorter s; + s.sort(sa, file, num_bytes); +} +template +void construct_sa(int_vector_type & sa, t_vec & text) +{ + sorter s; + s.sort(sa, text); +} +template +class sorter +{ + typedef int_vector_type tIV; + typedef typename tIV::iterator int_iter; + typedef typename tIV::size_type size_type; +private: + int_iter m_SA, + m_VV; + uint64_t m_rr, + m_hh; + uint8_t m_msb; + uint64_t m_msb_mask; + inline int64_t to_sign(uint64_t x) const + { + return x & m_msb_mask ? -((int64_t)(x & ~m_msb_mask)) : x; + } + inline int64_t mark_pos(uint64_t x) const + { + return (x & ~m_msb_mask); + } + inline int64_t mark_neg(uint64_t x) const + { + return x | m_msb_mask; + } + inline bool not_neg(uint64_t x) const + { + return !(x >> m_msb); + } + inline bool is_neg(uint64_t x) const + { + return x & m_msb_mask; + } + inline uint64_t key(int_iter const & p) const + { + return m_VV[*p + m_hh]; + } + inline void swap(int_iter & p, int_iter & q) const + { + uint64_t tmp = *p; + *p = *q; + *q = tmp; + } + inline int_iter const & med3(int_iter const & a, int_iter const & b, int_iter const & c) const + { + return key(a) < key(b) ? (key(b) < key(c) ? b : (key(a) < key(c) ? c : a)) + : (key(b) > key(c) ? b : (key(a) > key(c) ? 
c : a)); + } + void update_group(int_iter pl, int_iter pm) + { + int64_t g = pm - m_SA; + m_VV[*pl] = g; + if (pl == pm) + *pl = mark_neg(1); + else + do + m_VV[*++pl] = g; + while (pl < pm); + } + void select_sort_split(int_iter const & p, int64_t n) + { + int_iter pa, pb, pi, pn; + uint64_t f, v; + pa = p; + pn = p + n - 1; + while (pa < pn) + { + for (pi = pb = (pa + 1), f = key(pa); pi <= pn; ++pi) + if ((v = key(pi)) < f) + { + f = v; + swap(pi, pa); + pb = pa + 1; + } + else if (v == f) + { + swap(pi, pb); + ++pb; + } + update_group(pa, pb - 1); + pa = pb; + } + if (pa == pn) + { + m_VV[*pa] = pa - m_SA; + *pa = mark_neg(1); + } + } + uint64_t choose_pivot(int_iter const & p, int64_t n) + { + int_iter pl, pm, pn; + int64_t s; + pm = p + (n >> 1); + if (n > 7LL) + { + pl = p; + pn = p + n - 1; + if (n > 40LL) + { + s = n >> 3; + pl = med3(pl, pl + s, pl + s + s); + pm = med3(pm - s, pm, pm + s); + pn = med3(pn - s - s, pn - s, pn); + } + pm = med3(pl, pm, pn); + } + return key(pm); + } + void sort_split(int_iter const & p, int64_t n) + { + int_iter pa, pb, pc, pd, pl, pm, pn; + uint64_t f, v; + int64_t s, t; + if (n < 7) + { + select_sort_split(p, n); + return; + } + v = choose_pivot(p, n); + pa = pb = p; + pc = pd = p + n - 1; + while (1) + { + while (pb <= pc && (f = key(pb)) <= v) + { + if (f == v) + { + swap(pa, pb); + ++pa; + } + ++pb; + } + while (pc >= pb && (f = key(pc)) >= v) + { + if (f == v) + { + swap(pc, pd); + --pd; + } + --pc; + } + if (pb > pc) + break; + swap(pb, + pc); + ++pb; + --pc; + } + pn = p + n; + if ((s = pa - p) > (t = pb - pa)) + s = t; + for (pl = p, pm = pb - s; s; --s, ++pl, ++pm) + swap(pl, pm); + if ((s = pd - pc) > (t = pn - pd - 1)) + s = t; + for (pl = pb, pm = pn - s; s; --s, ++pl, ++pm) + swap(pl, pm); + s = pb - pa; + t = pd - pc; + if (pa > pb) + { + if (s > 0) + { + std::cout << "s=" << s << ">0 but should be <0; n=" << n << std::endl; + } + } + if (pc > pd) + { + if (t > 0) + { + std::cout << "t=" << t << ">0 but 
should be <0; n=" << n << std::endl; + } + } + if (s > 0) + sort_split(p, s); + update_group(p + s, p + n - t - 1); + if (t > 0) + sort_split(p + n - t, t); + } + void bucketsort(int_iter const & x, int_iter const & p, int64_t n, int64_t k) + { + int_iter pi; + int64_t i, d, g; + uint64_t c; + for (pi = p; pi < p + k; ++pi) + *pi = mark_neg(1); + for (i = 0; i <= n; ++i) + { + x[i] = p[c = x[i]]; + p[c] = i; + } + for (pi = p + k - 1, i = n; pi >= p; --pi) + { + d = x[c = *pi]; + x[c] = g = i; + if (not_neg(d)) + { + p[i--] = c; + do + { + d = x[c = d]; + x[c] = g; + p[i--] = c; + } + while (not_neg(d)); + } + else + p[i--] = mark_neg(1); + } + } +public: + int64_t transform(int_iter const & x, int_iter const & p, int64_t n, int64_t k, int64_t l, int64_t q) + { + if (!(q >= k - l)) + { + std::cout << "q=" << q << " k-l=" << k - l << std::endl; + } + assert(q >= k - l); + DBG_OUT << "transform(n=" << n << ", k=" << k << ", l=" << l << ", q=" << q << ")" << std::endl; + uint64_t bb, cc, dd; + int64_t jj; + int_iter pi, pj; + int s = bits::hi(k - l) + (k > l); + uint8_t len = 0; + m_rr = 0; + for (bb = dd = 0; (int)m_rr < n && (int)len < m_msb + 1 - s && (int64_t)(cc = dd << s | (k - l)) <= q; + ++m_rr, len += s) + { + bb = bb << s | (x[m_rr] - l + 1); + dd = cc; + } + DBG_OUT << "m_rr=" << m_rr << std::endl; + uint64_t mm = (1ULL << (m_rr - 1) * s) - 1; + x[n] = l - 1; + if ((int64_t)dd <= n) + { + for (pi = p; pi <= p + dd; ++pi) + *pi = 0; + for (pi = x + m_rr, cc = bb; pi <= x + n; ++pi) + { + p[cc] = 1; + cc = (cc & mm) << s | (*pi - l + 1); + } + for (uint64_t i = 1; i < m_rr; ++i) + { + p[cc] = 1; + cc = (cc & mm) << s; + } + for (pi = p, jj = 1; pi <= p + dd; ++pi) + if (*pi) + *pi = jj++; + for (pi = x, pj = x + m_rr, cc = bb; pj <= x + n; ++pi, ++pj) + { + *pi = p[cc]; + cc = (cc & mm) << s | (*pj - l + 1); + } + while (pi < x + n) + { + *pi++ = p[cc]; + cc = (cc & mm) << s; + } + } + else + { + for (pi = x, pj = x + m_rr, cc = bb; pj <= x + n; ++pi, ++pj) + 
{ + *pi = cc; + cc = (cc & mm) << s | (*pj - l + 1); + } + while (pi < x + n) + { + *pi++ = cc; + cc = (cc & mm) << s; + } + jj = dd + 1; + } + x[n] = 0; + DBG_OUT << "end transformation jj=" << jj << std::endl; + return jj; + } + void sort(int_iter const & x, int_iter const & p, int64_t n, int64_t k, int64_t l) + { + int_iter pi, pk; + m_VV = x; + m_SA = p; + if (n >= k - l) + { + int64_t j = transform(m_VV, m_SA, n, k, l, n); + DBG_OUT << "begin bucketsort j=" << j << std::endl; + bucketsort(m_VV, m_SA, n, j); + DBG_OUT << "end bucketsort" << std::endl; + } + else + { + transform(m_VV, m_SA, n, k, l, m_msb_mask - 1); + DBG_OUT << "initialize SA begin" << std::endl; + for (int64_t i = 0; i <= n; ++i) + m_SA[i] = i; + DBG_OUT << "initialize SA end" << std::endl; + m_hh = 0; + sort_split(m_SA, n + 1); + } + m_hh = m_rr; + while (to_sign(*m_SA) >= -n) + { + DBG_OUT << "SA = "; + DBG_OUT << std::endl; + DBG_OUT << "TEXT = "; + DBG_OUT << std::endl; + DBG_OUT << "*m_SA=" << to_sign(*m_SA) << std::endl; + pi = m_SA; + int64_t sl = 0; + DBG_OUT << "m_hh=" << m_hh << std::endl; + do + { + uint64_t s = *pi; + if (to_sign(s) < (int64_t)0) + { + pi += mark_pos(s); + sl += mark_pos(s); + } + else + { + if (sl) + { + *(pi - sl) = mark_neg(sl); + sl = 0; + } + pk = m_SA + m_VV[s] + 1; + sort_split(pi, pk - pi); + pi = pk; + } + } + while ((pi - m_SA) <= n); + if (sl) + *(pi - sl) = mark_neg(sl); + m_hh = 2 * m_hh; + DBG_OUT << "m_hh=" << m_hh << std::endl; + } + for (int64_t i = 0; i <= n; ++i) + { + m_SA[m_VV[i]] = i; + } + } + void do_sort(tIV & sa, tIV & x) + { + assert(x.size() > 0); + DBG_OUT << "x.width()=" << (int)x.width() << std::endl; + DBG_OUT << "x.size()=" << x.size() << std::endl; + DBG_OUT << "sa.width()=" << (int)sa.width() << std::endl; + DBG_OUT << "sa.size()=" << sa.size() << std::endl; + if (x.size() == 1) + { + sa = tIV(1, 0); + return; + } + int64_t max_symbol = 0, min_symbol = x.width() < 64 ? 
bits::lo_set[x.width()] : 0x7FFFFFFFFFFFFFFFLL; + for (size_type i = 0; i < x.size() - 1; ++i) + { + max_symbol = std::max(max_symbol, (int64_t)x[i]); + min_symbol = std::min(min_symbol, (int64_t)x[i]); + } + if (0 == min_symbol) + { + throw std::logic_error("Text contains 0-symbol. Suffix array can not be constructed."); + } + if (x[x.size() - 1] > 0) + { + throw std::logic_error("Last symbol is not 0-symbol. Suffix array can not be constructed."); + } + DBG_OUT << "sorter: min_symbol=" << min_symbol << std::endl; + DBG_OUT << "sorter: max_symbol=" << max_symbol << std::endl; + int64_t n = x.size() - 1; + DBG_OUT << "x.size()-1=" << x.size() - 1 << " n=" << n << std::endl; + uint8_t width = std::max(bits::hi(max_symbol) + 2, bits::hi(n + 1) + 2); + DBG_OUT << "sorter: width=" << (int)width << " max_symbol_width=" << bits::hi(max_symbol) + 1 + << " n_width=" << bits::hi(n) << std::endl; + util::expand_width(x, width); + sa = x; + if (sa.width() < x.width()) + { + throw std::logic_error("Fixed size suffix array is to small for the specified text."); + return; + } + m_msb = sa.width() - 1; + m_msb_mask = 1ULL << m_msb; + DBG_OUT << "sorter: m_msb=" << (int)m_msb << " m_msb_mask=" << m_msb_mask << std::endl; + sort(x.begin(), sa.begin(), x.size() - 1, max_symbol + 1, min_symbol); + } + void sort(tIV & sa, char const * file_name, uint8_t num_bytes) + { + DBG_OUT << "sorter: sort(" << file_name << ")" << std::endl; + DBG_OUT << "sizeof(int_vector<>::difference_type)=" << sizeof(int_vector<>::difference_type) << std::endl; + util::clear(sa); + tIV x; + if (num_bytes == 0 and typeid(typename tIV::reference) == typeid(uint64_t)) + { + DBG_OUT << "sorter: use int_vector<64>" << std::endl; + int_vector<> temp; + load_vector_from_file(temp, file_name, num_bytes); + x.resize(temp.size()); + for (size_type i = 0; i < temp.size(); ++i) + x[i] = temp[i]; + } + else + { + load_vector_from_file(x, file_name, num_bytes); + util::bit_compress(x); + } + do_sort(sa, x); + } + template 
+ void sort(tIV & sa, t_vec & text) + { + tIV x; + x.resize(text.size()); + for (size_type i = 0; i < text.size(); ++i) + x[i] = text[i]; + do_sort(sa, x); + } +}; +} +} +#endif +namespace sdsl +{ +inline void construct_sa_se(cache_config & config) +{ + int_vector<8> text; + load_from_file(text, cache_file_name(conf::KEY_TEXT, config)); + if (text.size() <= 2) + { + int_vector_buffer<> sa(cache_file_name(conf::KEY_SA, config), std::ios::out, 8, 2); + if (text.size() == 2) + { + sa.push_back(1); + } + sa.push_back(0); + } + else + { + _construct_sa_se>(text, cache_file_name(conf::KEY_SA, config), 256, 0); + } + register_cache_file(conf::KEY_SA, config); +} +namespace algorithm +{ +template +void calculate_sa(unsigned char const * c, typename t_int_vec::size_type len, t_int_vec & sa) +{ + typedef typename t_int_vec::size_type size_type; + constexpr uint8_t t_width = t_int_vec::fixed_int_width; + if (len <= 1) + { + sa.width(1); + sa.resize(len); + if (len > 0) + sa[0] = 0; + return; + } + bool small_file = (sizeof(len) <= 4 or len < 0x7FFFFFFFULL); + if (small_file) + { + uint8_t sa_width = sa.width(); + if (32 == t_width or (0 == t_width and 32 >= sa_width)) + { + sa.width(32); + sa.resize(len); + divsufsort(c, (int32_t *)sa.data(), (int32_t)len); + if (sa_width != 32) + { + for (size_type i = 0, p = 0; i < len; ++i, p += sa_width) + { + sa.set_int(p, sa.get_int(i << 5, 32), sa_width); + } + sa.width(sa_width); + sa.resize(len); + } + } + else + { + if (sa.width() < bits::hi(len) + 1) + { + throw std::logic_error("width of int_vector is to small for the text!!!"); + } + int_vector<> sufarray(len, 0, 32); + divsufsort(c, (int32_t *)sufarray.data(), (int32_t)len); + sa.resize(len); + for (size_type i = 0; i < len; ++i) + { + sa[i] = sufarray[i]; + } + } + } + else + { + uint8_t sa_width = sa.width(); + sa.width(64); + sa.resize(len); + divsufsort64(c, (int64_t *)sa.data(), len); + if (sa_width != 64) + { + for (size_type i = 0, p = 0; i < len; ++i, p += sa_width) + { 
+ sa.set_int(p, sa.get_int(i << 6, 64), sa_width); + } + sa.width(sa_width); + sa.resize(len); + } + } +} +} +template +void construct_sa(cache_config & config) +{ + static_assert(t_width == 0 or t_width == 8, + "construct_sa: width must be `0` for integer alphabet and `8` for byte alphabet"); + char const * KEY_TEXT = key_text_trait::KEY_TEXT; + if (t_width == 8) + { + if (construct_config().byte_algo_sa == LIBDIVSUFSORT) + { + read_only_mapper text(KEY_TEXT, config); + auto sa = write_out_mapper<0>::create(cache_file_name(conf::KEY_SA, config), 0, bits::hi(text.size()) + 1); + algorithm::calculate_sa((unsigned char const *)text.data(), text.size(), sa); + } + else if (construct_config().byte_algo_sa == SE_SAIS) + { + construct_sa_se(config); + } + } + else if (t_width == 0) + { + int_vector<> sa; + sdsl::qsufsort::construct_sa(sa, cache_file_name(KEY_TEXT, config).c_str(), 0); + store_to_cache(sa, conf::KEY_SA, config); + } + else + { + std::cerr << "Unknown alphabet type" << std::endl; + } +} +} +#endif +namespace sdsl +{ +template +bool contains_no_zero_symbol(int_vector const & text, std::string const & file) +{ + for (int_vector_size_type i = 0; i < text.size(); ++i) + { + if ((uint64_t)0 == text[i]) + { + throw std::logic_error(std::string("Error: File \"") + file + "\" contains zero symbol."); + return false; + } + } + return true; +} +template +void append_zero_symbol(int_vector & text) +{ + text.resize(text.size() + 1); + text[text.size() - 1] = 0; +} +template +void construct(t_index & idx, std::string file, uint8_t num_bytes = 0, bool move_input = false) +{ + tMSS file_map; + cache_config config; + if (is_ram_file(file)) + { + config.dir = "@"; + config.delete_data = move_input; + } + construct(idx, file, config, num_bytes); +} +template +void construct_im(t_index & idx, t_data && data, uint8_t num_bytes = 0) +{ + std::string tmp_file = ram_file_name(util::to_string(util::pid()) + "_" + util::to_string(util::id())); + store_to_file(data, tmp_file); + 
construct(idx, tmp_file, num_bytes, std::is_rvalue_reference::value); + ram_fs::remove(tmp_file); +} +template +void construct(t_index & idx, std::string const & file, cache_config & config, uint8_t num_bytes = 0) +{ + typename t_index::index_category index_tag; + construct(idx, file, config, num_bytes, index_tag); +} +template +void construct(t_index & idx, std::string const & file, cache_config & config, uint8_t num_bytes, wt_tag) +{ + auto event = memory_monitor::event("construct wavelet tree"); + if ((t_index::alphabet_category::WIDTH == 8 and num_bytes <= 1) + or (t_index::alphabet_category::WIDTH == 0 and num_bytes != 'd')) + { + int_vector_buffer text_buf(file, + std::ios::in, + 1024 * 1024, + num_bytes * 8, + (bool)num_bytes); + idx = t_index(text_buf.begin(), text_buf.end(), config.dir); + } + else + { + int_vector text; + load_vector_from_file(text, file, num_bytes); + std::string tmp_key = util::to_string(util::pid()) + "_" + util::to_string(util::id()); + std::string tmp_file_name = cache_file_name(tmp_key, config); + store_to_file(text, tmp_file_name); + util::clear(text); + { + int_vector_buffer text_buf(tmp_file_name); + idx = t_index(text_buf.begin(), text_buf.end(), config.dir); + } + sdsl::remove(tmp_file_name); + } +} +template +void construct(t_index & idx, std::string const & file, cache_config & config, uint8_t num_bytes, csa_tag) +{ + auto event = memory_monitor::event("construct CSA"); + constexpr uint8_t width = t_index::alphabet_category::WIDTH; + char const * KEY_TEXT = key_text_trait::KEY_TEXT; + char const * KEY_BWT = key_bwt_trait::KEY_BWT; + typedef int_vector text_type; + { + auto event = memory_monitor::event("parse input text"); + if (!cache_file_exists(KEY_TEXT, config)) + { + text_type text; + load_vector_from_file(text, file, num_bytes); + if (contains_no_zero_symbol(text, file)) + { + if (!is_ram_file(file)) + { + append_zero_symbol(text); + store_to_cache(text, KEY_TEXT, config); + } + else + { + auto text_mapper = 
write_out_mapper::create(cache_file_name(KEY_TEXT, config), + text.size() + 1, + text.width()); + std::copy(text.begin(), text.end(), text_mapper.begin()); + text_mapper[text.size()] = 0; + } + } + } + register_cache_file(KEY_TEXT, config); + } + if (config.delete_data) + { + sdsl::remove(file); + } + { + auto event = memory_monitor::event("SA"); + if (!cache_file_exists(conf::KEY_SA, config)) + { + construct_sa(config); + } + register_cache_file(conf::KEY_SA, config); + } + { + auto event = memory_monitor::event("BWT"); + if (!cache_file_exists(KEY_BWT, config)) + { + construct_bwt(config); + } + register_cache_file(KEY_BWT, config); + } + { + auto event = memory_monitor::event("construct CSA"); + idx = t_index(config); + } + if (config.delete_files) + { + auto event = memory_monitor::event("delete temporary files"); + util::delete_all_files(config.file_map); + } +} +template +void construct(t_index & idx, std::string const & file, cache_config & config, uint8_t num_bytes, lcp_tag) +{ + auto event = memory_monitor::event("construct compressed LCP"); + char const * KEY_TEXT = key_text_trait::KEY_TEXT; + typedef int_vector text_type; + { + auto event = memory_monitor::event("LCP"); + if (!cache_file_exists(conf::KEY_LCP, config)) + { + { + auto event = memory_monitor::event("parse input text"); + if (!cache_file_exists(KEY_TEXT, config)) + { + text_type text; + load_vector_from_file(text, file, num_bytes); + if (contains_no_zero_symbol(text, file)) + { + append_zero_symbol(text); + store_to_cache(text, KEY_TEXT, config); + } + } + register_cache_file(KEY_TEXT, config); + } + { + auto event = memory_monitor::event("SA"); + if (!cache_file_exists(conf::KEY_SA, config)) + { + construct_sa(config); + } + register_cache_file(conf::KEY_SA, config); + } + if (t_width == 8) + { + construct_lcp_semi_extern_PHI(config); + } + else + { + construct_lcp_PHI(config); + } + } + register_cache_file(conf::KEY_LCP, config); + } + { + auto event = memory_monitor::event("compressed 
LCP"); + idx = t_index(config); + } + if (config.delete_files) + { + auto event = memory_monitor::event("delete temporary files"); + util::delete_all_files(config.file_map); + } +} +template +void construct(t_index & idx, std::string const & file, cache_config & config, uint8_t num_bytes, lcp_tag tag) +{ + if (1 == num_bytes) + { + construct(idx, file, config, num_bytes, tag); + } + else + { + construct(idx, file, config, num_bytes, tag); + } +} +template +void construct(t_index & idx, std::string const & file, cache_config & config, uint8_t num_bytes, cst_tag) +{ + auto event = memory_monitor::event("construct CST"); + char const * KEY_TEXT = key_text_trait::KEY_TEXT; + char const * KEY_BWT = key_bwt_trait::KEY_BWT; + csa_tag csa_t; + { + typename t_index::csa_type csa; + if (!cache_file_exists(std::string(conf::KEY_CSA) + "_" + util::class_to_hash(csa), config)) + { + cache_config csa_config(false, config.dir, config.id, config.file_map); + construct(csa, file, csa_config, num_bytes, csa_t); + auto event = memory_monitor::event("store CSA"); + config.file_map = csa_config.file_map; + store_to_cache(csa, std::string(conf::KEY_CSA) + "_" + util::class_to_hash(csa), config); + } + register_cache_file(std::string(conf::KEY_CSA) + "_" + util::class_to_hash(csa), config); + } + { + auto event = memory_monitor::event("LCP"); + register_cache_file(KEY_TEXT, config); + register_cache_file(KEY_BWT, config); + register_cache_file(conf::KEY_SA, config); + if (!cache_file_exists(conf::KEY_LCP, config)) + { + if (t_index::alphabet_category::WIDTH == 8) + { + construct_lcp_semi_extern_PHI(config); + } + else + { + construct_lcp_PHI(config); + } + } + register_cache_file(conf::KEY_LCP, config); + } + { + auto event = memory_monitor::event("CST"); + idx = t_index(config); + } + if (config.delete_files) + { + auto event = memory_monitor::event("delete temporary files"); + util::delete_all_files(config.file_map); + } +} +} +#endif +#ifndef INCLUDED_SDSL_INV_PERM_SUPPORT +#define 
INCLUDED_SDSL_INV_PERM_SUPPORT +#include +#include +#include +#include +#include +namespace sdsl +{ +template +class inv_perm_support +{ +public: + typedef int_vector<> iv_type; + typedef iv_type::size_type size_type; + typedef iv_type::value_type value_type; + typedef iv_type::difference_type difference_type; + typedef random_access_const_iterator const_iterator; + typedef t_bv bit_vector_type; + typedef t_rank rank_type; +private: + iv_type const * m_v = nullptr; + iv_type m_back_pointer; + bit_vector_type m_marked; + rank_type m_rank_marked; +public: + inv_perm_support(){}; + inv_perm_support(inv_perm_support const & p) : + m_v(p.m_v), + m_back_pointer(p.m_back_pointer), + m_marked(p.m_marked), + m_rank_marked(p.m_rank_marked) + { + m_rank_marked.set_vector(&m_marked); + } + inv_perm_support(inv_perm_support && p) + { + *this = std::move(p); + } + inv_perm_support(iv_type const * v) : m_v(v) + { + bit_vector marked = bit_vector(m_v->size(), 0); + bit_vector done = bit_vector(m_v->size(), 0); + size_type max_back_pointer = 0; + for (size_type i = 0; i < m_v->size(); ++i) + { + if (!done[i]) + { + done[i] = 1; + size_type back_pointer = i, j = i, j_new = 0; + uint64_t steps = 0, all_steps = 0; + while ((j_new = (*m_v)[j]) != i) + { + j = j_new; + done[j] = 1; + ++steps; + ++all_steps; + if (t_s == steps) + { + max_back_pointer = std::max(max_back_pointer, back_pointer); + marked[j] = 1; + steps = 0; + back_pointer = j; + } + } + if (all_steps > t_s) + { + marked[i] = 1; + max_back_pointer = std::max(max_back_pointer, back_pointer); + } + } + } + m_marked = t_bv(std::move(marked)); + util::init_support(m_rank_marked, &m_marked); + done = bit_vector(m_v->size(), 0); + size_type n_bp = m_rank_marked(m_v->size()); + m_back_pointer = int_vector<>(n_bp, 0, bits::hi(max_back_pointer) + 1); + for (size_type i = 0; i < m_v->size(); ++i) + { + if (!done[i]) + { + done[i] = 1; + size_type back_pointer = i, j = i, j_new = 0; + uint64_t steps = 0, all_steps = 0; + while 
((j_new = (*m_v)[j]) != i) + { + j = j_new; + done[j] = 1; + ++steps; + ++all_steps; + if (t_s == steps) + { + m_back_pointer[m_rank_marked(j)] = back_pointer; + steps = 0; + back_pointer = j; + } + } + if (all_steps > t_s) + { + m_back_pointer[m_rank_marked(i)] = back_pointer; + } + } + } + } + value_type operator[](size_type i) const + { + size_type j = i, j_new = 0; + while ((j_new = (*m_v)[j]) != i) + { + if (m_marked[j]) + { + j = m_back_pointer[m_rank_marked(j)]; + while ((j_new = (*m_v)[j]) != i) + j = j_new; + } + else + { + j = j_new; + } + } + return j; + } + size_type size() const + { + return nullptr == m_v ? 0 : m_v->size(); + } + const_iterator begin() const + { + return const_iterator(this, 0); + } + const_iterator end() const + { + return const_iterator(this, size()); + } + void set_vector(iv_type const * v) + { + m_v = v; + } + inv_perm_support & operator=(inv_perm_support const & p) + { + if (this != &p) + { + inv_perm_support tmp(p); + *this = std::move(tmp); + } + return *this; + } + inv_perm_support & operator=(inv_perm_support && p) + { + if (this != &p) + { + m_v = std::move(p.m_v); + m_back_pointer = std::move(p.m_back_pointer); + m_marked = std::move(p.m_marked); + m_rank_marked = std::move(p.m_rank_marked); + m_rank_marked.set_vector(&m_marked); + } + return *this; + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += m_back_pointer.serialize(out, child, "back_pointer"); + written_bytes += m_marked.serialize(out, child, "marked"); + written_bytes += m_rank_marked.serialize(out, child, "rank_marked"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + m_back_pointer.load(in); + m_marked.load(in); + m_rank_marked.load(in, &m_marked); + } + template + void 
CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_back_pointer)); + ar(CEREAL_NVP(m_marked)); + ar(CEREAL_NVP(m_rank_marked)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_back_pointer)); + ar(CEREAL_NVP(m_marked)); + ar(CEREAL_NVP(m_rank_marked)); + m_rank_marked.set_vector(&m_marked); + } + bool operator==(inv_perm_support const & other) const noexcept + { + return (m_back_pointer == other.m_back_pointer) && (m_marked == other.m_marked) + && (m_rank_marked == other.m_rank_marked); + } + bool operator!=(inv_perm_support const & other) const noexcept + { + return !(*this == other); + } +}; +} +#endif +#ifndef INCLUDED_SDSL_INT_WAVELET_TREE +#define INCLUDED_SDSL_INT_WAVELET_TREE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace sdsl +{ +template +class wt_int +{ +public: + typedef int_vector<>::size_type size_type; + typedef int_vector<>::value_type value_type; + typedef typename t_bitvector::difference_type difference_type; + typedef random_access_const_iterator const_iterator; + typedef const_iterator iterator; + typedef t_bitvector bit_vector_type; + typedef t_rank rank_1_type; + typedef t_select select_1_type; + typedef t_select_zero select_0_type; + typedef wt_tag index_category; + typedef int_alphabet_tag alphabet_category; + enum + { + lex_ordered = 1 + }; + typedef std::pair point_type; + typedef std::vector point_vec_type; + typedef std::pair r2d_res_type; +protected: + size_type m_size = 0; + size_type m_sigma = 0; + bit_vector_type m_tree; + rank_1_type m_tree_rank; + select_1_type m_tree_select1; + select_0_type m_tree_select0; + uint32_t m_max_level = 0; +private: + void _interval_symbols(size_type i, + size_type j, + size_type & k, + std::vector & cs, + std::vector & rank_c_i, + std::vector & rank_c_j, + size_type level, + size_type path, + size_type node_size, + size_type offset) const + { + 
if (level >= m_max_level) + { + rank_c_i[k] = i; + rank_c_j[k] = j; + cs[k++] = path; + return; + } + size_type ones_before_o = m_tree_rank(offset); + size_type ones_before_i = m_tree_rank(offset + i) - ones_before_o; + size_type ones_before_j = m_tree_rank(offset + j) - ones_before_o; + size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o; + if ((j - i) - (ones_before_j - ones_before_i) > 0) + { + size_type new_offset = offset + m_size; + size_type new_node_size = node_size - ones_before_end; + size_type new_i = i - ones_before_i; + size_type new_j = j - ones_before_j; + _interval_symbols(new_i, new_j, k, cs, rank_c_i, rank_c_j, level + 1, path << 1, new_node_size, new_offset); + } + if ((ones_before_j - ones_before_i) > 0) + { + size_type new_offset = offset + (node_size - ones_before_end) + m_size; + size_type new_node_size = ones_before_end; + size_type new_i = ones_before_i; + size_type new_j = ones_before_j; + _interval_symbols(new_i, + new_j, + k, + cs, + rank_c_i, + rank_c_j, + level + 1, + (path << 1) | 1, + new_node_size, + new_offset); + } + } +public: + size_type const & sigma = m_sigma; + bit_vector_type const & tree = m_tree; + uint32_t const & max_level = m_max_level; + wt_int() = default; + template + wt_int(t_it begin, t_it end, std::string tmp_dir = ram_file_name("")) : m_size(std::distance(begin, end)) + { + if (0 == m_size) + return; + m_sigma = 0; + value_type max_elem = 1; + for (auto it = begin; it != end; ++it) + { + value_type value = *it; + if (value > max_elem) + max_elem = value; + } + m_max_level = bits::hi(max_elem) + 1; + std::string tree_out_buf_file_name = tmp_file(tmp_dir, "_m_tree"); + { + int_vector<> rac(m_size, 0, m_max_level); + std::copy(begin, end, rac.begin()); + int_vector_buffer<> buf1(tmp_file(tmp_dir, "_wt_constr_buf"), std::ios::out, 10 * (1 << 20), m_max_level); + osfstream tree_out_buf(tree_out_buf_file_name, std::ios::binary | std::ios::trunc | std::ios::out); + size_type bit_size = m_size * 
m_max_level; + int_vector<1>::write_header(bit_size, 1, tree_out_buf); + size_type tree_pos = 0; + uint64_t tree_word = 0; + uint64_t mask_old = 1ULL << (m_max_level); + for (uint32_t k = 0; k < m_max_level; ++k) + { + size_type start = 0; + const uint64_t mask_new = 1ULL << (m_max_level - k - 1); + do + { + size_type i = start; + size_type cnt0 = 0; + size_type cnt1 = 0; + uint64_t start_value = (rac[i] & mask_old); + uint64_t x; + while (i < m_size and ((x = rac[i]) & mask_old) == start_value) + { + if (x & mask_new) + { + tree_word |= (1ULL << (tree_pos & 0x3FULL)); + buf1[cnt1++] = x; + } + else + { + rac[start + cnt0++] = x; + } + ++tree_pos; + if ((tree_pos & 0x3FULL) == 0) + { + tree_out_buf.write((char *)&tree_word, sizeof(tree_word)); + tree_word = 0; + } + ++i; + } + if (k + 1 < m_max_level) + { + for (size_type j = 0; j < cnt1; ++j) + { + rac[start + cnt0 + j] = buf1[j]; + } + } + else + { + m_sigma += (cnt0 > 0) + (cnt1 > 0); + } + start += cnt0 + cnt1; + } + while (start < m_size); + mask_old += mask_new; + } + if ((tree_pos & 0x3FULL) != 0) + { + tree_out_buf.write((char *)&tree_word, sizeof(tree_word)); + } + buf1.close(true); + tree_out_buf.close(); + } + bit_vector tree; + load_from_file(tree, tree_out_buf_file_name); + sdsl::remove(tree_out_buf_file_name); + m_tree = bit_vector_type(std::move(tree)); + util::init_support(m_tree_rank, &m_tree); + util::init_support(m_tree_select0, &m_tree); + util::init_support(m_tree_select1, &m_tree); + } + wt_int(wt_int const & wt) + { + m_size = wt.m_size; + m_sigma = wt.m_sigma; + m_tree = wt.m_tree; + m_tree_rank = wt.m_tree_rank; + m_tree_rank.set_vector(&m_tree); + m_tree_select1 = wt.m_tree_select1; + m_tree_select1.set_vector(&m_tree); + m_tree_select0 = wt.m_tree_select0; + m_tree_select0.set_vector(&m_tree); + m_max_level = wt.m_max_level; + } + wt_int(wt_int && wt) : + m_size(wt.m_size), + m_sigma(wt.m_sigma), + m_tree(std::move(wt.m_tree)), + m_tree_rank(std::move(wt.m_tree_rank)), + 
m_tree_select1(std::move(wt.m_tree_select1)), + m_tree_select0(std::move(wt.m_tree_select0)), + m_max_level(wt.m_max_level) + { + m_tree_rank.set_vector(&m_tree); + m_tree_select1.set_vector(&m_tree); + m_tree_select0.set_vector(&m_tree); + } + wt_int & operator=(wt_int const & wt) + { + if (this != &wt) + { + wt_int tmp(wt); + *this = std::move(tmp); + } + return *this; + } + wt_int & operator=(wt_int && wt) + { + if (this != &wt) + { + m_size = wt.m_size; + m_sigma = wt.m_sigma; + m_tree = std::move(wt.m_tree); + m_tree_rank = std::move(wt.m_tree_rank); + m_tree_rank.set_vector(&m_tree); + m_tree_select1 = std::move(wt.m_tree_select1); + m_tree_select1.set_vector(&m_tree); + m_tree_select0 = std::move(wt.m_tree_select0); + m_tree_select0.set_vector(&m_tree); + m_max_level = std::move(wt.m_max_level); + } + return *this; + } + size_type size() const + { + return m_size; + } + bool empty() const + { + return m_size == 0; + } + value_type operator[](size_type i) const + { + assert(i < size()); + size_type offset = 0; + value_type res = 0; + size_type node_size = m_size; + for (uint32_t k = 0; k < m_max_level; ++k) + { + res <<= 1; + size_type ones_before_o = m_tree_rank(offset); + size_type ones_before_i = m_tree_rank(offset + i) - ones_before_o; + size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o; + if (m_tree[offset + i]) + { + offset += (node_size - ones_before_end); + node_size = ones_before_end; + i = ones_before_i; + res |= 1; + } + else + { + node_size = (node_size - ones_before_end); + i = (i - ones_before_i); + } + offset += m_size; + } + return res; + }; + size_type rank(size_type i, value_type c) const + { + assert(i <= size()); + if (((1ULL) << (m_max_level)) <= c) + { + return 0; + } + size_type offset = 0; + uint64_t mask = (1ULL) << (m_max_level - 1); + size_type node_size = m_size; + for (uint32_t k = 0; k < m_max_level and i; ++k) + { + size_type ones_before_o = m_tree_rank(offset); + size_type ones_before_i = 
m_tree_rank(offset + i) - ones_before_o; + size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o; + if (c & mask) + { + offset += (node_size - ones_before_end); + node_size = ones_before_end; + i = ones_before_i; + } + else + { + node_size = (node_size - ones_before_end); + i = (i - ones_before_i); + } + offset += m_size; + mask >>= 1; + } + return i; + }; + std::pair inverse_select(size_type i) const + { + assert(i < size()); + value_type c = 0; + size_type node_size = m_size, offset = 0; + for (uint32_t k = 0; k < m_max_level; ++k) + { + size_type ones_before_o = m_tree_rank(offset); + size_type ones_before_i = m_tree_rank(offset + i) - ones_before_o; + size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o; + c <<= 1; + if (m_tree[offset + i]) + { + offset += (node_size - ones_before_end); + node_size = ones_before_end; + i = ones_before_i; + c |= 1; + } + else + { + node_size = (node_size - ones_before_end); + i = (i - ones_before_i); + } + offset += m_size; + } + return std::make_pair(i, c); + } + size_type select(size_type i, value_type c) const + { + assert(1 <= i and i <= rank(size(), c)); + size_type offset = 0; + uint64_t mask = (1ULL) << (m_max_level - 1); + size_type node_size = m_size; + int_vector<64> m_path_off(max_level + 1); + int_vector<64> m_path_rank_off(max_level + 1); + m_path_off[0] = m_path_rank_off[0] = 0; + for (uint32_t k = 0; k < m_max_level and node_size; ++k) + { + size_type ones_before_o = m_tree_rank(offset); + m_path_rank_off[k] = ones_before_o; + size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o; + if (c & mask) + { + offset += (node_size - ones_before_end); + node_size = ones_before_end; + } + else + { + node_size = (node_size - ones_before_end); + } + offset += m_size; + m_path_off[k + 1] = offset; + mask >>= 1; + } + if (0ULL == node_size or node_size < i) + { + throw std::logic_error("select(" + util::to_string(i) + "," + util::to_string(c) + + "): c does not 
occur i times in the WT"); + return m_size; + } + mask = 1ULL; + for (uint32_t k = m_max_level; k > 0; --k) + { + offset = m_path_off[k - 1]; + size_type ones_before_o = m_path_rank_off[k - 1]; + if (c & mask) + { + i = m_tree_select1(ones_before_o + i) - offset + 1; + } + else + { + i = m_tree_select0(offset - ones_before_o + i) - offset + 1; + } + mask <<= 1; + } + return i - 1; + }; + void interval_symbols(size_type i, + size_type j, + size_type & k, + std::vector & cs, + std::vector & rank_c_i, + std::vector & rank_c_j) const + { + assert(i <= j and j <= size()); + k = 0; + if (i == j) + { + return; + } + if ((i + 1) == j) + { + auto res = inverse_select(i); + cs[0] = res.second; + rank_c_i[0] = res.first; + rank_c_j[0] = res.first + 1; + k = 1; + return; + } + _interval_symbols(i, j, k, cs, rank_c_i, rank_c_j, 0, 0, m_size, 0); + } + template > + t_ret_type lex_count(size_type i, size_type j, value_type c) const + { + assert(i <= j and j <= size()); + if (((1ULL) << (m_max_level)) <= c) + { + return t_ret_type{0, j - i, 0}; + } + size_type offset = 0; + size_type smaller = 0; + size_type greater = 0; + uint64_t mask = (1ULL) << (m_max_level - 1); + size_type node_size = m_size; + for (uint32_t k = 0; k < m_max_level; ++k) + { + size_type ones_before_o = m_tree_rank(offset); + size_type ones_before_i = m_tree_rank(offset + i) - ones_before_o; + size_type ones_before_j = m_tree_rank(offset + j) - ones_before_o; + size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o; + if (c & mask) + { + offset += (node_size - ones_before_end); + node_size = ones_before_end; + smaller += j - i - ones_before_j + ones_before_i; + i = ones_before_i; + j = ones_before_j; + } + else + { + node_size -= ones_before_end; + greater += ones_before_j - ones_before_i; + i -= ones_before_i; + j -= ones_before_j; + } + offset += m_size; + mask >>= 1; + } + return t_ret_type{i, smaller, greater}; + } + template > + t_ret_type lex_smaller_count(size_type i, value_type c) 
const + { + assert(i <= size()); + if (((1ULL) << (m_max_level)) <= c) + { + return t_ret_type{0, i}; + } + size_type offset = 0; + size_type result = 0; + uint64_t mask = (1ULL) << (m_max_level - 1); + size_type node_size = m_size; + for (uint32_t k = 0; k < m_max_level and i; ++k) + { + size_type ones_before_o = m_tree_rank(offset); + size_type ones_before_i = m_tree_rank(offset + i) - ones_before_o; + size_type ones_before_end = m_tree_rank(offset + node_size) - ones_before_o; + if (c & mask) + { + offset += (node_size - ones_before_end); + node_size = ones_before_end; + result += i - ones_before_i; + i = ones_before_i; + } + else + { + node_size = (node_size - ones_before_end); + i -= ones_before_i; + } + offset += m_size; + mask >>= 1; + } + return t_ret_type{i, result}; + } + std::pair>> + range_search_2d(size_type lb, size_type rb, value_type vlb, value_type vrb, bool report = true) const + { + std::vector offsets(m_max_level + 1); + std::vector ones_before_os(m_max_level + 1); + offsets[0] = 0; + if (vrb > (1ULL << m_max_level)) + vrb = (1ULL << m_max_level); + if (vlb > vrb) + return make_pair(0, point_vec_type()); + size_type cnt_answers = 0; + point_vec_type point_vec; + _range_search_2d(lb, rb, vlb, vrb, 0, 0, m_size, offsets, ones_before_os, 0, point_vec, report, cnt_answers); + return make_pair(cnt_answers, point_vec); + } + void _range_search_2d(size_type lb, + size_type rb, + value_type vlb, + value_type vrb, + size_type level, + size_type ilb, + size_type node_size, + std::vector & offsets, + std::vector & ones_before_os, + size_type path, + point_vec_type & point_vec, + bool report, + size_type & cnt_answers) const + { + if (lb > rb) + return; + if (level == m_max_level) + { + if (report) + { + for (size_type j = lb + 1; j <= rb + 1; ++j) + { + size_type i = j; + size_type c = path; + for (uint32_t k = m_max_level; k > 0; --k) + { + size_type offset = offsets[k - 1]; + size_type ones_before_o = ones_before_os[k - 1]; + if (c & 1) + { + i = 
m_tree_select1(ones_before_o + i) - offset + 1; + } + else + { + i = m_tree_select0(offset - ones_before_o + i) - offset + 1; + } + c >>= 1; + } + point_vec.emplace_back(i - 1, path); + } + } + cnt_answers += rb - lb + 1; + return; + } + size_type irb = ilb + (1ULL << (m_max_level - level)); + size_type mid = (irb + ilb) >> 1; + size_type offset = offsets[level]; + size_type ones_before_o = m_tree_rank(offset); + ones_before_os[level] = ones_before_o; + size_type ones_before_lb = m_tree_rank(offset + lb); + size_type ones_before_rb = m_tree_rank(offset + rb + 1); + size_type ones_before_end = m_tree_rank(offset + node_size); + size_type zeros_before_o = offset - ones_before_o; + size_type zeros_before_lb = offset + lb - ones_before_lb; + size_type zeros_before_rb = offset + rb + 1 - ones_before_rb; + size_type zeros_before_end = offset + node_size - ones_before_end; + if (vlb < mid and mid) + { + size_type nlb = zeros_before_lb - zeros_before_o; + size_type nrb = zeros_before_rb - zeros_before_o; + offsets[level + 1] = offset + m_size; + if (nrb) + _range_search_2d(nlb, + nrb - 1, + vlb, + std::min(vrb, mid - 1), + level + 1, + ilb, + zeros_before_end - zeros_before_o, + offsets, + ones_before_os, + path << 1, + point_vec, + report, + cnt_answers); + } + if (vrb >= mid) + { + size_type nlb = ones_before_lb - ones_before_o; + size_type nrb = ones_before_rb - ones_before_o; + offsets[level + 1] = offset + m_size + (zeros_before_end - zeros_before_o); + if (nrb) + _range_search_2d(nlb, + nrb - 1, + std::max(mid, vlb), + vrb, + level + 1, + mid, + ones_before_end - ones_before_o, + offsets, + ones_before_os, + (path << 1) + 1, + point_vec, + report, + cnt_answers); + } + } + const_iterator begin() const + { + return const_iterator(this, 0); + } + const_iterator end() const + { + return const_iterator(this, size()); + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = 
structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += write_member(m_size, out, child, "size"); + written_bytes += write_member(m_sigma, out, child, "sigma"); + written_bytes += m_tree.serialize(out, child, "tree"); + written_bytes += m_tree_rank.serialize(out, child, "tree_rank"); + written_bytes += m_tree_select1.serialize(out, child, "tree_select_1"); + written_bytes += m_tree_select0.serialize(out, child, "tree_select_0"); + written_bytes += write_member(m_max_level, out, child, "max_level"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + read_member(m_size, in); + read_member(m_sigma, in); + m_tree.load(in); + m_tree_rank.load(in, &m_tree); + m_tree_select1.load(in, &m_tree); + m_tree_select0.load(in, &m_tree); + read_member(m_max_level, in); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_sigma)); + ar(CEREAL_NVP(m_max_level)); + ar(CEREAL_NVP(m_tree)); + ar(CEREAL_NVP(m_tree_rank)); + ar(CEREAL_NVP(m_tree_select1)); + ar(CEREAL_NVP(m_tree_select0)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_sigma)); + ar(CEREAL_NVP(m_max_level)); + ar(CEREAL_NVP(m_tree)); + ar(CEREAL_NVP(m_tree_rank)); + m_tree_rank.set_vector(&m_tree); + ar(CEREAL_NVP(m_tree_select1)); + m_tree_select1.set_vector(&m_tree); + ar(CEREAL_NVP(m_tree_select0)); + m_tree_select0.set_vector(&m_tree); + } + bool operator==(wt_int const & other) const noexcept + { + return (m_size == other.m_size) && (m_sigma == other.m_sigma) && (m_tree == other.m_tree) + && (m_tree_rank == other.m_tree_rank) && (m_tree_select1 == other.m_tree_select1) + && (m_tree_select0 == other.m_tree_select0) && (m_max_level == other.m_max_level); + } + bool operator!=(wt_int const & other) const noexcept + { + return !(*this == other); + } + struct node_type + { + 
size_type offset = 0; + size_type size = 0; + size_type level = 0; + value_type sym = 0; + node_type(size_type o = 0, size_type sz = 0, size_type l = 0, value_type sy = 0) : + offset(o), + size(sz), + level(l), + sym(sy) + {} + node_type(node_type const &) = default; + node_type(node_type &&) = default; + node_type & operator=(node_type const &) = default; + node_type & operator=(node_type &&) = default; + bool operator==(node_type const & v) const + { + return offset == v.offset; + } + bool operator<(node_type const & v) const + { + return offset < v.offset; + } + bool operator>(node_type const & v) const + { + return offset > v.offset; + } + }; + bool is_leaf(node_type const & v) const + { + return v.level == m_max_level; + } + value_type sym(node_type const & v) const + { + return v.sym; + } + auto bit_vec(node_type const & v) const -> node_bv_container + { + return node_bv_container(begin(v), end(v)); + } + auto seq(node_type const & v) const -> random_access_container> + { + return random_access_container>( + [&v, this](size_type i) + { + node_type vv = v; + while (!is_leaf(vv)) + { + auto vs = expand(vv); + auto rs = expand(vv, range_type{{0, i}}); + bool bit = *(begin(vv) + i); + i = std::get<1>(rs[bit]); + vv = vs[bit]; + } + return sym(vv); + }, + size(v)); + } + bool empty(node_type const & v) const + { + return v.size == (size_type)0; + } + auto size(node_type const & v) const -> decltype(v.size) + { + return v.size; + } + node_type root() const + { + return node_type(0, m_size, 0, 0); + } + std::array expand(node_type const & v) const + { + node_type v_right = v; + return expand(std::move(v_right)); + } + std::array expand(node_type && v) const + { + node_type v_left; + size_type offset_rank = m_tree_rank(v.offset); + size_type ones = m_tree_rank(v.offset + v.size) - offset_rank; + v_left.offset = v.offset + m_size; + v_left.size = v.size - ones; + v_left.level = v.level + 1; + v_left.sym = v.sym << 1; + v.offset = v.offset + m_size + v_left.size; + 
v.size = ones; + v.level = v.level + 1; + v.sym = (v.sym << 1) | 1; + return {{std::move(v_left), v}}; + } + std::array expand(node_type const & v, range_vec_type const & ranges) const + { + auto ranges_copy = ranges; + return expand(v, std::move(ranges_copy)); + } + std::array expand(node_type const & v, range_vec_type && ranges) const + { + auto v_sp_rank = m_tree_rank(v.offset); + range_vec_type res(ranges.size()); + size_t i = 0; + for (auto & r : ranges) + { + auto sp_rank = m_tree_rank(v.offset + r[0]); + auto right_size = m_tree_rank(v.offset + r[1] + 1) - sp_rank; + auto left_size = (r[1] - r[0] + 1) - right_size; + auto right_sp = sp_rank - v_sp_rank; + auto left_sp = r[0] - right_sp; + r = {{left_sp, left_sp + left_size - 1}}; + res[i++] = {{right_sp, right_sp + right_size - 1}}; + } + return {{ranges, std::move(res)}}; + } + std::array expand(node_type const & v, range_type const & r) const + { + auto v_sp_rank = m_tree_rank(v.offset); + auto sp_rank = m_tree_rank(v.offset + r[0]); + auto right_size = m_tree_rank(v.offset + r[1] + 1) - sp_rank; + auto left_size = (r[1] - r[0] + 1) - right_size; + auto right_sp = sp_rank - v_sp_rank; + auto left_sp = r[0] - right_sp; + return {{{{left_sp, left_sp + left_size - 1}}, {{right_sp, right_sp + right_size - 1}}}}; + } + std::pair path(value_type c) const + { + return {m_max_level, c}; + } +private: + auto begin(node_type const & v) const -> decltype(m_tree.begin() + v.offset) + { + return m_tree.begin() + v.offset; + } + auto end(node_type const & v) const -> decltype(m_tree.begin() + v.offset + v.size) + { + return m_tree.begin() + v.offset + v.size; + } +}; +} +#endif +namespace sdsl +{ +template +class _sa_order_sampling : public int_vector +{ +public: + typedef int_vector base_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::value_type value_type; + enum + { + sample_dens = t_csa::sa_sample_dens + }; + enum + { + text_order = false + }; + typedef sa_sampling_tag 
sampling_category; + _sa_order_sampling() + {} + _sa_order_sampling(cache_config const & cconfig, SDSL_UNUSED t_csa const * csa = nullptr) + { + int_vector_buffer<> sa_buf(cache_file_name(conf::KEY_SA, cconfig)); + size_type n = sa_buf.size(); + this->width(bits::hi(n) + 1); + this->resize((n + sample_dens - 1) / sample_dens); + for (size_type i = 0, cnt_mod = sample_dens, cnt_sum = 0; i < n; ++i, ++cnt_mod) + { + size_type sa = sa_buf[i]; + if (sample_dens == cnt_mod) + { + cnt_mod = 0; + base_type::operator[](cnt_sum++) = sa; + } + } + } + inline bool is_sampled(size_type i) const + { + return 0 == (i % sample_dens); + } + inline value_type operator[](size_type i) const + { + return base_type::operator[](i / sample_dens); + } +}; +template +struct sa_order_sa_sampling +{ + template + using type = _sa_order_sampling; + using sampling_category = sa_sampling_tag; +}; +template +class _text_order_sampling : public int_vector +{ +private: + t_bv m_marked; + t_rank m_rank_marked; +public: + typedef int_vector base_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::value_type value_type; + typedef t_bv bv_type; + enum + { + sample_dens = t_csa::sa_sample_dens + }; + enum + { + text_order = true + }; + typedef sa_sampling_tag sampling_category; + bv_type const & marked = m_marked; + t_rank const & rank_marked = m_rank_marked; + _text_order_sampling() + {} + _text_order_sampling(cache_config const & cconfig, SDSL_UNUSED t_csa const * csa = nullptr) + { + int_vector_buffer<> sa_buf(cache_file_name(conf::KEY_SA, cconfig)); + size_type n = sa_buf.size(); + bit_vector marked(n, 0); + this->width(bits::hi(n / sample_dens) + 1); + this->resize((n + sample_dens - 1) / sample_dens); + for (size_type i = 0, sa_cnt = 0; i < n; ++i) + { + size_type sa = sa_buf[i]; + if (0 == (sa % sample_dens)) + { + marked[i] = 1; + base_type::operator[](sa_cnt++) = sa / sample_dens; + } + } + m_marked = std::move(t_bv(marked)); + 
util::init_support(m_rank_marked, &m_marked); + } + _text_order_sampling(_text_order_sampling const & st) : base_type(st) + { + m_marked = st.m_marked; + m_rank_marked = st.m_rank_marked; + m_rank_marked.set_vector(&m_marked); + } + inline bool is_sampled(size_type i) const + { + return m_marked[i]; + } + inline value_type operator[](size_type i) const + { + return base_type::operator[](m_rank_marked(i)) * sample_dens; + } + value_type condensed_sa(size_type i) const + { + return base_type::operator[](i); + } + _text_order_sampling & operator=(_text_order_sampling const & st) + { + if (this != &st) + { + base_type::operator=(st); + m_marked = st.m_marked; + m_rank_marked = st.m_rank_marked; + m_rank_marked.set_vector(&m_marked); + } + return *this; + } + void swap(_text_order_sampling & st) + { + base_type::swap(st); + m_marked.swap(st.m_marked); + util::swap_support(m_rank_marked, st.m_rank_marked, &m_marked, &(st.m_marked)); + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += base_type::serialize(out, child, "samples"); + written_bytes += m_marked.serialize(out, child, "marked"); + written_bytes += m_rank_marked.serialize(out, child, "rank_marked"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + base_type::load(in); + m_marked.load(in); + m_rank_marked.load(in); + m_rank_marked.set_vector(&m_marked); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + base_type::CEREAL_SAVE_FUNCTION_NAME(ar); + ar(CEREAL_NVP(m_marked)); + ar(CEREAL_NVP(m_rank_marked)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + base_type::CEREAL_LOAD_FUNCTION_NAME(ar); + ar(CEREAL_NVP(m_marked)); + ar(CEREAL_NVP(m_rank_marked)); + m_rank_marked.set_vector(&m_marked); + } +}; 
+template , class t_rank_sup = typename t_bit_vec::rank_1_type, uint8_t t_width = 0> +struct text_order_sa_sampling +{ + template + using type = _text_order_sampling; + using sampling_category = sa_sampling_tag; +}; +template , + class t_bv_isa = sd_vector<>, + class t_rank_sa = typename t_bv_sa::rank_1_type, + class t_select_isa = typename t_bv_isa::select_1_type> +class _fuzzy_sa_sampling +{ +private: + t_bv_sa m_marked_sa; + t_rank_sa m_rank_marked_sa; + t_bv_isa m_marked_isa; + t_select_isa m_select_marked_isa; + wt_int> m_inv_perm; +public: + typedef typename bit_vector::size_type size_type; + typedef typename bit_vector::value_type value_type; + typedef t_bv_sa bv_sa_type; + enum + { + sample_dens = t_csa::sa_sample_dens + }; + enum + { + text_order = true + }; + typedef sa_sampling_tag sampling_category; + t_bv_sa const & marked_sa = m_marked_sa; + t_rank_sa const & rank_marked_sa = m_rank_marked_sa; + t_bv_isa const & marked_isa = m_marked_isa; + t_select_isa const & select_marked_isa = m_select_marked_isa; + _fuzzy_sa_sampling() + {} + _fuzzy_sa_sampling(cache_config & cconfig, SDSL_UNUSED t_csa const * csa = nullptr) + { + { + if (!cache_file_exists(conf::KEY_ISA, cconfig)) + { + auto event = memory_monitor::event("ISA"); + construct_isa(cconfig); + } + register_cache_file(conf::KEY_SA, cconfig); + } + { + int_vector_buffer<> isa_buf(cache_file_name(conf::KEY_ISA, cconfig)); + size_type n = isa_buf.size(); + bit_vector marked_isa(n, 0); + bit_vector marked_sa(n, 0); + int_vector<> inv_perm((n + sample_dens - 1) / sample_dens, 0, bits::hi(n) + 1); + size_type cnt = 0; + uint64_t min_prev_val = 0; + for (size_type i = 0; i < n; i += sample_dens) + { + size_type pos_min = i; + size_type pos_cnd = isa_buf[i] >= min_prev_val ? 
i : n; + for (size_type j = i + 1; j < i + sample_dens and j < n; ++j) + { + if (isa_buf[j] < isa_buf[pos_min]) + pos_min = j; + if (isa_buf[j] >= min_prev_val) + { + if (pos_cnd == n) + { + pos_cnd = j; + } + else if (isa_buf[j] < isa_buf[pos_cnd]) + { + pos_cnd = j; + } + } + } + if (pos_cnd == n) + { + pos_cnd = pos_min; + } + min_prev_val = isa_buf[pos_cnd]; + marked_isa[pos_cnd] = 1; + inv_perm[cnt++] = min_prev_val; + marked_sa[min_prev_val] = 1; + } + m_marked_isa = std::move(t_bv_isa(marked_isa)); + util::init_support(m_select_marked_isa, &m_marked_isa); + { + rank_support_v<> rank_marked_sa(&marked_sa); + for (size_type i = 0; i < inv_perm.size(); ++i) + { + inv_perm[i] = rank_marked_sa(inv_perm[i]); + } + } + util::bit_compress(inv_perm); + m_marked_sa = std::move(t_bv_sa(marked_sa)); + util::init_support(m_rank_marked_sa, &m_marked_sa); + std::string tmp_key = + "fuzzy_isa_samples_" + util::to_string(util::pid()) + "_" + util::to_string(util::id()); + std::string tmp_file_name = cache_file_name(tmp_key, cconfig); + store_to_file(inv_perm, tmp_file_name); + construct(m_inv_perm, tmp_file_name, 0); + sdsl::remove(tmp_file_name); + } + } + _fuzzy_sa_sampling(_fuzzy_sa_sampling const & st) : + m_marked_sa(st.m_marked_sa), + m_rank_marked_sa(st.m_rank_marked_sa), + m_marked_isa(st.m_marked_isa), + m_select_marked_isa(st.m_select_marked_isa), + m_inv_perm(st.m_inv_perm) + { + m_rank_marked_sa.set_vector(&m_marked_sa); + m_select_marked_isa.set_vector(&m_marked_isa); + } + _fuzzy_sa_sampling(_fuzzy_sa_sampling && st) : + m_marked_sa(std::move(st.m_marked_sa)), + m_rank_marked_sa(std::move(st.m_rank_marked_sa)), + m_marked_isa(std::move(st.m_marked_isa)), + m_select_marked_isa(std::move(st.m_select_marked_isa)), + m_inv_perm(std::move(st.m_inv_perm)) + { + m_rank_marked_sa.set_vector(&m_marked_sa); + m_select_marked_isa.set_vector(&m_marked_isa); + } + inline bool is_sampled(size_type i) const + { + return m_marked_sa[i]; + } + inline value_type 
operator[](size_type i) const + { + return m_select_marked_isa(m_inv_perm.select(1, m_rank_marked_sa(i)) + 1); + } + inline value_type inv(size_type i) const + { + return m_inv_perm[i]; + } + size_type size() const + { + return m_inv_perm.size(); + } + _fuzzy_sa_sampling & operator=(_fuzzy_sa_sampling const & st) + { + if (this != &st) + { + _fuzzy_sa_sampling tmp(st); + *this = std::move(tmp); + } + return *this; + } + _fuzzy_sa_sampling & operator=(_fuzzy_sa_sampling && st) + { + m_marked_sa = std::move(st.m_marked_sa); + m_rank_marked_sa = std::move(st.m_rank_marked_sa); + m_marked_isa = std::move(st.m_marked_isa); + m_select_marked_isa = std::move(st.m_select_marked_isa); + m_inv_perm = std::move(st.m_inv_perm); + m_rank_marked_sa.set_vector(&m_marked_sa); + m_select_marked_isa.set_vector(&m_marked_isa); + return *this; + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += m_marked_sa.serialize(out, child, "marked_sa"); + written_bytes += m_rank_marked_sa.serialize(out, child, "rank_marked_sa"); + written_bytes += m_marked_isa.serialize(out, child, "marked_isa"); + written_bytes += m_select_marked_isa.serialize(out, child, "select_marked_isa"); + written_bytes += m_inv_perm.serialize(out, child, "inv_perm"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + m_marked_sa.load(in); + m_rank_marked_sa.load(in); + m_rank_marked_sa.set_vector(&m_marked_sa); + m_marked_isa.load(in); + m_select_marked_isa.load(in); + m_select_marked_isa.set_vector(&m_marked_isa); + m_inv_perm.load(in); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_marked_sa)); + ar(CEREAL_NVP(m_rank_marked_sa)); + ar(CEREAL_NVP(m_marked_isa)); + ar(CEREAL_NVP(m_select_marked_isa)); + 
ar(CEREAL_NVP(m_inv_perm)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_marked_sa)); + ar(CEREAL_NVP(m_rank_marked_sa)); + m_rank_marked_sa.set_vector(&m_marked_sa); + ar(CEREAL_NVP(m_marked_isa)); + ar(CEREAL_NVP(m_select_marked_isa)); + m_select_marked_isa.set_vector(&m_marked_isa); + ar(CEREAL_NVP(m_inv_perm)); + } + bool operator==(_fuzzy_sa_sampling const & other) const noexcept + { + return (m_marked_sa == other.m_marked_sa) && (m_rank_marked_sa == other.m_rank_marked_sa) + && (m_marked_isa == other.m_marked_isa) && (m_select_marked_isa == other.m_select_marked_isa) + && (m_inv_perm == other.m_inv_perm); + } + bool operator!=(_fuzzy_sa_sampling const & other) const noexcept + { + return !(*this == other); + } +}; +template , + class t_bv_isa = sd_vector<>, + class t_rank_sa = typename t_bv_sa::rank_1_type, + class t_select_isa = typename t_bv_isa::select_1_type> +struct fuzzy_sa_sampling +{ + template + using type = _fuzzy_sa_sampling; + using sampling_category = sa_sampling_tag; +}; +template +class _bwt_sampling : public int_vector +{ +private: + t_bv m_marked; + t_rank m_rank_marked; +public: + typedef int_vector base_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::value_type value_type; + enum + { + sample_dens = t_csa::sa_sample_dens + }; + enum + { + text_order = false + }; + typedef sa_sampling_tag sampling_category; + _bwt_sampling() + {} + _bwt_sampling(cache_config const & cconfig, SDSL_UNUSED t_csa const * csa = nullptr) + { + int_vector_buffer<> sa_buf(cache_file_name(conf::KEY_SA, cconfig)); + int_vector_buffer bwt_buf( + cache_file_name(key_bwt(), cconfig)); + size_type n = sa_buf.size(); + bit_vector marked(n, 0); + this->width(bits::hi(n) + 1); + int_vector<> sample_char; + typedef typename t_csa::char_type char_type; + std::set char_map; + if (load_from_cache(sample_char, conf::KEY_SAMPLE_CHAR, cconfig)) + { + for (uint64_t i = 0; i < sample_char.size(); ++i) + { 
+ char_map.insert((char_type)sample_char[i]); + } + } + size_type sa_cnt = 0; + for (size_type i = 0; i < n; ++i) + { + size_type sa = sa_buf[i]; + char_type bwt = bwt_buf[i]; + if (0 == (sa % sample_dens)) + { + marked[i] = 1; + ++sa_cnt; + } + else if (char_map.find(bwt) != char_map.end()) + { + marked[i] = 1; + ++sa_cnt; + } + } + this->resize(sa_cnt); + sa_cnt = 0; + for (size_type i = 0; i < n; ++i) + { + size_type sa = sa_buf[i]; + if (marked[i]) + { + base_type::operator[](sa_cnt++) = sa; + } + } + m_marked = std::move(marked); + util::init_support(m_rank_marked, &m_marked); + } + _bwt_sampling(_bwt_sampling const & st) : base_type(st) + { + m_marked = st.m_marked; + m_rank_marked = st.m_rank_marked; + m_rank_marked.set_vector(&m_marked); + } + inline bool is_sampled(size_type i) const + { + return m_marked[i]; + } + inline value_type operator[](size_type i) const + { + return base_type::operator[](m_rank_marked(i)) * sample_dens; + } + _bwt_sampling & operator=(_bwt_sampling const & st) + { + if (this != &st) + { + base_type::operator=(st); + m_marked = st.m_marked; + m_rank_marked = st.m_rank_marked; + m_rank_marked.set_vector(&m_marked); + } + return *this; + } + void swap(_bwt_sampling & st) + { + base_type::swap(st); + m_marked.swap(st.m_marked); + util::swap_support(m_rank_marked, st.m_rank_marked, &m_marked, &(st.m_marked)); + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += base_type::serialize(out, child, "samples"); + written_bytes += m_marked.serialize(out, child, "marked"); + written_bytes += m_rank_marked.serialize(out, child, "rank_marked"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + base_type::load(in); + m_marked.load(in); + m_rank_marked.load(in); + 
m_rank_marked.set_vector(&m_marked); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + base_type::CEREAL_SAVE_FUNCTION_NAME(ar); + ar(CEREAL_NVP(m_marked)); + ar(CEREAL_NVP(m_rank_marked)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + base_type::CEREAL_LOAD_FUNCTION_NAME(ar); + ar(CEREAL_NVP(m_marked)); + ar(CEREAL_NVP(m_rank_marked)); + m_rank_marked.set_vector(&m_marked); + } +}; +template +struct sa_bwt_sampling +{ + template + using type = _bwt_sampling; + using sampling_category = sa_sampling_tag; +}; +template +class _isa_sampling : public int_vector +{ +public: + typedef int_vector base_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::value_type value_type; + typedef typename t_csa::sa_sample_type sa_type; + enum + { + sample_dens = t_csa::isa_sample_dens + }; + typedef isa_sampling_tag sampling_category; + _isa_sampling() + {} + _isa_sampling(cache_config const & cconfig, SDSL_UNUSED sa_type const * sa_sample = nullptr) + { + int_vector_buffer<> sa_buf(cache_file_name(conf::KEY_SA, cconfig)); + size_type n = sa_buf.size(); + if (n >= 1) + { + this->width(bits::hi(n) + 1); + this->resize((n - 1) / sample_dens + 1); + } + for (size_type i = 0; i < this->size(); ++i) + base_type::operator[](i) = 0; + for (size_type i = 0; i < n; ++i) + { + size_type sa = sa_buf[i]; + if ((sa % sample_dens) == 0) + { + base_type::operator[](sa / sample_dens) = i; + } + } + } + inline value_type operator[](size_type i) const + { + return base_type::operator[](i / sample_dens); + } + inline std::tuple sample_leq(size_type i) const + { + size_type ci = i / sample_dens; + return std::make_tuple(base_type::operator[](ci), ci * sample_dens); + } + inline std::tuple sample_qeq(size_type i) const + { + size_type ci = (i / sample_dens + 1) % this->size(); + return std::make_tuple(base_type::operator[](ci), ci * sample_dens); + } + void load(std::istream & in, SDSL_UNUSED sa_type const * sa_sample = 
nullptr) + { + base_type::load(in); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + base_type::CEREAL_SAVE_FUNCTION_NAME(ar); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + base_type::CEREAL_LOAD_FUNCTION_NAME(ar); + } + void set_vector(SDSL_UNUSED sa_type const *) + {} +}; +template +struct isa_sampling +{ + template + using type = _isa_sampling; + using sampling_category = isa_sampling_tag; +}; +template +class _text_order_isa_sampling_support +{ + static_assert(t_csa::sa_sample_dens == t_csa::isa_sample_dens, + "ISA sampling requires: sa_sample_dens == isa_sample_dens"); +public: + typedef typename bit_vector::size_type size_type; + typedef typename bit_vector::value_type value_type; + typedef typename t_csa::sa_sample_type sa_type; + typedef typename sa_type::bv_type bv_type; + enum + { + sample_dens = t_csa::isa_sample_dens + }; + typedef isa_sampling_tag sampling_category; +private: + t_sel m_select_marked; + t_inv_perm m_inv_perm; +public: + t_sel const & select_marked = m_select_marked; + _text_order_isa_sampling_support() + {} + _text_order_isa_sampling_support(SDSL_UNUSED cache_config const & cconfig, + const typename std::enable_if::type sa_sample) + { + m_select_marked = t_sel(&(sa_sample->marked)); + int_vector<> const * perm = (int_vector<> const *)sa_sample; + m_inv_perm = t_inv_perm(perm); + m_inv_perm.set_vector(perm); + } + _text_order_isa_sampling_support(_text_order_isa_sampling_support const & st) + { + m_inv_perm = st.m_inv_perm; + m_select_marked = st.m_select_marked; + } + inline value_type operator[](size_type i) const + { + return m_select_marked(m_inv_perm[i / sample_dens] + 1); + } + inline std::tuple sample_leq(size_type i) const + { + size_type ci = i / sample_dens; + return std::make_tuple(m_select_marked(m_inv_perm[ci] + 1), ci * sample_dens); + } + inline std::tuple sample_qeq(size_type i) const + { + size_type ci = (i / sample_dens + 1) % m_inv_perm.size(); + return 
std::make_tuple(m_select_marked(m_inv_perm[ci] + 1), ci * sample_dens); + } + _text_order_isa_sampling_support & operator=(_text_order_isa_sampling_support const & st) + { + if (this != &st) + { + m_inv_perm = st.m_inv_perm; + m_select_marked = st.m_select_marked; + } + return *this; + } + void swap(_text_order_isa_sampling_support & st) + { + if (this != &st) + { + m_inv_perm.swap(st.m_inv_perm); + m_select_marked.swap(st.m_select_marked); + } + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += m_inv_perm.serialize(out, child, "inv_perm"); + written_bytes += m_select_marked.serialize(out, child, "select_marked"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in, sa_type const * sa_sample = nullptr) + { + m_inv_perm.load(in); + m_select_marked.load(in); + set_vector(sa_sample); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_inv_perm)); + ar(CEREAL_NVP(m_select_marked)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar, sa_type const * sa_sample = nullptr) + { + ar(CEREAL_NVP(m_inv_perm)); + ar(CEREAL_NVP(m_select_marked)); + set_vector(sa_sample); + } + bool operator==(_text_order_isa_sampling_support const & other) const noexcept + { + return (m_inv_perm == other.m_inv_perm) && (m_select_marked == other.m_select_marked); + } + bool operator!=(_text_order_isa_sampling_support const & other) const noexcept + { + return !(*this == other); + } + void set_vector(sa_type const * sa_sample = nullptr) + { + if (sa_sample == nullptr) + { + m_select_marked.set_vector(nullptr); + m_inv_perm.set_vector(nullptr); + } + else + { + m_select_marked.set_vector(&(sa_sample->marked)); + m_inv_perm.set_vector((int_vector<> const *)sa_sample); + } + } +}; +template , 
class t_sel = void> +struct text_order_isa_sampling_support +{ + template + using type = _text_order_isa_sampling_support< + t_csa, + t_inv_perm, + typename std::conditional::value, + typename t_csa::sa_sample_type::bv_type::select_1_type, + t_sel>::type>; + using sampling_category = isa_sampling_tag; +}; +template +class _fuzzy_isa_sampling_support +{ + static_assert(t_csa::sa_sample_dens == t_csa::isa_sample_dens, + "ISA sampling requires: sa_sample_dens==isa_sample_dens"); +public: + typedef typename bit_vector::size_type size_type; + typedef typename bit_vector::value_type value_type; + typedef typename t_csa::sa_sample_type sa_type; + enum + { + sample_dens = t_csa::isa_sample_dens + }; + typedef isa_sampling_tag sampling_category; +private: + sa_type const * m_sa_p = nullptr; + t_select_sa m_select_marked_sa; +public: + _fuzzy_isa_sampling_support() + {} + _fuzzy_isa_sampling_support(SDSL_UNUSED cache_config const & cconfig, sa_type const * sa_sample) : m_sa_p(sa_sample) + { + util::init_support(m_select_marked_sa, &(sa_sample->marked_sa)); + } + _fuzzy_isa_sampling_support(_fuzzy_isa_sampling_support const & st) : m_select_marked_sa(st.m_select_marked_sa) + { + set_vector(st.m_sa_p); + } + inline value_type operator[](size_type i) const + { + return m_sa_p->inv(i); + } + inline std::tuple sample_leq(size_type i) const + { + size_type ci = i / sample_dens; + size_type j = m_sa_p->select_marked_isa(ci + 1); + if (j > i) + { + if (ci > 0) + { + ci = ci - 1; + } + else + { + ci = m_sa_p->size() - 1; + } + j = m_sa_p->select_marked_isa(ci + 1); + } + return std::make_tuple(m_select_marked_sa(m_sa_p->inv(ci) + 1), j); + } + inline std::tuple sample_qeq(size_type i) const + { + size_type ci = i / sample_dens; + size_type j = m_sa_p->select_marked_isa(ci + 1); + if (j < i) + { + if (ci < m_sa_p->size() - 1) + { + ci = ci + 1; + } + else + { + ci = 0; + } + j = m_sa_p->select_marked_isa(ci + 1); + } + return std::make_tuple(m_select_marked_sa(m_sa_p->inv(ci) + 1), 
j); + } + _fuzzy_isa_sampling_support & operator=(_fuzzy_isa_sampling_support const & st) + { + if (this != &st) + { + m_select_marked_sa = st.m_select_marked_sa; + set_vector(st.m_sa_p); + } + return *this; + } + void swap(_fuzzy_isa_sampling_support & st) + { + m_select_marked_sa.swap(st.m_select_marked_sa); + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += m_select_marked_sa.serialize(out, child, "select_marked_sa"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in, sa_type const * sa_sample = nullptr) + { + m_select_marked_sa.load(in); + set_vector(sa_sample); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_select_marked_sa)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar, sa_type const * sa_sample = nullptr) + { + ar(CEREAL_NVP(m_select_marked_sa)); + set_vector(sa_sample); + } + bool operator==(_fuzzy_isa_sampling_support const & other) const noexcept + { + return (m_select_marked_sa == other.m_select_marked_sa); + } + bool operator!=(_fuzzy_isa_sampling_support const & other) const noexcept + { + return !(*this == other); + } + void set_vector(sa_type const * sa_sample = nullptr) + { + m_sa_p = sa_sample; + if (nullptr != m_sa_p) + { + m_select_marked_sa.set_vector(&(sa_sample->marked_sa)); + } + } +}; +template +struct fuzzy_isa_sampling_support +{ + template + using type = + _fuzzy_isa_sampling_support::value, + typename t_csa::sa_sample_type::bv_sa_type::select_1_type, + t_select_sa>::type>; + using sampling_category = isa_sampling_tag; +}; +} +#endif +#ifndef SDSL_ENC_VECTOR +#define SDSL_ENC_VECTOR +#include +#include +#include +#include +#include +#ifndef SDSL_CODER_ELIAS_DELTA +#define SDSL_CODER_ELIAS_DELTA +#include +#include 
+namespace sdsl +{ +namespace coder +{ +template +class elias_delta +{ +public: + typedef uint64_t size_type; + static struct impl + { + uint32_t prefixsum[1 << 16]; + uint16_t prefixsum_8bit[(1 << 8) * 8]; + impl() + { + for (uint64_t x = 0; x < (1 << 16); ++x) + { + uint64_t const * w = &x; + uint64_t value = 0; + uint16_t numbers = 0, offset = 0, offset2 = 0; + while ((x >> offset) != 0) + { + uint64_t len_1_len = bits::read_unary_bounded(w, offset), len = 0; + if (len_1_len == 0) + { + offset += 1; + value += 1; + ++numbers; + } + else + { + offset2 = offset + len_1_len + 1; + len = bits::read_int_bounded(w, offset2, len_1_len) + (1ULL << len_1_len); + offset2 += len_1_len; + if (offset2 + len - 1 <= 16) + { + value += bits::read_int_bounded(w, offset2, len - 1) + (1ULL << (len - 1)); + offset = offset2 + len - 1; + ++numbers; + } + else + break; + } + } + uint32_t result = 0; + result = (offset << 24) | (numbers << 16) | value; + if (value > 0) + assert(offset > 0 and numbers > 0 and offset <= 16 and numbers <= 16); + prefixsum[x] = result; + } + for (uint32_t maxi = 1, idx = 0; maxi <= 8; ++maxi) + { + for (uint64_t x = 0; x < (1 << 8); ++x) + { + uint64_t const * w = &x; + uint64_t value = 0; + uint32_t numbers = 0, offset = 0, offset2 = 0; + while ((x >> offset) != 0 and numbers < maxi) + { + uint64_t len_1_len = bits::read_unary_bounded(w, offset), len = 0; + if (len_1_len == 0) + { + offset += 1; + value += 1; + ++numbers; + } + else + { + offset2 = offset + len_1_len + 1; + len = bits::read_int_bounded(w, offset2, len_1_len) + (1ULL << len_1_len); + offset2 += len_1_len; + if (offset2 + len - 1 <= 8) + { + value += bits::read_int_bounded(w, offset2, len - 1) + (1ULL << (len - 1)); + offset = offset2 + len - 1; + ++numbers; + } + else + break; + } + } + uint16_t result = 0; + result = (offset << 8) | (numbers << 4) | value; + prefixsum_8bit[idx++] = result; + } + } + } + } data; + static const uint8_t min_codeword_length = 1; + static uint8_t 
encoding_length(uint64_t); + template + static uint64_t decode(uint64_t const * data, const size_type start_idx, size_type n, t_iter it = (t_iter) nullptr); + static uint64_t decode_prefix_sum(uint64_t const * d, const size_type start_idx, size_type n); + static uint64_t + decode_prefix_sum(uint64_t const * d, const size_type start_idx, const size_type end_idx, size_type n); + template + static bool encode(int_vector const & v, int_vector & z); + template + static bool decode(int_vector const & z, int_vector & v); + static void encode(uint64_t x, uint64_t *& z, uint8_t & offset); + template + static uint64_t * raw_data(int_vector & v) + { + return v.m_data; + } +}; +template +inline uint8_t elias_delta::encoding_length(uint64_t w) +{ + uint8_t len_1 = w ? bits::hi(w) : 64; + return len_1 + (bits::hi(len_1 + 1) << 1) + 1; +} +template +template +inline bool elias_delta::encode(int_vector const & v, int_vector & z) +{ + typedef typename int_vector::size_type size_type; + z.width(v.width()); + size_type z_bit_size = 0; + uint64_t w; + const uint64_t zero_val = v.width() < 64 ? (1ULL) << v.width() : 0; + for (typename int_vector::const_iterator it = v.begin(), end = v.end(); it != end; ++it) + { + if ((w = *it) == 0) + { + w = zero_val; + } + z_bit_size += encoding_length(w); + } + z.bit_resize(z_bit_size); + z.shrink_to_fit(); + if (z_bit_size & 0x3F) + { + *(z.m_data + (z_bit_size >> 6)) = 0; + } + z_bit_size = 0; + uint64_t * z_data = z.m_data; + uint8_t offset = 0; + size_type len, len_1_len; + for (typename int_vector::const_iterator it = v.begin(), end = v.end(); it != end; ++it) + { + w = *it; + if (w == 0) + { + w = zero_val; + } + len = w ? 
bits::hi(w) + 1 : 65; + len_1_len = bits::hi(len); + bits::write_int_and_move(z_data, 1ULL << len_1_len, offset, len_1_len + 1); + if (len_1_len) + { + bits::write_int_and_move(z_data, len, offset, len_1_len); + bits::write_int_and_move(z_data, w, offset, len - 1); + } + } + return true; +} +template +inline void elias_delta::encode(uint64_t x, uint64_t *& z, uint8_t & offset) +{ + uint8_t len, len_1_len; + len = x ? bits::hi(x) + 1 : 65; + len_1_len = bits::hi(len); + bits::write_int_and_move(z, 1ULL << len_1_len, offset, len_1_len + 1); + if (len_1_len) + { + bits::write_int_and_move(z, len, offset, len_1_len); + bits::write_int_and_move(z, x, offset, len - 1); + } +} +template +inline uint64_t +elias_delta::decode_prefix_sum(uint64_t const * d, const size_type start_idx, const size_type end_idx, size_type n) +{ + if (n == 0) + return 0; + uint64_t const * lastdata = d + ((end_idx + 63) >> 6); + d += (start_idx >> 6); + uint64_t w = 0, value = 0; + int16_t buffered = 0, read = start_idx & 0x3F; + size_type i = 0; + if (n + read <= 64) + { + if (((*d >> read) & bits::lo_set[n]) == bits::lo_set[n]) + return n; + } + else + { + if ((*d >> read) == bits::lo_set[64 - read]) + { + value = 64 - read; + ++d; + n -= (64 - read); + read = 0; + while (n >= 64) + { + if (*d == 0xFFFFFFFFFFFFFFFFULL) + { + value += 64; + ++d; + n -= 64; + } + else + goto start_decoding; + } + if ((*d & bits::lo_set[n]) == bits::lo_set[n]) + return value + n; + } + } +start_decoding: + while (i < n) + { + while (buffered < 64 and d < lastdata) + { + fill_buffer: + w |= (((*d) >> read) << buffered); + if (read >= buffered) + { + ++d; + buffered += 64 - read; + read = 0; + } + else + { + read += 64 - buffered; + buffered = 64; + } + } + uint32_t rbp = bits::lo(~w); + if (rbp > 0) + { + i += rbp; + value += rbp; + if (i >= n) + { + return value - (i - n); + } + assert((int64_t)buffered >= rbp); + buffered -= rbp; + w >>= rbp; + if (buffered < 16) + goto fill_buffer; + } + { + begin_decode: + 
uint32_t psum = elias_delta::data.prefixsum[w & 0x0000FFFF]; + if (!psum or i + ((psum >> 16) & 0x00FF) > n) + { + if (w == 0) + { + w |= (((*d) >> read) << buffered); + if (read >= buffered) + { + ++d; + buffered += 64 - read; + read = 0; + } + else + { + read += 64 - buffered; + buffered = 64; + }; + if (!w) + { + w |= (((*d) >> read) << buffered); + if (read >= buffered) + { + ++d; + buffered += 64 - read; + read = 0; + } + else + { + read += 64 - buffered; + buffered = 64; + }; + } + } + uint16_t len_1_len = bits::lo(w); + buffered -= (len_1_len + 1); + w >>= (len_1_len + 1); + if (len_1_len > buffered) + { + w |= (((*d) >> read) << buffered); + if (read >= buffered) + { + ++d; + buffered += 64 - read; + read = 0; + } + else + { + read += 64 - buffered; + buffered = 64; + }; + if (len_1_len > buffered) + { + w |= (((*d) >> read) << buffered); + if (read >= buffered) + { + ++d; + buffered += 64 - read; + read = 0; + } + else + { + read += 64 - buffered; + buffered = 64; + }; + } + } + uint16_t len_1 = (w & bits::lo_set[len_1_len]) + (1ULL << len_1_len) - 1; + buffered -= len_1_len; + w >>= len_1_len; + if (len_1 > buffered) + { + w |= (((*d) >> read) << buffered); + if (read >= buffered) + { + ++d; + buffered += 64 - read; + read = 0; + } + else + { + read += 64 - buffered; + buffered = 64; + }; + if (len_1 > buffered) + { + w |= (((*d) >> read) << buffered); + if (read >= buffered) + { + ++d; + buffered += 64 - read; + read = 0; + } + else + { + read += 64 - buffered; + buffered = 64; + }; + } + } + value += (w & bits::lo_set[len_1]) + (len_1 < 64) * (1ULL << (len_1)); + buffered -= len_1; + if (len_1 < 64) + { + w >>= len_1; + } + else + { + w = 0; + } + ++i; + if (i == n) + return value; + if (buffered >= 16) + goto begin_decode; + } + else + { + value += (psum & 0x0000FFFF); + i += ((psum >> 16) & 0x00FF); + if (i == n) + return value; + buffered -= (psum >> 24); + w >>= (psum >> 24); + if (buffered >= 16) + goto begin_decode; + } + } + }; + return value; +} 
+template +inline uint64_t elias_delta::decode_prefix_sum(uint64_t const * d, const size_type start_idx, size_type n) +{ + if (n == 0) + return 0; + d += (start_idx >> 6); + uint64_t value = 0; + size_type i = 0; + uint8_t offset = start_idx & 0x3F; + if (n < 24) + { + if (n + offset <= 64) + { + if (((*d >> offset) & bits::lo_set[n]) == bits::lo_set[n]) + return n; + } + else + { + if ((*d >> offset) == bits::lo_set[64 - offset]) + { + value = 64 - offset; + ++d; + n -= (64 - offset); + offset = 0; + while (n >= 64) + { + if (*d == 0xFFFFFFFFFFFFFFFFULL) + { + value += 64; + ++d; + n -= 64; + } + else + { + uint8_t temp = bits::lo(~(*d)); + value += temp; + n -= temp; + offset = temp; + goto start_decoding; + } + } + if ((*d & bits::lo_set[n]) == bits::lo_set[n]) + return value + n; + } + } + } +start_decoding: + while (i < n) + { + if (((*d >> offset) & 0xF) == 0xF) + { + uint8_t maxdecode = n - i > 63 ? 63 : n - i; + uint8_t rbp = bits::lo(~bits::read_int(d, offset, maxdecode)); + i += rbp; + value += rbp; + if (rbp + offset >= 64) + { + ++d; + offset = (rbp + offset) & 0x3F; + } + else + { + offset += rbp; + } + if (rbp == maxdecode) + continue; + } + while (i < n) + { + uint32_t psum = elias_delta::data.prefixsum[bits::read_int(d, offset, 16)]; + if (psum == 0) + { + goto decode_single; + } + else if (i + ((psum >> 16) & 0x00FF) > n) + { + if (n - i <= 8) + { + psum = elias_delta::data.prefixsum_8bit[bits::read_int(d, offset, 8) | ((n - i - 1) << 8)]; + if (psum > 0) + { + value += (psum & 0xF); + i += ((psum >> 4) & 0xF); + offset += (psum >> 8); + if (offset >= 64) + { + offset &= 0x3F; + ++d; + } + } + } + break; + } + else + { + value += (psum & 0x0000FFFF); + i += ((psum >> 16) & 0x00FF); + offset += (psum >> 24); + if (offset >= 64) + { + offset &= 0x3F; + ++d; + } + } + } + if (i < n) + { + decode_single: + i++; + uint16_t len_1_len = bits::read_unary_and_move(d, offset); + uint16_t len_1 = bits::read_int_and_move(d, offset, len_1_len) + (1ULL << 
len_1_len) - 1; + value += bits::read_int_and_move(d, offset, len_1) + (len_1 < 64) * (1ULL << (len_1)); + } + } + return value; +} +template +template +inline bool elias_delta::decode(int_vector const & z, int_vector & v) +{ + typename int_vector::size_type len_1_len, len, n = 0; + uint64_t const * z_data = z.data(); + uint64_t const * z_end = z.data() + (z.bit_size() >> 6); + uint8_t offset = 0; + while ((z_data < z_end) or (z_data == z_end and offset < (z.bit_size() & 0x3F))) + { + len_1_len = bits::read_unary_and_move(z_data, offset); + if (len_1_len) + { + len = bits::read_int_and_move(z_data, offset, len_1_len) + (1ULL << len_1_len); + bits::move_right(z_data, offset, len - 1); + } + ++n; + } + v.width(z.width()); + v.resize(n); + v.shrink_to_fit(); + return decode(z.data(), 0, n, v.begin()); +} +template +template +inline uint64_t elias_delta::decode(uint64_t const * d, const size_type start_idx, size_type n, t_iter it) +{ + d += (start_idx >> 6); + uint64_t value = 0; + size_type i = 0; + size_type len_1_len, len; + uint8_t offset = start_idx & 0x3F; + while (i++ < n) + { + if (!t_sumup) + value = 0; + len_1_len = bits::read_unary_and_move(d, offset); + if (!len_1_len) + { + value += 1; + } + else + { + len = bits::read_int_and_move(d, offset, len_1_len) + (1ULL << len_1_len); + value += bits::read_int_and_move(d, offset, len - 1) + (len - 1 < 64) * (1ULL << (len - 1)); + } + if (t_inc) + *(it++) = value; + } + return value; +} +template +typename elias_delta::impl elias_delta::data; +} +} +#endif +namespace sdsl +{ +template +struct enc_vector_trait +{ + typedef int_vector<0> int_vector_type; +}; +template <> +struct enc_vector_trait<32> +{ + typedef int_vector<32> int_vector_type; +}; +template <> +struct enc_vector_trait<64> +{ + typedef int_vector<64> int_vector_type; +}; +template , uint32_t t_dens = 128, uint8_t t_width = 0> +class enc_vector +{ +private: + static_assert(t_dens > 1, "enc_vector: sample density must be larger than `1`"); +public: + 
typedef uint64_t value_type; + typedef random_access_const_iterator iterator; + typedef iterator const_iterator; + typedef const value_type reference; + typedef const value_type const_reference; + typedef value_type const * const_pointer; + typedef ptrdiff_t difference_type; + typedef int_vector<>::size_type size_type; + typedef t_coder coder; + typedef typename enc_vector_trait::int_vector_type int_vector_type; + typedef iv_tag index_category; + static const uint32_t sample_dens = t_dens; + typedef enc_vector enc_vec_type; + int_vector<0> m_z; /* bit stream holding the coder-encoded differences between consecutive non-sampled values */ +private: + int_vector_type m_sample_vals_and_pointer; /* interleaved table: entry 2k is a sampled value, entry 2k+1 the bit offset into m_z */ + size_type m_size = 0; /* number of stored elements */ + void clear() /* Reset to the empty state: release m_z and the sample table, set the size to 0. */ + { + m_z.resize(0); + m_z.shrink_to_fit(); + m_size = 0; + m_sample_vals_and_pointer.resize(0); + m_sample_vals_and_pointer.shrink_to_fit(); + } +public: + enc_vector() = default; + enc_vector(enc_vector const &) = default; + enc_vector(enc_vector &&) = default; + enc_vector & operator=(enc_vector const &) = default; + enc_vector & operator=(enc_vector &&) = default; + template + enc_vector(Container const & c); + template + enc_vector(int_vector_buffer & v_buf); + ~enc_vector() + {} + size_type size() const /* Number of elements stored in the vector. */ + { + return m_size; + } + static size_type max_size() + { + return int_vector<>::max_size() / 2; + } + bool empty() const /* True iff no element is stored. */ + { + return 0 == m_size; + } + const const_iterator begin() const + { + return const_iterator(this, 0); + } + const const_iterator end() const + { + return const_iterator(this, this->m_size); + } + bool operator==(enc_vector const & v) const /* Equal iff the element count, the encoded deltas and the sample/pointer table all match. */ + { + return m_size == v.m_size && m_z == v.m_z && m_sample_vals_and_pointer == v.m_sample_vals_and_pointer; /* fixed: was 'm_size && v.m_size', a truthiness test that made two empty vectors compare unequal (and an empty vector unequal to itself) */ + } + bool operator!=(enc_vector const & v) const /* Negation of operator==. */ + { + return !(*this == v); + } + value_type operator[](size_type i) const; + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const; + void load(std::istream & in); + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const; + template + void 
CEREAL_LOAD_FUNCTION_NAME(archive_t & ar); + value_type sample(const size_type i) const; + uint32_t get_sample_dens() const + { + return t_dens; + } + void get_inter_sampled_values(const size_type i, uint64_t * it) const + { + *(it++) = 0; + if (i * t_dens + t_dens - 1 < size()) + { + t_coder::template decode(m_z.data(), m_sample_vals_and_pointer[(i << 1) + 1], t_dens - 1, it); + } + else + { + assert(i * t_dens < size()); + t_coder::template decode(m_z.data(), + m_sample_vals_and_pointer[(i << 1) + 1], + size() - i * t_dens - 1, + it); + } + }; +}; +template +inline typename enc_vector::value_type +enc_vector::operator[](const size_type i) const +{ + assert(i + 1 != 0); + assert(i < m_size); + size_type idx = i / get_sample_dens(); + return m_sample_vals_and_pointer[idx << 1] + + t_coder::decode_prefix_sum(m_z.data(), m_sample_vals_and_pointer[(idx << 1) + 1], i - t_dens * idx); +} +template +inline typename enc_vector::value_type +enc_vector::sample(const size_type i) const +{ + assert(i * get_sample_dens() + 1 != 0); + assert(i * get_sample_dens() < m_size); + return m_sample_vals_and_pointer[i << 1]; +} +template +template +enc_vector::enc_vector(Container const & c) +{ + clear(); + if (c.empty()) + return; + typename Container::const_iterator it = c.begin(), end = c.end(); + typename Container::value_type v1 = *it, v2, max_sample_value = 0, x; + size_type samples = 0; + size_type z_size = 0; + for (size_type i = 0, no_sample = 0; it != end; ++it, ++i, --no_sample) + { + v2 = *it; + if (!no_sample) + { + no_sample = get_sample_dens(); + if (max_sample_value < v2) + max_sample_value = v2; + ++samples; + } + else + { + z_size += t_coder::encoding_length(v2 - v1); + } + v1 = v2; + } + { + if (max_sample_value > z_size + 1) + m_sample_vals_and_pointer.width(bits::hi(max_sample_value) + 1); + else + m_sample_vals_and_pointer.width(bits::hi(z_size + 1) + 1); + m_sample_vals_and_pointer.resize(2 * samples + 2); + util::set_to_value(m_sample_vals_and_pointer, 0); + 
typename int_vector_type::iterator sv_it = m_sample_vals_and_pointer.begin(); + z_size = 0; + size_type no_sample = 0; + for (it = c.begin(); it != end; ++it, --no_sample) + { + v2 = *it; + if (!no_sample) + { + no_sample = get_sample_dens(); + *sv_it = v2; + ++sv_it; + *sv_it = z_size; + ++sv_it; + } + else + { + x = v2 - v1; + z_size += t_coder::encoding_length(x); + } + v1 = v2; + } + *sv_it = 0; + ++sv_it; + *sv_it = z_size + 1; + ++sv_it; + m_z = int_vector<>(z_size, 0, 1); + uint64_t * z_data = t_coder::raw_data(m_z); + uint8_t offset = 0; + no_sample = 0; + for (it = c.begin(); it != end; ++it, --no_sample) + { + v2 = *it; + if (!no_sample) + { + no_sample = get_sample_dens(); + } + else + { + t_coder::encode(v2 - v1, z_data, offset); + } + v1 = v2; + } + } + m_size = c.size(); +} +template +template +enc_vector::enc_vector(int_vector_buffer & v_buf) +{ + clear(); + size_type n = v_buf.size(); + if (n == 0) + return; + value_type v1 = 0, v2 = 0, max_sample_value = 0; + size_type samples = 0, z_size = 0; + const size_type sd = get_sample_dens(); + for (size_type i = 0, no_sample = 0; i < n; ++i, --no_sample) + { + v2 = v_buf[i]; + if (!no_sample) + { + no_sample = sd; + if (max_sample_value < v2) + max_sample_value = v2; + ++samples; + } + else + { + z_size += t_coder::encoding_length(v2 - v1); + } + v1 = v2; + } + if (max_sample_value > z_size + 1) + m_sample_vals_and_pointer.width(bits::hi(max_sample_value) + 1); + else + m_sample_vals_and_pointer.width(bits::hi(z_size + 1) + 1); + m_sample_vals_and_pointer.resize(2 * samples + 2); + util::set_to_value(m_sample_vals_and_pointer, 0); + m_z = int_vector<>(z_size, 0, 1); + uint64_t * z_data = t_coder::raw_data(m_z); + uint8_t offset = 0; + z_size = 0; + for (size_type i = 0, j = 0, no_sample = 0; i < n; ++i, --no_sample) + { + v2 = v_buf[i]; + if (!no_sample) + { + no_sample = sd; + m_sample_vals_and_pointer[j++] = v2; + m_sample_vals_and_pointer[j++] = z_size; + } + else + { + z_size += 
t_coder::encoding_length(v2 - v1); + t_coder::encode(v2 - v1, z_data, offset); + } + v1 = v2; + } + m_size = n; +} +template +enc_vector<>::size_type +enc_vector::serialize(std::ostream & out, structure_tree_node * v, std::string name) const +{ + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += write_member(m_size, out, child, "size"); + written_bytes += m_z.serialize(out, child, "encoded deltas"); + written_bytes += m_sample_vals_and_pointer.serialize(out, child, "samples_and_pointers"); + structure_tree::add_size(child, written_bytes); + return written_bytes; +} +template +void enc_vector::load(std::istream & in) +{ + read_member(m_size, in); + m_z.load(in); + m_sample_vals_and_pointer.load(in); +} +template +template +void enc_vector::CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const +{ + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_z)); + ar(CEREAL_NVP(m_sample_vals_and_pointer)); +} +template +template +void enc_vector::CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) +{ + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_z)); + ar(CEREAL_NVP(m_sample_vals_and_pointer)); +} +} +#endif +#ifndef INCLUDED_SDSL_SUFFIX_ARRAY_HELPER +#define INCLUDED_SDSL_SUFFIX_ARRAY_HELPER +#include +namespace sdsl +{ +template +typename t_csa::char_type first_row_symbol(const typename t_csa::size_type i, t_csa const & csa) +{ + assert(i < csa.size()); + if (csa.sigma < 16) + { + typename t_csa::size_type res = 1; + while (res < csa.sigma and csa.C[res] <= i) + ++res; + return csa.comp2char[res - 1]; + } + else + { + typename t_csa::size_type upper_c = csa.sigma, + lower_c = 0; + typename t_csa::size_type res = 0; + do + { + res = (upper_c + lower_c) / 2; + if (i < csa.C[res]) + { + upper_c = res; + } + else if (i >= csa.C[res + 1]) + { + lower_c = res + 1; + } + } + while (i < csa.C[res] or i >= csa.C[res + 1]); + return csa.comp2char[res]; + } +} +template +struct traverse_csa_psi_trait +{ + typedef typename 
t_csa::value_type value_type; + typedef typename t_csa::size_type size_type; + static value_type access(t_csa const & csa, size_type i) + { + return csa.psi[i]; + } +}; +template +struct traverse_csa_psi_trait +{ + typedef typename t_csa::value_type value_type; + typedef typename t_csa::size_type size_type; + static value_type access(t_csa const & csa, size_type i) + { + return csa.isa[(csa[i] + csa.size() - 1) % csa.size()]; + } +}; +template +class traverse_csa_psi +{ +public: + typedef typename t_csa::value_type value_type; + typedef typename t_csa::size_type size_type; + typedef typename t_csa::difference_type difference_type; + typedef random_access_const_iterator const_iterator; + typedef csa_member_tag category; + typedef int_alphabet_tag alphabet_category; +private: + t_csa const & m_csa; +public: + traverse_csa_psi(t_csa const & csa_psi) : m_csa(csa_psi) + {} + traverse_csa_psi(traverse_csa_psi const & tcsa) : m_csa(tcsa.m_csa) + {} + value_type operator[](size_type i) const + { + assert(i < size()); + return traverse_csa_psi_trait::access(m_csa, i); + } + size_type size() const + { + return m_csa.size(); + } + size_type empty() const + { + return m_csa.empty(); + } + const_iterator begin() const + { + return const_iterator(this, 0); + } + const_iterator end() const + { + return const_iterator(this, size()); + } +}; +template +struct traverse_csa_saisa_trait +{ + typedef typename t_csa::value_type value_type; + typedef typename t_csa::size_type size_type; + static value_type access(t_csa const & csa, size_type i) + { + return csa.isa[(csa[i] + 1) % csa.size()]; + } +}; +template +struct traverse_csa_saisa_trait +{ + typedef typename t_csa::value_type value_type; + typedef typename t_csa::size_type size_type; + static value_type access(t_csa const & csa, size_type i) + { + return csa.isa[(csa[i] + csa.size() - 1) % csa.size()]; + } +}; +template +class traverse_csa_saisa +{ +public: + typedef typename t_csa::value_type value_type; + typedef typename 
t_csa::size_type size_type; + typedef typename t_csa::difference_type difference_type; + typedef random_access_const_iterator const_iterator; + typedef csa_member_tag category; + typedef int_alphabet_tag alphabet_category; +private: + t_csa const & m_csa; /* non-owning reference to the wrapped CSA */ +public: + traverse_csa_saisa(t_csa const & csa) : m_csa(csa) + {} + traverse_csa_saisa(traverse_csa_saisa const & tcsa) : m_csa(tcsa.m_csa) + {} + value_type operator[](size_type i) const /* Delegates to traverse_csa_saisa_trait::access on the wrapped CSA; i must be smaller than size(). */ + { + assert(i < size()); + return traverse_csa_saisa_trait::access(m_csa, i); + } + size_type size() const /* Size of the wrapped CSA. */ + { + return m_csa.size(); + } + size_type empty() const /* Forwards to the wrapped CSA. */ + { + return m_csa.empty(); /* fixed: was 'm_csa, empty()' - the comma operator discarded m_csa and called this very function again, recursing without end */ + } + const_iterator begin() const + { + return const_iterator(this, 0); + } + const_iterator end() const + { + return const_iterator(this, size()); + } +}; +template +class bwt_of_csa_psi /* Read-only view over a CSA: operator[] maps i through m_csa.lf and returns first_row_symbol of the result; rank/select forward to rank_bwt/select_bwt. */ +{ +public: + typedef typename t_csa::char_type value_type; + typedef typename t_csa::size_type size_type; + typedef typename t_csa::char_type char_type; + typedef typename t_csa::difference_type difference_type; + typedef random_access_const_iterator const_iterator; + typedef csa_member_tag category; + typedef typename t_csa::alphabet_category alphabet_category; +private: + t_csa const & m_csa; /* non-owning reference to the wrapped CSA */ +public: + bwt_of_csa_psi(t_csa const & csa) : m_csa(csa) + {} + value_type operator[](size_type i) const + { + assert(i < size()); + size_type pos = m_csa.lf[i]; + return first_row_symbol(pos, m_csa); + } + size_type rank(size_type i, const char_type c) const + { + return m_csa.rank_bwt(i, c); + } + size_type select(size_type i, const char_type c) const + { + return m_csa.select_bwt(i, c); + } + size_type size() const + { + return m_csa.size(); + } + size_type empty() const + { + return m_csa.empty(); + } + const_iterator begin() const + { + return const_iterator(this, 0); + } + const_iterator end() const + { + return const_iterator(this, size()); + } +}; +template +struct traverse_csa_wt_traits +{ + typedef typename t_csa::value_type value_type; + typedef 
typename t_csa::char_type char_type; + typedef typename t_csa::size_type size_type; + static value_type access(t_csa const & csa, size_type i) + { + char_type c = csa.F[i]; + return csa.wavelet_tree.select(i - csa.C[csa.char2comp[c]] + 1, c); + } +}; +template +struct traverse_csa_wt_traits +{ + typedef typename t_csa::value_type value_type; + typedef typename t_csa::char_type char_type; + typedef typename t_csa::size_type size_type; + static value_type access(t_csa const & csa, size_type i) + { + typename t_csa::char_type c; + auto rc = csa.wavelet_tree.inverse_select(i); + size_type j = rc.first; + c = rc.second; + return csa.C[csa.char2comp[c]] + j; + } +}; +template +class traverse_csa_wt +{ +public: + typedef typename t_csa::value_type value_type; + typedef typename t_csa::size_type size_type; + typedef typename t_csa::char_type char_type; + typedef typename t_csa::difference_type difference_type; + typedef random_access_const_iterator const_iterator; + typedef csa_member_tag category; + typedef int_alphabet_tag alphabet_category; +private: + t_csa const & m_csa; + traverse_csa_wt(){}; +public: + traverse_csa_wt(t_csa const & csa_wt) : m_csa(csa_wt) + {} + value_type operator[](size_type i) const + { + assert(i < m_csa.size()); + return traverse_csa_wt_traits::access(m_csa, i); + } + size_type size() const + { + return m_csa.size(); + } + size_type empty() const + { + return m_csa.empty(); + } + const_iterator begin() const + { + return const_iterator(this, 0); + } + const_iterator end() const + { + return const_iterator(this, size()); + } +}; +template +class bwt_of_csa_wt +{ +public: + typedef const typename t_csa::char_type value_type; + typedef typename t_csa::size_type size_type; + typedef typename t_csa::char_type char_type; + typedef typename t_csa::difference_type difference_type; + typedef random_access_const_iterator const_iterator; + typedef csa_member_tag category; + typedef typename t_csa::alphabet_category alphabet_category; +private: + t_csa 
const & m_csa; + bwt_of_csa_wt(){}; +public: + bwt_of_csa_wt(t_csa const & csa_wt) : m_csa(csa_wt) + {} + /* element i is read directly from the CSA's wavelet tree */ + value_type operator[](size_type i) const + { + assert(i < size()); + return m_csa.wavelet_tree[i]; + } + size_type size() const + { + return m_csa.size(); + } + size_type rank(size_type i, const char_type c) const + { + return m_csa.rank_bwt(i, c); + } + /* Fixed: forward to select_bwt. The original called m_csa.select(i, c), but csa_wt declares no member named select; its private helper (reachable here through the friend declaration) is select_bwt, matching rank() above. */ + size_type select(size_type i, const char_type c) const + { + return m_csa.select_bwt(i, c); + } + size_type empty() const + { + return m_csa.empty(); + } + const_iterator begin() const + { + return const_iterator(this, 0); + } + const_iterator end() const + { + return const_iterator(this, size()); + } +}; +/* Inverse-suffix-array view: operator[] starts from the sample returned by isa_sample.sample_qeq(i) and applies lf once per remaining step; when the sampled position is below i the step count wraps around by m_csa.size(). */ +template +class isa_of_csa_wt +{ +public: + typedef typename t_csa::value_type value_type; + typedef typename t_csa::size_type size_type; + typedef typename t_csa::difference_type difference_type; + typedef random_access_const_iterator const_iterator; + typedef csa_member_tag category; + typedef int_alphabet_tag alphabet_category; +private: + t_csa const & m_csa; + isa_of_csa_wt(){}; +public: + isa_of_csa_wt(t_csa const & csa_wt) : m_csa(csa_wt) + {} + value_type operator[](size_type i) const + { + assert(i < size()); + auto sample = m_csa.isa_sample.sample_qeq(i); + value_type result = std::get<0>(sample); + if (std::get<1>(sample) < i) + { + i = std::get<1>(sample) + m_csa.size() - i; + } + else + { + i = std::get<1>(sample) - i; + } + /* i now holds the number of lf steps still to apply */ + while (i--) + { + result = m_csa.lf[result]; + } + return result; + } + size_type size() const + { + return m_csa.size(); + } + size_type empty() const + { + return m_csa.empty(); + } + const_iterator begin() const + { + return const_iterator(this, 0); + } + const_iterator end() const + { + return const_iterator(this, size()); + } +}; +template +class isa_of_csa_psi +{ +public: + typedef typename t_csa::value_type value_type; + typedef typename t_csa::size_type size_type; + typedef typename t_csa::difference_type difference_type; + typedef random_access_const_iterator
const_iterator; + typedef csa_member_tag category; + typedef int_alphabet_tag alphabet_category; +private: + t_csa const & m_csa; + isa_of_csa_psi(){}; +public: + isa_of_csa_psi(t_csa const & csa_wt) : m_csa(csa_wt) + {} + /* start from the sample returned by isa_sample.sample_leq(i), then apply psi (i - sampled position) times */ + value_type operator[](size_type i) const + { + assert(i < size()); + auto sample = m_csa.isa_sample.sample_leq(i); + value_type result = std::get<0>(sample); + i = i - std::get<1>(sample); + while (i--) + { + result = m_csa.psi[result]; + } + return result; + } + size_type size() const + { + return m_csa.size(); + } + size_type empty() const + { + return m_csa.empty(); + } + const_iterator begin() const + { + return const_iterator(this, 0); + } + const_iterator end() const + { + return const_iterator(this, size()); + } +}; +/* Read-only view bound to a CSA by const reference: element i is first_row_symbol(i, csa). */ +template +class first_row_of_csa +{ +public: + typedef const typename t_csa::char_type value_type; + typedef typename t_csa::size_type size_type; + typedef typename t_csa::difference_type difference_type; + typedef random_access_const_iterator const_iterator; + typedef csa_member_tag category; + typedef typename t_csa::alphabet_category alphabet_category; +private: + t_csa const & m_csa; +public: + first_row_of_csa(t_csa const & csa) : m_csa(csa) + {} + value_type operator[](size_type i) const + { + assert(i < size()); + return first_row_symbol(i, m_csa); + } + size_type size() const + { + return m_csa.size(); + } + size_type empty() const + { + return m_csa.empty(); + } + const_iterator begin() const + { + return const_iterator(this, 0); + } + const_iterator end() const + { + return const_iterator(this, size()); + } +}; +/* Read-only view bound to a CSA by const reference; the default constructor is private. */ +template +class text_of_csa +{ +public: + typedef typename t_csa::char_type value_type; + typedef typename t_csa::size_type size_type; + typedef typename t_csa::difference_type difference_type; + typedef random_access_const_iterator const_iterator; + typedef csa_member_tag category; + typedef typename t_csa::alphabet_category alphabet_category; +private: + t_csa const & m_csa; + text_of_csa() + {} +public: +
text_of_csa(t_csa const & csa) : m_csa(csa) + {} + /* element i is the first-row symbol at position isa[i] */ + value_type operator[](size_type i) const + { + assert(i < size()); + return first_row_symbol(m_csa.isa[i], m_csa); + } + size_type size() const + { + return m_csa.size(); + } + size_type empty() const + { + return m_csa.empty(); + } + const_iterator begin() const + { + return const_iterator(this, 0); + } + const_iterator end() const + { + return const_iterator(this, size()); + } +}; +} +#endif +namespace sdsl +{ +/* csa_sada: index class tagged csa_tag / psi_tag. The static_asserts below validate the six template arguments (psi vector type, SA sample density, ISA sample density, SA sampling strategy, ISA sampling strategy, alphabet strategy). */ +template , + uint32_t t_dens = 32, + uint32_t t_inv_dens = 64, + class t_sa_sample_strat = sa_order_sa_sampling<>, + class t_isa_sample_strat = isa_sampling<>, + class t_alphabet_strat = byte_alphabet + > +class csa_sada +{ + static_assert(is_enc_vec::value, "First template argument has to be of type env_vector."); + static_assert(t_dens > 0, "Second template argument has to be greater then 0."); + static_assert(t_inv_dens > 0, "Third template argument has to be greater then 0."); + static_assert(std::is_same::type, sa_sampling_tag>::value, + "Forth template argument has to be a suffix array sampling strategy."); + static_assert(std::is_same::type, isa_sampling_tag>::value, + "Fifth template argument has to be a inverse suffix array sampling strategy."); + static_assert(is_alphabet::value, "Sixth template argument has to be a alphabet strategy."); + /* the BWT view needs access to the private rank_bwt/select_bwt helpers */ + friend class bwt_of_csa_psi; +public: + enum + { + sa_sample_dens = t_dens, + isa_sample_dens = t_inv_dens + }; + typedef uint64_t value_type; + typedef random_access_const_iterator const_iterator; + typedef const_iterator iterator; + typedef const value_type const_reference; + typedef const_reference reference; + typedef const_reference * pointer; + typedef const pointer const_pointer; + typedef int_vector<>::size_type size_type; + typedef size_type csa_size_type; + typedef ptrdiff_t difference_type; + typedef t_enc_vec enc_vector_type; + typedef enc_vector_type psi_type; + typedef traverse_csa_psi lf_type; + typedef bwt_of_csa_psi bwt_type; + typedef
isa_of_csa_psi isa_type; + typedef text_of_csa text_type; + typedef first_row_of_csa first_row_type; + typedef typename t_sa_sample_strat::template type sa_sample_type; + typedef typename t_isa_sample_strat::template type isa_sample_type; + typedef t_alphabet_strat alphabet_type; + typedef typename alphabet_type::alphabet_category alphabet_category; + typedef typename alphabet_type::comp_char_type comp_char_type; + typedef typename alphabet_type::char_type char_type; + typedef typename alphabet_type::string_type string_type; + typedef csa_sada csa_type; + typedef csa_tag index_category; + typedef psi_tag extract_category; + friend class traverse_csa_psi; + friend class traverse_csa_psi; + static const uint32_t linear_decode_limit = 100000; +private: + enc_vector_type m_psi; + sa_sample_type m_sa_sample; + isa_sample_type m_isa_sample; + alphabet_type m_alphabet; + /* scratch buffer used by rank_bwt; mutable so the const method can fill it */ + mutable std::vector m_psi_buf; + /* allocates m_psi_buf with sample_dens + 1 entries, but only when the enc_vector sample density is below linear_decode_limit; otherwise the buffer stays empty */ + void create_buffer() + { + if (enc_vector_type::sample_dens < linear_decode_limit) + { + m_psi_buf = std::vector(enc_vector_type::sample_dens + 1); + } + } +public: + /* public read-only references and views bound to this object's private members */ + const typename alphabet_type::char2comp_type & char2comp = m_alphabet.char2comp; + const typename alphabet_type::comp2char_type & comp2char = m_alphabet.comp2char; + const typename alphabet_type::C_type & C = m_alphabet.C; + const typename alphabet_type::sigma_type & sigma = m_alphabet.sigma; + psi_type const & psi = m_psi; + const lf_type lf = lf_type(*this); + const bwt_type bwt = bwt_type(*this); + const isa_type isa = isa_type(*this); + const bwt_type L = bwt_type(*this); + const first_row_type F = first_row_type(*this); + const text_type text = text_type(*this); + sa_sample_type const & sa_sample = m_sa_sample; + isa_sample_type const & isa_sample = m_isa_sample; + csa_sada() + { + create_buffer(); + } + ~csa_sada() + {} + /* copy constructor: copies the four data members, allocates a fresh buffer and re-binds the ISA samples to this object's SA samples */ + csa_sada(csa_sada const & csa) : + m_psi(csa.m_psi), + m_sa_sample(csa.m_sa_sample), + m_isa_sample(csa.m_isa_sample), + m_alphabet(csa.m_alphabet) + { + create_buffer(); +
m_isa_sample.set_vector(&m_sa_sample); + } + /* move constructor: moves the four data members, allocates a fresh buffer and re-binds the ISA samples to this object's SA samples */ + csa_sada(csa_sada && csa) : + m_psi(std::move(csa.m_psi)), + m_sa_sample(std::move(csa.m_sa_sample)), + m_isa_sample(std::move(csa.m_isa_sample)), + m_alphabet(std::move(csa.m_alphabet)) + { + create_buffer(); + m_isa_sample.set_vector(&m_sa_sample); + } + csa_sada(cache_config & config); + size_type size() const + { + return m_psi.size(); + } + static size_type max_size() + { + return t_enc_vec::max_size(); + } + bool empty() const + { + return m_psi.empty(); + } + const_iterator begin() const + { + return const_iterator(this, 0); + } + const_iterator end() const + { + return const_iterator(this, size()); + } + inline value_type operator[](size_type i) const; + /* copy assignment: builds a temporary copy and move-assigns it */ + csa_sada & operator=(csa_sada const & csa) + { + if (this != &csa) + { + csa_sada tmp(csa); + *this = std::move(tmp); + } + return *this; + } + /* move assignment: moves every member (including the scratch buffer) and re-binds the ISA samples to this object's SA samples */ + csa_sada & operator=(csa_sada && csa) + { + if (this != &csa) + { + m_psi = std::move(csa.m_psi); + m_sa_sample = std::move(csa.m_sa_sample); + m_isa_sample = std::move(csa.m_isa_sample); + m_isa_sample.set_vector(&m_sa_sample); + m_alphabet = std::move(csa.m_alphabet); + m_psi_buf = std::move(csa.m_psi_buf); + } + return *this; + } + /* equality compares psi, both samplings and the alphabet; the scratch buffer is not compared */ + bool operator==(csa_sada const & other) const noexcept + { + return (m_psi == other.m_psi) && (m_sa_sample == other.m_sa_sample) && (m_isa_sample == other.m_isa_sample) + && (m_alphabet == other.m_alphabet); + } + bool operator!=(csa_sada const & other) const noexcept + { + return !(*this == other); + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const; + void load(std::istream & in); + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const; + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar); + uint32_t get_sample_dens() const + { + return t_dens; + } +private: + size_type rank_bwt(size_type i, const char_type c) const + { + comp_char_type cc = char2comp[c]; + if (cc == 0 and c != 0) + return 0; + if (i == 0) + return
0; + assert(i <= size()); + size_type lower_b, upper_b; + const size_type sd = m_psi.get_sample_dens(); + /* first narrow the search to sample blocks: binary search over m_psi.sample() between the blocks covering C[cc] and C[cc + 1] */ + size_type lower_sb = (C[cc] + sd - 1) / sd; + size_type upper_sb = (C[cc + 1] + sd - 1) / sd; + while (lower_sb + 1 < upper_sb) + { + size_type mid = (lower_sb + upper_sb) / 2; + if (m_psi.sample(mid) >= i) + upper_sb = mid; + else + lower_sb = mid; + } + if (lower_sb == upper_sb) + { + lower_b = C[cc]; + upper_b = C[cc + 1]; + } + else if (lower_sb > (C[cc] + sd - 1) / sd) + { + lower_b = lower_sb * sd; + /* no scratch buffer allocated: fall back to the binary search at "finish" */ + if (0 == m_psi_buf.size()) + { + upper_b = std::min(upper_sb * sd, C[cc + 1]); + goto finish; + } + /* decode the values between two samples into the scratch buffer, write a sentinel entry (size() - smpl) and scan linearly for the first entry whose value plus the sample reaches i */ + uint64_t * p = m_psi_buf.data(); + m_psi.get_inter_sampled_values(lower_sb, p); + p = m_psi_buf.data(); + uint64_t smpl = m_psi.sample(lower_sb); + if (lower_b + m_psi.get_sample_dens() >= C[cc + 1]) + m_psi_buf[C[cc + 1] - lower_b] = size() - smpl; + else + m_psi_buf[m_psi.get_sample_dens()] = size() - smpl; + while ((*p++) + smpl < i) + ; + return p - 1 - m_psi_buf.data() + lower_b - C[cc]; + } + else + { + if (m_psi.sample(lower_sb) >= i) + { + lower_b = C[cc]; + upper_b = lower_sb * sd + 1; + } + else + { + lower_b = lower_sb * sd; + upper_b = std::min(upper_sb * sd, C[cc + 1]); + } + } + /* binary search over individual psi entries between lower_b and upper_b */ + finish: + while (lower_b + 1 < upper_b) + { + size_type mid = (lower_b + upper_b) / 2; + if (m_psi[mid] >= i) + upper_b = mid; + else + lower_b = mid; + } + if (lower_b > C[cc]) + return lower_b - C[cc] + 1; + else + { + return m_psi[lower_b] < i; + } + } + /* returns psi[C[cc] + i - 1] when that index is still below C[cc + 1]; returns size() otherwise, and also when c maps to comp 0 without being 0 itself */ + size_type select_bwt(size_type i, const char_type c) const + { + assert(i > 0); + comp_char_type cc = char2comp[c]; + if (cc == 0 and c != 0) + return size(); + assert(cc != 255); + if (C[cc] + i - 1 < C[cc + 1]) + { + return m_psi[C[cc] + i - 1]; + } + else + return size(); + } +}; +/* Construction from cached files: returns early (leaving only the scratch buffer set up) when the BWT cache file does not exist. */ +template +csa_sada::csa_sada(cache_config & config) +{ + create_buffer(); + if (!cache_file_exists(key_bwt(), config)) + { + return; + } + size_type n = 0; + { + int_vector_buffer bwt_buf( + cache_file_name(key_bwt(), config)); + n =
bwt_buf.size(); + auto event = memory_monitor::event("construct csa-alpbabet"); + m_alphabet = alphabet_type(bwt_buf, n); + } + { + auto event = memory_monitor::event("sample SA"); + m_sa_sample = sa_sample_type(config); + } + { + auto event = memory_monitor::event("sample ISA"); + isa_sample_type isa_s(config, &m_sa_sample); + util::swap_support(m_isa_sample, isa_s, &m_sa_sample, (sa_sample_type const *)nullptr); + } + /* cnt_chr starts as a copy of C and serves as a per-character write cursor below */ + int_vector<> cnt_chr(sigma, 0, bits::hi(n) + 1); + for (typename alphabet_type::sigma_type i = 0; i < sigma; ++i) + { + cnt_chr[i] = C[i]; + } + { + auto event = memory_monitor::event("construct PSI"); + int_vector_buffer bwt_buf( + cache_file_name(key_bwt(), config)); + std::string psi_file = cache_file_name(conf::KEY_PSI, config); + auto psi = write_out_mapper<>::create(psi_file, n, bits::hi(n) + 1); + /* store i at the next free slot of the character bwt_buf[i], advancing that character's cursor */ + for (size_type i = 0; i < n; ++i) + { + psi[cnt_chr[char2comp[bwt_buf[i]]]++] = i; + } + register_cache_file(conf::KEY_PSI, config); + } + { + auto event = memory_monitor::event("encode PSI"); + int_vector_buffer<> psi_buf(cache_file_name(conf::KEY_PSI, config)); + m_psi = t_enc_vec(psi_buf); + } +} +/* Element access: follow psi until a sampled position is reached, counting the steps, then subtract the step count from the sampled value (wrapping around by m_psi.size() when the sample is smaller than the step count). */ +template +inline auto +csa_sada::operator[](size_type i) const + -> value_type +{ + size_type off = 0; + while (!m_sa_sample.is_sampled(i)) + { + i = psi[i]; + ++off; + } + value_type result = m_sa_sample[i]; + if (result < off) + { + return m_psi.size() - (off - result); + } + else + return result - off; +} +/* Writes psi, SA samples, ISA samples and alphabet (in that order) to out and returns the sum of the byte counts reported by the members. */ +template +auto csa_sada::serialize( + std::ostream & out, + structure_tree_node * v, + std::string name) const -> size_type +{ + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += m_psi.serialize(out, child, "psi"); + written_bytes += m_sa_sample.serialize(out, child, "sa_samples"); + written_bytes += m_isa_sample.serialize(out, child, "isa_samples"); + written_bytes += m_alphabet.serialize(out, child, "alphabet"); + structure_tree::add_size(child,
written_bytes); + return written_bytes; +} +/* Reads the members in the same order serialize() writes them; the ISA samples are loaded with a pointer to this object's SA samples. */ +template +void csa_sada::load(std::istream & in) +{ + m_psi.load(in); + m_sa_sample.load(in); + m_isa_sample.load(in, &m_sa_sample); + m_alphabet.load(in); +} +template +template +void csa_sada::CEREAL_SAVE_FUNCTION_NAME( + archive_t & ar) const +{ + ar(CEREAL_NVP(m_psi)); + ar(CEREAL_NVP(m_sa_sample)); + ar(CEREAL_NVP(m_isa_sample)); + ar(CEREAL_NVP(m_alphabet)); +} +template +template +void csa_sada::CEREAL_LOAD_FUNCTION_NAME( + archive_t & ar) +{ + ar(CEREAL_NVP(m_psi)); + ar(CEREAL_NVP(m_sa_sample)); + ar(CEREAL_NVP(m_isa_sample)); + /* re-bind the ISA samples to this object's SA samples after loading */ + m_isa_sample.set_vector(&m_sa_sample); + ar(CEREAL_NVP(m_alphabet)); +} +} +#endif +#ifndef INCLUDED_SDSL_CSA_WT +#define INCLUDED_SDSL_CSA_WT +#include +#include +#include +#include +#include +#include +namespace sdsl +{ +/* csa_wt: index class tagged csa_tag / lf_tag. The static_asserts below validate the six template arguments (wavelet tree type, SA sample density, ISA sample density, SA sampling strategy, ISA sampling strategy, alphabet strategy). */ +template , + uint32_t t_dens = 32, + uint32_t t_inv_dens = 64, + class t_sa_sample_strat = sa_order_sa_sampling<>, + class t_isa_sample_strat = isa_sampling<>, + class t_alphabet_strat = + typename wt_alphabet_trait::type> +class csa_wt +{ + static_assert(std::is_same::type, wt_tag>::value, + "First template argument has to be a wavelet tree type."); + static_assert(t_dens > 0, "Second template argument has to be greater then 0."); + static_assert(t_inv_dens > 0, "Third template argument has to be greater then 0."); + static_assert(std::is_same::type, sa_sampling_tag>::value, + "Forth template argument has to be a suffix array sampling strategy."); + static_assert(std::is_same::type, isa_sampling_tag>::value, + "Fifth template argument has to be a inverse suffix array sampling strategy."); + static_assert(is_alphabet::value, "Sixth template argument has to be a alphabet strategy."); + /* the BWT view needs access to the private rank_bwt/select_bwt helpers */ + friend class bwt_of_csa_wt; +public: + enum + { + sa_sample_dens = t_dens, + isa_sample_dens = t_inv_dens + }; + typedef uint64_t value_type; + typedef random_access_const_iterator const_iterator; + typedef const_iterator iterator; + typedef const value_type const_reference; + typedef
const_reference reference; + typedef const_reference * pointer; + typedef const pointer const_pointer; + typedef int_vector<>::size_type size_type; + typedef size_type csa_size_type; + typedef ptrdiff_t difference_type; + typedef traverse_csa_wt psi_type; + typedef traverse_csa_wt lf_type; + typedef bwt_of_csa_wt bwt_type; + typedef isa_of_csa_wt isa_type; + typedef first_row_of_csa first_row_type; + typedef text_of_csa text_type; + typedef t_wt wavelet_tree_type; + typedef typename t_sa_sample_strat::template type sa_sample_type; + typedef typename t_isa_sample_strat::template type isa_sample_type; + typedef t_alphabet_strat alphabet_type; + typedef typename alphabet_type::char_type char_type; + typedef typename alphabet_type::comp_char_type comp_char_type; + typedef typename alphabet_type::string_type string_type; + typedef csa_wt csa_type; + typedef csa_tag index_category; + typedef lf_tag extract_category; + typedef typename alphabet_type::alphabet_category alphabet_category; +private: + /* owned data: wavelet tree, SA samples, ISA samples, alphabet mapping */ + t_wt m_wavelet_tree; + sa_sample_type m_sa_sample; + isa_sample_type m_isa_sample; + alphabet_type m_alphabet; +#ifdef USE_CSA_CACHE + mutable fast_cache csa_cache; +#endif +public: + /* public read-only references and views bound to this object's private members */ + const typename alphabet_type::char2comp_type & char2comp = m_alphabet.char2comp; + const typename alphabet_type::comp2char_type & comp2char = m_alphabet.comp2char; + const typename alphabet_type::C_type & C = m_alphabet.C; + const typename alphabet_type::sigma_type & sigma = m_alphabet.sigma; + const psi_type psi = psi_type(*this); + const lf_type lf = lf_type(*this); + const bwt_type bwt = bwt_type(*this); + const text_type text = text_type(*this); + const first_row_type F = first_row_type(*this); + const bwt_type L = bwt_type(*this); + const isa_type isa = isa_type(*this); + sa_sample_type const & sa_sample = m_sa_sample; + isa_sample_type const & isa_sample = m_isa_sample; + wavelet_tree_type const & wavelet_tree = m_wavelet_tree; + csa_wt() = default; + /* copy constructor: copies the four data members and re-binds the ISA samples to this object's SA samples */ + csa_wt(csa_wt const & csa) : +
m_wavelet_tree(csa.m_wavelet_tree), + m_sa_sample(csa.m_sa_sample), + m_isa_sample(csa.m_isa_sample), + m_alphabet(csa.m_alphabet) + { + m_isa_sample.set_vector(&m_sa_sample); + } + /* move constructor: moves the four data members and re-binds the ISA samples to this object's SA samples */ + csa_wt(csa_wt && csa) : + m_wavelet_tree(std::move(csa.m_wavelet_tree)), + m_sa_sample(std::move(csa.m_sa_sample)), + m_isa_sample(std::move(csa.m_isa_sample)), + m_alphabet(std::move(csa.m_alphabet)) + { + m_isa_sample.set_vector(&m_sa_sample); + } + csa_wt(cache_config & config); + size_type size() const + { + return m_wavelet_tree.size(); + } + static size_type max_size() + { + return bit_vector::max_size(); + } + bool empty() const + { + return m_wavelet_tree.empty(); + } + const_iterator begin() const + { + return const_iterator(this, 0); + } + const_iterator end() const + { + return const_iterator(this, size()); + } + inline value_type operator[](size_type i) const; + /* copy assignment: builds a temporary copy and move-assigns it */ + csa_wt & operator=(csa_wt const & csa) + { + if (this != &csa) + { + csa_wt tmp(csa); + *this = std::move(tmp); + } + return *this; + } + /* move assignment: moves every member and re-binds the ISA samples to this object's SA samples */ + csa_wt & operator=(csa_wt && csa) + { + if (this != &csa) + { + m_wavelet_tree = std::move(csa.m_wavelet_tree); + m_sa_sample = std::move(csa.m_sa_sample); + m_isa_sample = std::move(csa.m_isa_sample); + m_isa_sample.set_vector(&m_sa_sample); + m_alphabet = std::move(csa.m_alphabet); + } + return *this; + } + bool operator==(csa_wt const & other) const noexcept + { + return (m_wavelet_tree == other.m_wavelet_tree) && (m_sa_sample == other.m_sa_sample) + && (m_isa_sample == other.m_isa_sample) && (m_alphabet == other.m_alphabet); + } + bool operator!=(csa_wt const & other) const noexcept + { + return !(*this == other); + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const; + void load(std::istream & in); + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const; + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar); +private: + /* rank on the BWT is answered directly by the wavelet tree */ + size_type rank_bwt(size_type i, const char_type c) const + { + return
m_wavelet_tree.rank(i, c); + } + /* returns m_wavelet_tree.select(i, c) when C[cc] + i - 1 is still below C[cc + 1]; returns size() otherwise, and also when c maps to comp 0 without being 0 itself */ + size_type select_bwt(size_type i, const char_type c) const + { + assert(i > 0); + char_type cc = char2comp[c]; + if (cc == 0 and c != 0) + return size(); + assert(cc != 255); + if (C[cc] + i - 1 < C[cc + 1]) + { + return m_wavelet_tree.select(i, c); + } + else + return size(); + } +}; +/* Construction from cached files: returns early, leaving the members default-constructed, when the BWT cache file does not exist. Otherwise builds alphabet, SA samples, ISA samples and wavelet tree in that order, each inside its own memory_monitor event scope. */ +template +csa_wt::csa_wt(cache_config & config) +{ + if (!cache_file_exists(key_bwt(), config)) + { + return; + } + { + auto event = memory_monitor::event("construct csa-alpbabet"); + int_vector_buffer bwt_buf( + cache_file_name(key_bwt(), config)); + size_type n = bwt_buf.size(); + m_alphabet = alphabet_type(bwt_buf, n); + } + { + auto event = memory_monitor::event("sample SA"); + m_sa_sample = sa_sample_type(config); + } + { + auto event = memory_monitor::event("sample ISA"); + isa_sample_type isa_s(config, &m_sa_sample); + util::swap_support(m_isa_sample, isa_s, &m_sa_sample, &m_sa_sample); + } + { + auto event = memory_monitor::event("construct wavelet tree"); + int_vector_buffer bwt_buf( + cache_file_name(key_bwt(), config)); + m_wavelet_tree = wavelet_tree_type(bwt_buf.begin(), bwt_buf.end(), config.dir); + } +} +/* Element access: follow lf until a sampled position is reached, counting the steps, then add the step count to the sampled value, reduced by size() when the sum reaches size(). */ +template +inline auto csa_wt::operator[](size_type i) const + -> value_type +{ + size_type off = 0; + while (!m_sa_sample.is_sampled(i)) + { + i = lf[i]; + ++off; + } + value_type result = m_sa_sample[i]; + if (result + off < size()) + { + return result + off; + } + else + { + return result + off - size(); + } +} +/* Writes wavelet tree, SA samples, ISA samples and alphabet (in that order) to out and returns the sum of the byte counts reported by the members. */ +template +auto csa_wt::serialize(std::ostream & out, + structure_tree_node * v, + std::string name) const + -> size_type +{ + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += m_wavelet_tree.serialize(out, child, "wavelet_tree"); + written_bytes += m_sa_sample.serialize(out, child, "sa_samples"); + written_bytes += m_isa_sample.serialize(out, child, "isa_samples"); + written_bytes += m_alphabet.serialize(out, child, "alphabet"); +
structure_tree::add_size(child, written_bytes); + return written_bytes; +} +/* Reads the members in the same order serialize() writes them; the ISA samples are loaded with a pointer to this object's SA samples. */ +template +void csa_wt::load(std::istream & in) +{ + m_wavelet_tree.load(in); + m_sa_sample.load(in); + m_isa_sample.load(in, &m_sa_sample); + m_alphabet.load(in); +} +template +template +void csa_wt::CEREAL_SAVE_FUNCTION_NAME( + archive_t & ar) const +{ + ar(CEREAL_NVP(m_wavelet_tree)); + ar(CEREAL_NVP(m_sa_sample)); + ar(CEREAL_NVP(m_isa_sample)); + ar(CEREAL_NVP(m_alphabet)); +} +template +template +void csa_wt::CEREAL_LOAD_FUNCTION_NAME( + archive_t & ar) +{ + ar(CEREAL_NVP(m_wavelet_tree)); + ar(CEREAL_NVP(m_sa_sample)); + ar(CEREAL_NVP(m_isa_sample)); + /* re-bind the ISA samples to this object's SA samples after loading */ + m_isa_sample.set_vector(&m_sa_sample); + ar(CEREAL_NVP(m_alphabet)); +} +} +#endif +#ifndef INCLUDED_SDSL_CSA_UNCOMPRESSED +#define INCLUDED_SDSL_CSA_UNCOMPRESSED +#include +#include +#include +#include +namespace sdsl +{ +/* csa_bitcompressed: index class tagged csa_tag / psi_tag whose isa_type is the ISA sampling type itself. */ +template +class csa_bitcompressed +{ + /* the BWT view needs access to the private rank_bwt/select_bwt helpers */ + friend class bwt_of_csa_psi; +public: + typedef uint64_t value_type; + typedef random_access_const_iterator const_iterator; + typedef const_iterator iterator; + typedef const value_type const_reference; + typedef const_reference reference; + typedef const_reference * pointer; + typedef const pointer const_pointer; + typedef int_vector<>::size_type size_type; + typedef size_type csa_size_type; + typedef ptrdiff_t difference_type; + typedef traverse_csa_saisa psi_type; + typedef traverse_csa_saisa lf_type; + typedef bwt_of_csa_psi bwt_type; + typedef text_of_csa text_type; + typedef first_row_of_csa first_row_type; + typedef _sa_order_sampling sa_sample_type; + typedef _isa_sampling isa_sample_type; + typedef isa_sample_type isa_type; + typedef t_alphabet_strat alphabet_type; + typedef typename alphabet_type::char_type char_type; + typedef typename alphabet_type::comp_char_type comp_char_type; + typedef typename alphabet_type::string_type string_type; + typedef typename alphabet_type::alphabet_category alphabet_category; + typedef csa_bitcompressed csa_type; + typedef csa_tag
index_category; + typedef psi_tag extract_category; + /* both sample densities are fixed at 1 for this class */ + enum + { + sa_sample_dens = 1, + isa_sample_dens = 1 + }; +private: + sa_sample_type m_sa; + isa_sample_type m_isa; + alphabet_type m_alphabet; +public: + /* public read-only references and views bound to this object's private members; isa refers directly to m_isa */ + const typename alphabet_type::char2comp_type & char2comp = m_alphabet.char2comp; + const typename alphabet_type::comp2char_type & comp2char = m_alphabet.comp2char; + const typename alphabet_type::C_type & C = m_alphabet.C; + const typename alphabet_type::sigma_type & sigma = m_alphabet.sigma; + const psi_type psi = psi_type(*this); + const lf_type lf = lf_type(*this); + const bwt_type bwt = bwt_type(*this); + const bwt_type L = bwt_type(*this); + isa_type const & isa = m_isa; + const first_row_type F = first_row_type(*this); + const text_type text = text_type(*this); + sa_sample_type const & sa_sample = m_sa; + isa_sample_type const & isa_sample = m_isa; + csa_bitcompressed() + {} + csa_bitcompressed(csa_bitcompressed const & csa) : m_sa(csa.m_sa), m_isa(csa.m_isa), m_alphabet(csa.m_alphabet) + {} + /* move construction is implemented through move assignment */ + csa_bitcompressed(csa_bitcompressed && csa) + { + *this = std::move(csa); + } + /* builds the alphabet from the cached text, then SA and ISA from the cache configuration. NOTE(review): sa_buf is constructed but not used again in this constructor; confirm whether opening it is needed. */ + csa_bitcompressed(cache_config & config) + { + std::string text_file = cache_file_name(key_text(), config); + int_vector_buffer text_buf(text_file); + int_vector_buffer<> sa_buf(cache_file_name(conf::KEY_SA, config)); + size_type n = text_buf.size(); + m_alphabet = alphabet_type(text_buf, n); + m_sa = sa_sample_type(config); + m_isa = isa_sample_type(config); + } + size_type size() const + { + return m_sa.size(); + } + static size_type max_size() + { + return int_vector<>::max_size(); + } + bool empty() const + { + return m_sa.empty(); + } + const_iterator begin() const + { + return const_iterator(this, 0); + } + const_iterator end() const + { + return const_iterator(this, size()); + } + /* element access reads the stored suffix array directly */ + inline value_type operator[](size_type i) const + { + return m_sa[i]; + } + /* copy assignment: builds a temporary copy and move-assigns it */ + csa_bitcompressed & operator=(csa_bitcompressed const & csa) + { + if (this != &csa) + { + csa_bitcompressed tmp(csa); + *this
= std::move(tmp); + } + return *this; + } + csa_bitcompressed & operator=(csa_bitcompressed && csa) + { + if (this != &csa) + { + m_sa = std::move(csa.m_sa); + m_isa = std::move(csa.m_isa); + m_alphabet = std::move(csa.m_alphabet); + } + return *this; + } + bool operator==(csa_bitcompressed const & other) const noexcept + { + return (m_sa == other.m_sa) && (m_isa == other.m_isa) && (m_alphabet == other.m_alphabet); + } + bool operator!=(csa_bitcompressed const & other) const noexcept + { + return !(*this == other); + } + /* Writes SA, ISA and alphabet (in that order) to out and returns the sum of the byte counts reported by the members. */ + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += m_sa.serialize(out, child, "m_sa"); + written_bytes += m_isa.serialize(out, child, "m_isa"); + written_bytes += m_alphabet.serialize(out, child, "m_alphabet"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + /* reads the members in the same order serialize() writes them */ + void load(std::istream & in) + { + m_sa.load(in); + m_isa.load(in); + m_alphabet.load(in); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_sa)); + ar(CEREAL_NVP(m_isa)); + ar(CEREAL_NVP(m_alphabet)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_sa)); + ar(CEREAL_NVP(m_isa)); + ar(CEREAL_NVP(m_alphabet)); + } + size_type get_sample_dens() const + { + return 1; + } +private: + /* binary search over psi between C[cc] and C[cc + 1]; returns 0 when c maps to comp 0 without being 0 itself */ + size_type rank_bwt(size_type i, const char_type c) const + { + comp_char_type cc = char2comp[c]; + if (cc == 0 and c != 0) + return 0; + size_type lower_b = C[cc], + upper_b = C[((size_type)1) + cc]; + while (lower_b + 1 < upper_b) + { + size_type mid = (lower_b + upper_b) / 2; + if (psi[mid] >= i) + upper_b = mid; + else + lower_b = mid; + } + if (lower_b > C[cc]) + return lower_b - C[cc] + 1; + else + { + return psi[lower_b] < i; + } + } + size_type select_bwt(size_type i, const char_type c) const + { +
comp_char_type cc = char2comp[c]; + if (cc == 0 and c != 0) + return size(); + /* psi[C[cc] + i - 1] when that index is still below C[cc + 1], size() otherwise */ + if (C[cc] + i - 1 < C[((size_type)1) + cc]) + { + return psi[C[cc] + i - 1]; + } + return size(); + } +}; +} +#endif +#ifndef INCLUDED_SDSL_SUFFIX_ARRAY_ALGORITHM +#define INCLUDED_SDSL_SUFFIX_ARRAY_ALGORITHM +#include +#include +#include +#include +#include +namespace sdsl +{ +/* forward_search (pattern range): narrows [l, r] to the sub-interval matching the pattern [begin, end) using two binary searches driven by compare(); writes the bounds to l_res/r_res and returns r_res - l_res + 1. Returns 0 immediately when the pattern is at least as long as the CSA. */ +template +typename t_csa::size_type forward_search( + t_csa const & csa, + typename t_csa::size_type l, + typename t_csa::size_type r, + t_pat_iter begin, + t_pat_iter end, + typename t_csa::size_type & l_res, + typename t_csa::size_type & r_res, + SDSL_UNUSED + typename std::enable_if::value, csa_tag>::type x = csa_tag()) +{ + assert(l <= r); + assert(r < csa.size()); + auto size = csa.size(); + l_res = l; + /* NOTE(review): for l == 0 this presumably wraps around (size_type looks unsigned); the "r_res + 1" expressions below rely on that - confirm */ + r_res = l - 1; + auto l_res_upper = r + 1; + auto r_res_upper = r + 1; + if ((typename t_csa::size_type)(end - begin) >= size) + return 0; + /* compare(i): walks psi from position i one pattern symbol at a time; returns 1 when i lies before the C-interval of the current symbol, -1 when it lies after it or the symbol maps to comp 0, and 0 when every symbol matched */ + auto compare = [&](typename t_csa::size_type i) -> int + { + for (auto current = begin; current != end; current++) + { + auto index = csa.char2comp[*current]; + if (index == 0) + return -1; + if (csa.C[index + 1] - 1 < i) + return -1; + if (csa.C[index] > i) + return 1; + i = csa.psi[i]; + } + return 0; + }; + /* lower bound: first position whose comparison result is not 1 */ + while (l_res < l_res_upper) + { + typename t_csa::size_type sample = l_res + (l_res_upper - l_res) / 2; + int result = compare(sample); + if (result == 1) + l_res = sample + 1; + else if (result == -1) + l_res_upper = sample; + else + l_res_upper = sample; + } + /* upper bound: last position whose comparison result is not -1 */ + while (r_res + 1 < r_res_upper) + { + typename t_csa::size_type sample = r_res + (r_res_upper - r_res) / 2; + int result = compare(sample); + if (result == 1) + r_res = sample; + else if (result == -1) + r_res_upper = sample; + else + r_res = sample; + } + return r_res - l_res + 1; +} +template +typename t_csa::size_type forward_search( + t_csa const & csa, + typename t_csa::size_type l, + typename t_csa::size_type r, + typename t_csa::char_type c, + typename t_csa::size_type & l_res, + typename
t_csa::size_type & r_res, + SDSL_UNUSED + typename std::enable_if::value, csa_tag>::type x = csa_tag()) +{ + /* single-character overload: treat c as a one-element pattern range */ + auto c_ptr = &c; + return forward_search(csa, l, r, c_ptr, c_ptr + 1, l_res, r_res); +} +/* backward_search (single character): maps the interval [l, r] to the interval obtained by prepending c, using C and bwt.rank; writes an empty interval (l_res = 1, r_res = 0) when c maps to comp 0 without being 0 itself. Returns r_res + 1 - l_res. */ +template +typename t_csa::size_type backward_search( + t_csa const & csa, + typename t_csa::size_type l, + typename t_csa::size_type r, + typename t_csa::char_type c, + typename t_csa::size_type & l_res, + typename t_csa::size_type & r_res, + SDSL_UNUSED + typename std::enable_if::value, csa_tag>::type x = csa_tag()) +{ + assert(l <= r); + assert(r < csa.size()); + typename t_csa::size_type cc = csa.char2comp[c]; + if (cc == 0 and c > 0) + { + l_res = 1; + r_res = 0; + } + else + { + typename t_csa::size_type c_begin = csa.C[cc]; + /* full interval: the answer is the whole C-interval of c, no rank queries needed */ + if (l == 0 and r + 1 == csa.size()) + { + l_res = c_begin; + r_res = csa.C[cc + 1] - 1; + } + else + { + l_res = c_begin + csa.bwt.rank(l, c); + r_res = c_begin + csa.bwt.rank(r + 1, c) - 1; + } + } + /* NOTE(review): if size_type is unsigned this assertion can never fail - confirm */ + assert(r_res + 1 - l_res >= 0); + return r_res + 1 - l_res; +} +/* backward_search (pattern range): applies the single-character search from the last pattern symbol to the first, stopping early once the interval is empty. */ +template +typename t_csa::size_type backward_search( + t_csa const & csa, + typename t_csa::size_type l, + typename t_csa::size_type r, + t_pat_iter begin, + t_pat_iter end, + typename t_csa::size_type & l_res, + typename t_csa::size_type & r_res, + SDSL_UNUSED + typename std::enable_if::value, csa_tag>::type x = csa_tag()) +{ + t_pat_iter it = end; + while (begin < it and r + 1 - l > 0) + { + --it; + backward_search(csa, l, r, (typename t_csa::char_type) * it, l, r); + } + l_res = l; + r_res = r; + return r + 1 - l; +} +template +typename csa_wt::size_type +bidirectional_search(csa_wt const & csa_fwd, + typename csa_wt<>::size_type l_fwd, + typename csa_wt<>::size_type r_fwd, + typename csa_wt<>::size_type l_bwd, + typename csa_wt<>::size_type r_bwd, + typename csa_wt<>::char_type c, + typename csa_wt<>::size_type & l_fwd_res, + typename csa_wt<>::size_type & r_fwd_res, + typename csa_wt<>::size_type & l_bwd_res, + typename csa_wt<>::size_type & r_bwd_res, + SDSL_UNUSED typename
std::enable_if::type x = csa_tag()) +{ + assert(l_fwd <= r_fwd); + assert(r_fwd < csa_fwd.size()); + typedef typename csa_wt::size_type size_type; + size_type c_begin = csa_fwd.C[csa_fwd.char2comp[c]]; + /* one lex_count query on [l_fwd, r_fwd + 1) yields three counts: the rank of c at l_fwd plus the two counts s and b used to shrink the other interval */ + auto r_s_b = csa_fwd.wavelet_tree.lex_count(l_fwd, r_fwd + 1, c); + size_type rank_l = std::get<0>(r_s_b); + size_type s = std::get<1>(r_s_b), b = std::get<2>(r_s_b); + size_type rank_r = r_fwd - l_fwd - s - b + rank_l; + l_fwd_res = c_begin + rank_l; + r_fwd_res = c_begin + rank_r; + assert(r_fwd_res + 1 >= l_fwd_res); + /* the second interval shrinks by s at its lower end and by b at its upper end */ + l_bwd_res = l_bwd + s; + r_bwd_res = r_bwd - b; + assert(r_bwd_res - l_bwd_res == r_fwd_res - l_fwd_res); + return r_fwd_res + 1 - l_fwd_res; +} +/* Extends the match by the pattern [begin, end) read from its last symbol to its first, calling bidirectional_search on csa_fwd for each symbol and stopping early once the interval is empty. */ +template +typename csa_wt<>::size_type bidirectional_search_backward( + csa_wt const & csa_fwd, + SDSL_UNUSED csa_wt const & csa_bwd, + typename csa_wt<>::size_type l_fwd, + typename csa_wt<>::size_type r_fwd, + typename csa_wt<>::size_type l_bwd, + typename csa_wt<>::size_type r_bwd, + t_pat_iter begin, + t_pat_iter end, + typename csa_wt<>::size_type & l_fwd_res, + typename csa_wt<>::size_type & r_fwd_res, + typename csa_wt<>::size_type & l_bwd_res, + typename csa_wt<>::size_type & r_bwd_res, + SDSL_UNUSED typename std::enable_if::type x = csa_tag()) +{ + t_pat_iter it = end; + while (begin < it and r_fwd + 1 - l_fwd > 0) + { + --it; + bidirectional_search(csa_fwd, + l_fwd, + r_fwd, + l_bwd, + r_bwd, + (typename csa_wt<>::char_type) * it, + l_fwd, + r_fwd, + l_bwd, + r_bwd); + } + l_fwd_res = l_fwd; + r_fwd_res = r_fwd; + l_bwd_res = l_bwd; + r_bwd_res = r_bwd; + return r_fwd + 1 - l_fwd; +} +template +typename csa_wt::size_type bidirectional_search_forward( + SDSL_UNUSED csa_wt const & csa_fwd, + csa_wt const & csa_bwd, + typename csa_wt<>::size_type l_fwd, + typename csa_wt<>::size_type r_fwd, + typename csa_wt<>::size_type l_bwd, + typename csa_wt<>::size_type r_bwd, + t_pat_iter begin, + t_pat_iter end, + typename csa_wt<>::size_type & l_fwd_res, + typename csa_wt<>::size_type & r_fwd_res, +
typename csa_wt<>::size_type & l_bwd_res, + typename csa_wt<>::size_type & r_bwd_res, + SDSL_UNUSED typename std::enable_if::type x = csa_tag()) +{ + t_pat_iter it = begin; + while (it < end and r_fwd + 1 - l_fwd > 0) + { + bidirectional_search(csa_bwd, + l_bwd, + r_bwd, + l_fwd, + r_fwd, + (typename csa_wt<>::char_type) * it, + l_bwd, + r_bwd, + l_fwd, + r_fwd); + ++it; + } + l_fwd_res = l_fwd; + r_fwd_res = r_fwd; + l_bwd_res = l_bwd; + r_bwd_res = r_bwd; + return r_fwd + 1 - l_fwd; +} +template +typename t_csa::size_type count(t_csa const & csa, t_pat_iter begin, t_pat_iter end, csa_tag) +{ + if (end - begin > (typename std::iterator_traits::difference_type)csa.size()) + return 0; + typename t_csa::size_type t = 0; + typename t_csa::size_type result = backward_search(csa, 0, csa.size() - 1, begin, end, t, t); + return result; +} +template +typename t_csx::size_type count(t_csx const & csx, t_pat_iter begin, t_pat_iter end) +{ + typename t_csx::index_category tag; + return count(csx, begin, end, tag); +} +template +typename t_csx::size_type count(t_csx const & csx, const typename t_csx::string_type & pat) +{ + typename t_csx::index_category tag; + return count(csx, pat.begin(), pat.end(), tag); +} +template +auto lex_interval(t_csx const & csx, t_pat_iter begin, t_pat_iter end) -> std::array +{ + std::array res; + backward_search(csx, 0, csx.size() - 1, begin, end, res[0], res[1]); + return res; +} +template > +t_rac locate(t_csa const & csa, + t_pat_iter begin, + t_pat_iter end, + SDSL_UNUSED + typename std::enable_if::value, csa_tag>::type x = + csa_tag()) +{ + typename t_csa::size_type occ_begin, occ_end, occs; + occs = backward_search(csa, 0, csa.size() - 1, begin, end, occ_begin, occ_end); + t_rac occ(occs); + for (typename t_csa::size_type i = 0; i < occs; ++i) + { + occ[i] = csa[occ_begin + i]; + } + return occ; +} +template > +t_rac locate(t_csx const & csx, const typename t_csx::string_type & pat) +{ + typename t_csx::index_category tag; + return 
locate(csx, pat.begin(), pat.end(), tag); +} +template +typename t_csa::size_type extract( + t_csa const & csa, + typename t_csa::size_type begin, + typename t_csa::size_type end, + t_text_iter text, + SDSL_UNUSED + typename std::enable_if::value, csa_tag>::type x = csa_tag()) +{ + typename t_csa::extract_category extract_tag; + return extract(csa, begin, end, text, extract_tag); +} +template +typename t_csa::size_type +extract(t_csa const & csa, typename t_csa::size_type begin, typename t_csa::size_type end, t_text_iter text, lf_tag) +{ + assert(end < csa.size()); + assert(begin <= end); + auto steps = end - begin + 1; + if (steps > 0) + { + auto order = csa.isa[end]; + text[--steps] = first_row_symbol(order, csa); + while (steps != 0) + { + auto rc = csa.wavelet_tree.inverse_select(order); + auto j = rc.first; + auto c = rc.second; + order = csa.C[csa.char2comp[c]] + j; + text[--steps] = c; + } + } + return end - begin + 1; +} +template +typename t_csa::size_type +extract(t_csa const & csa, typename t_csa::size_type begin, typename t_csa::size_type end, t_text_iter text, psi_tag) +{ + assert(end < csa.size()); + assert(begin <= end); + typename t_csa::size_type steps = end - begin + 1; + for (typename t_csa::size_type i = 0, order = csa.isa[begin]; steps != 0; --steps, ++i) + { + text[i] = first_row_symbol(order, csa); + if (steps != 0) + order = csa.psi[order]; + } + return end - begin + 1; +} +template +typename t_csa::string_type extract( + t_csa const & csa, + typename t_csa::size_type begin, + typename t_csa::size_type end, + SDSL_UNUSED + typename std::enable_if::value, csa_tag>::type x = csa_tag()) +{ + assert(end <= csa.size()); + assert(begin <= end); + typedef typename t_csa::string_type string_type; + string_type result(end - begin + 1, (typename string_type::value_type)0); + extract(csa, begin, end, result.begin()); + return result; +} +} +#endif +#ifndef INCLUDED_SDSL_WAVELET_TREES +#define INCLUDED_SDSL_WAVELET_TREES +#ifndef INCLUDED_SDSL_WM_INT 
+#define INCLUDED_SDSL_WM_INT +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace sdsl +{ +template +class wm_int +{ +public: + typedef int_vector<>::size_type size_type; + typedef int_vector<>::value_type value_type; + typedef typename t_bitvector::difference_type difference_type; + typedef random_access_const_iterator const_iterator; + typedef const_iterator iterator; + typedef t_bitvector bit_vector_type; + typedef t_rank rank_1_type; + typedef t_select select_1_type; + typedef t_select_zero select_0_type; + typedef wt_tag index_category; + typedef int_alphabet_tag alphabet_category; + enum + { + lex_ordered = 0 + }; + typedef std::pair point_type; + typedef std::vector point_vec_type; + typedef std::pair r2d_res_type; + struct node_type; +protected: + size_type m_size = 0; + size_type m_sigma = 0; + bit_vector_type m_tree; + rank_1_type m_tree_rank; + select_1_type m_tree_select1; + select_0_type m_tree_select0; + uint32_t m_max_level = 0; + int_vector<64> m_zero_cnt; + int_vector<64> m_rank_level; +public: + size_type const & sigma = m_sigma; + bit_vector_type const & tree = m_tree; + uint32_t const & max_level = m_max_level; + wm_int() = default; + template + wm_int(t_it begin, t_it end, std::string tmp_dir = ram_file_name("")) : m_size(std::distance(begin, end)) + { + if (0 == m_size) + return; + m_sigma = 0; + value_type max_elem = 1; + for (auto it = begin; it != end; ++it) + { + value_type value = *it; + if (value > max_elem) + max_elem = value; + } + m_max_level = bits::hi(max_elem) + 1; + std::string tree_out_buf_file_name = tmp_file(tmp_dir, "_m_tree"); + { + int_vector<> rac(m_size, 0, m_max_level); + std::copy(begin, end, rac.begin()); + std::string zero_buf_file_name = tmp_file(tmp_dir, "_zero_buf"); + osfstream tree_out_buf(tree_out_buf_file_name, + std::ios::binary | std::ios::trunc | std::ios::out); + size_type bit_size = m_size * m_max_level; + 
int_vector<1>::write_header(bit_size, 1, tree_out_buf); + size_type tree_pos = 0; + uint64_t tree_word = 0; + m_zero_cnt = int_vector<64>(m_max_level, 0); + for (uint32_t k = 0; k < m_max_level; ++k) + { + uint8_t width = m_max_level - k - 1; + const uint64_t mask = 1ULL << width; + uint64_t x = 0; + size_type zeros = 0; + int_vector_buffer<> zero_buf(zero_buf_file_name, std::ios::out, 1024 * 1024, m_max_level); + for (size_t i = 0; i < m_size; ++i) + { + x = rac[i]; + if (x & mask) + { + tree_word |= (1ULL << (tree_pos & 0x3FULL)); + zero_buf.push_back(x); + } + else + { + rac[zeros++] = x; + } + ++tree_pos; + if ((tree_pos & 0x3FULL) == 0) + { + tree_out_buf.write((char *)&tree_word, sizeof(tree_word)); + tree_word = 0; + } + } + m_zero_cnt[k] = zeros; + for (size_t i = zeros; i < m_size; ++i) + { + rac[i] = zero_buf[i - zeros]; + } + } + if ((tree_pos & 0x3FULL) != 0) + { + tree_out_buf.write((char *)&tree_word, sizeof(tree_word)); + } + sdsl::remove(zero_buf_file_name); + tree_out_buf.close(); + m_sigma = std::unique(rac.begin(), rac.end()) - rac.begin(); + } + bit_vector tree; + load_from_file(tree, tree_out_buf_file_name); + sdsl::remove(tree_out_buf_file_name); + m_tree = bit_vector_type(std::move(tree)); + util::init_support(m_tree_rank, &m_tree); + util::init_support(m_tree_select0, &m_tree); + util::init_support(m_tree_select1, &m_tree); + m_rank_level = int_vector<64>(m_max_level, 0); + for (uint32_t k = 0; k < m_rank_level.size(); ++k) + { + m_rank_level[k] = m_tree_rank(k * m_size); + } + } + wm_int(wm_int const & wt) : + m_size(wt.m_size), + m_sigma(wt.m_sigma), + m_tree(wt.m_tree), + m_tree_rank(wt.m_tree_rank), + m_tree_select1(wt.m_tree_select1), + m_tree_select0(wt.m_tree_select0), + m_max_level(wt.m_max_level), + m_zero_cnt(wt.m_zero_cnt), + m_rank_level(wt.m_rank_level) + { + m_tree_rank.set_vector(&m_tree); + m_tree_select1.set_vector(&m_tree); + m_tree_select0.set_vector(&m_tree); + } + wm_int(wm_int && wt) : + m_size(wt.m_size), + 
m_sigma(wt.m_sigma), + m_tree(std::move(wt.m_tree)), + m_tree_rank(std::move(wt.m_tree_rank)), + m_tree_select1(std::move(wt.m_tree_select1)), + m_tree_select0(std::move(wt.m_tree_select0)), + m_max_level(wt.m_max_level), + m_zero_cnt(wt.m_zero_cnt), + m_rank_level(wt.m_rank_level) + { + m_tree_rank.set_vector(&m_tree); + m_tree_select1.set_vector(&m_tree); + m_tree_select0.set_vector(&m_tree); + } + wm_int & operator=(wm_int const & wt) + { + if (this != &wt) + { + m_size = wt.m_size; + m_sigma = wt.m_sigma; + m_tree = wt.m_tree; + m_tree_rank = wt.m_tree_rank; + m_tree_rank.set_vector(&m_tree); + m_tree_select1 = wt.m_tree_select1; + m_tree_select1.set_vector(&m_tree); + m_tree_select0 = wt.m_tree_select0; + m_tree_select0.set_vector(&m_tree); + m_max_level = wt.m_max_level; + m_zero_cnt = wt.m_zero_cnt; + m_rank_level = wt.m_rank_level; + } + return *this; + } + wm_int & operator=(wm_int && wt) + { + if (this != &wt) + { + m_size = wt.m_size; + m_sigma = wt.m_sigma; + m_tree = std::move(wt.m_tree); + m_tree_rank = std::move(wt.m_tree_rank); + m_tree_rank.set_vector(&m_tree); + m_tree_select1 = std::move(wt.m_tree_select1); + m_tree_select1.set_vector(&m_tree); + m_tree_select0 = std::move(wt.m_tree_select0); + m_tree_select0.set_vector(&m_tree); + m_max_level = std::move(wt.m_max_level); + m_zero_cnt = std::move(wt.m_zero_cnt); + m_rank_level = std::move(wt.m_rank_level); + } + return *this; + } + size_type size() const + { + return m_size; + } + bool empty() const + { + return m_size == 0; + } + value_type operator[](size_type i) const + { + assert(i < size()); + value_type res = 0; + for (uint32_t k = 0; k < m_max_level; ++k) + { + res <<= 1; + size_type rank_ones = m_tree_rank(i) - m_rank_level[k]; + if (m_tree[i]) + { + i = (k + 1) * m_size + m_zero_cnt[k] + rank_ones; + res |= 1; + } + else + { + auto rank_zeros = (i - k * m_size) - rank_ones; + i = (k + 1) * m_size + rank_zeros; + } + } + return res; + }; + size_type rank(size_type i, value_type c) const + 
{ + assert(i <= size()); + if (((1ULL) << (m_max_level)) <= c) + { + return 0; + } + size_type b = 0; + uint64_t mask = (1ULL) << (m_max_level - 1); + for (uint32_t k = 0; k < m_max_level and i; ++k) + { + size_type rank_b = m_tree_rank(b); + size_type ones = m_tree_rank(b + i) - rank_b; + size_type ones_p = rank_b - m_rank_level[k]; + if (c & mask) + { + i = ones; + b = (k + 1) * m_size + m_zero_cnt[k] + ones_p; + } + else + { + i = i - ones; + b = (k + 1) * m_size + (b - k * m_size - ones_p); + } + mask >>= 1; + } + return i; + }; + std::pair inverse_select(size_type i) const + { + assert(i < size()); + value_type c = 0; + size_type b = 0; + for (uint32_t k = 0; k < m_max_level; ++k) + { + size_type rank_b = m_tree_rank(b); + size_type ones = m_tree_rank(b + i) - rank_b; + size_type ones_p = rank_b - m_rank_level[k]; + c <<= 1; + if (m_tree[b + i]) + { + i = ones; + b = (k + 1) * m_size + m_zero_cnt[k] + ones_p; + c |= 1; + } + else + { + i = i - ones; + b = (k + 1) * m_size + (b - k * m_size - ones_p); + } + } + return std::make_pair(i, c); + } + size_type select(size_type i, value_type c) const + { + assert(1 <= i and i <= rank(size(), c)); + uint64_t mask = 1ULL << (m_max_level - 1); + int_vector<64> m_path_off(max_level + 1); + int_vector<64> m_path_rank_off(max_level + 1); + m_path_off[0] = m_path_rank_off[0] = 0; + size_type b = 0; + size_type r = i; + for (uint32_t k = 0; k < m_max_level and i; ++k) + { + size_type rank_b = m_tree_rank(b); + size_type ones = m_tree_rank(b + r) - rank_b; + size_type ones_p = rank_b - m_rank_level[k]; + if (c & mask) + { + r = ones; + b = (k + 1) * m_size + m_zero_cnt[k] + ones_p; + } + else + { + r = r - ones; + b = (k + 1) * m_size + (b - k * m_size - ones_p); + } + mask >>= 1; + m_path_off[k + 1] = b; + m_path_rank_off[k] = rank_b; + } + mask = 1ULL; + for (uint32_t k = m_max_level; k > 0; --k) + { + b = m_path_off[k - 1]; + size_type rank_b = m_path_rank_off[k - 1]; + if (c & mask) + { + i = m_tree_select1(rank_b + i) - 
b + 1; + } + else + { + i = m_tree_select0(b - rank_b + i) - b + 1; + } + mask <<= 1; + } + return i - 1; + }; + std::pair>> + range_search_2d(size_type lb, size_type rb, value_type vlb, value_type vrb, bool report = true) const + { + if (vrb > (1ULL << m_max_level)) + vrb = (1ULL << m_max_level); + if (vlb > vrb) + return make_pair(0, point_vec_type()); + size_type cnt_answers = 0; + point_vec_type point_vec; + if (lb <= rb) + { + std::vector is(m_max_level + 1); + std::vector rank_off(m_max_level + 1); + _range_search_2d(root(), {{lb, rb}}, vlb, vrb, 0, is, rank_off, point_vec, report, cnt_answers); + } + return make_pair(cnt_answers, point_vec); + } + void _range_search_2d(node_type v, + range_type r, + value_type vlb, + value_type vrb, + size_type ilb, + std::vector & is, + std::vector & rank_off, + point_vec_type & point_vec, + bool report, + size_type & cnt_answers) const + { + using std::get; + if (get<0>(r) > get<1>(r)) + return; + is[v.level] = v.offset + get<0>(r); + if (v.level == m_max_level) + { + for (size_type j = 1; j <= sdsl::size(r) and report; ++j) + { + size_type i = j; + size_type c = v.sym; + for (uint32_t k = m_max_level; k > 0; --k) + { + size_type offset = is[k - 1]; + size_type rank_offset = rank_off[k - 1]; + if (c & 1) + { + i = m_tree_select1(rank_offset + i) - offset + 1; + } + else + { + i = m_tree_select0(offset - rank_offset + i) - offset + 1; + } + c >>= 1; + } + point_vec.emplace_back(is[0] + i - 1, v.sym); + } + cnt_answers += sdsl::size(r); + return; + } + else + { + rank_off[v.level] = m_tree_rank(is[v.level]); + } + size_type irb = ilb + (1ULL << (m_max_level - v.level)); + size_type mid = (irb + ilb) >> 1; + auto c_v = expand(v); + auto c_r = expand(v, r); + if (!sdsl::empty(get<0>(c_r)) and vlb < mid and mid) + { + _range_search_2d(get<0>(c_v), + get<0>(c_r), + vlb, + std::min(vrb, mid - 1), + ilb, + is, + rank_off, + point_vec, + report, + cnt_answers); + } + if (!sdsl::empty(get<1>(c_r)) and vrb >= mid) + { + 
_range_search_2d(get<1>(c_v), + get<1>(c_r), + std::max(mid, vlb), + vrb, + mid, + is, + rank_off, + point_vec, + report, + cnt_answers); + } + } + const_iterator begin() const + { + return const_iterator(this, 0); + } + const_iterator end() const + { + return const_iterator(this, size()); + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += write_member(m_size, out, child, "size"); + written_bytes += write_member(m_sigma, out, child, "sigma"); + written_bytes += m_tree.serialize(out, child, "tree"); + written_bytes += m_tree_rank.serialize(out, child, "tree_rank"); + written_bytes += m_tree_select1.serialize(out, child, "tree_select_1"); + written_bytes += m_tree_select0.serialize(out, child, "tree_select_0"); + written_bytes += write_member(m_max_level, out, child, "max_level"); + written_bytes += m_zero_cnt.serialize(out, child, "zero_cnt"); + written_bytes += m_rank_level.serialize(out, child, "rank_level"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + read_member(m_size, in); + read_member(m_sigma, in); + m_tree.load(in); + m_tree_rank.load(in, &m_tree); + m_tree_select1.load(in, &m_tree); + m_tree_select0.load(in, &m_tree); + read_member(m_max_level, in); + m_zero_cnt.load(in); + m_rank_level.load(in); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_sigma)); + ar(CEREAL_NVP(m_max_level)); + ar(CEREAL_NVP(m_tree)); + ar(CEREAL_NVP(m_tree_rank)); + ar(CEREAL_NVP(m_tree_select1)); + ar(CEREAL_NVP(m_tree_select0)); + ar(CEREAL_NVP(m_zero_cnt)); + ar(CEREAL_NVP(m_rank_level)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_sigma)); + ar(CEREAL_NVP(m_max_level)); 
+ ar(CEREAL_NVP(m_tree)); + ar(CEREAL_NVP(m_tree_rank)); + m_tree_rank.set_vector(&m_tree); + ar(CEREAL_NVP(m_tree_select1)); + m_tree_select1.set_vector(&m_tree); + ar(CEREAL_NVP(m_tree_select0)); + m_tree_select0.set_vector(&m_tree); + ar(CEREAL_NVP(m_zero_cnt)); + ar(CEREAL_NVP(m_rank_level)); + } + bool operator==(wm_int const & other) const noexcept + { + return (m_size == other.m_size) && (m_sigma == other.m_sigma) && (m_max_level == other.m_max_level) + && (m_tree == other.m_tree) && (m_tree_rank == other.m_tree_rank) + && (m_tree_select1 == other.m_tree_select1) && (m_tree_select0 == other.m_tree_select0) + && (m_zero_cnt == other.m_zero_cnt) && (m_rank_level == other.m_rank_level); + } + bool operator!=(wm_int const & other) const noexcept + { + return !(*this == other); + } + struct node_type + { + size_type offset = 0; + size_type size = 0; + size_type level = 0; + value_type sym = 0; + node_type(size_type o = 0, size_type sz = 0, size_type l = 0, value_type sy = 0) : + offset(o), + size(sz), + level(l), + sym(sy) + {} + node_type(node_type const &) = default; + node_type(node_type &&) = default; + node_type & operator=(node_type const &) = default; + node_type & operator=(node_type &&) = default; + bool operator==(node_type const & v) const + { + return offset == v.offset; + } + bool operator<(node_type const & v) const + { + return offset < v.offset; + } + bool operator>(node_type const & v) const + { + return offset > v.offset; + } + }; + bool is_leaf(node_type const & v) const + { + return v.level == m_max_level; + } + value_type sym(node_type const & v) const + { + return v.sym; + } + auto bit_vec(node_type const & v) const -> node_bv_container + { + return node_bv_container(begin(v), end(v)); + } + auto seq(node_type const & v) const -> random_access_container> + { + return random_access_container>( + [&v, this](size_type i) + { + node_type vv = v; + while (!is_leaf(vv)) + { + auto vs = expand(vv); + auto rs = expand(vv, {0, i}); + bool bit = 
*(begin(vv) + i); + i = std::get<1>(rs[bit]); + vv = vs[bit]; + } + return sym(vv); + }, + size(v)); + } + bool empty(node_type const & v) const + { + return v.size == (size_type)0; + } + auto size(node_type const & v) const -> decltype(v.size) + { + return v.size; + } + node_type root() const + { + return node_type(0, m_size, 0, 0); + } + std::array expand(node_type const & v) const + { + node_type v_right = v; + return expand(std::move(v_right)); + } + std::array expand(node_type && v) const + { + node_type v_left; + size_type rank_b = m_tree_rank(v.offset); + size_type ones = m_tree_rank(v.offset + v.size) - rank_b; + size_type ones_p = rank_b - m_rank_level[v.level]; + v_left.offset = (v.level + 1) * m_size + (v.offset - v.level * m_size) - ones_p; + v_left.size = v.size - ones; + v_left.level = v.level + 1; + v_left.sym = v.sym << 1; + v.offset = (v.level + 1) * m_size + m_zero_cnt[v.level] + ones_p; + v.size = ones; + v.level = v.level + 1; + v.sym = (v.sym << 1) | 1; + return {{std::move(v_left), v}}; + } + std::array expand(node_type const & v, range_vec_type const & ranges) const + { + auto ranges_copy = ranges; + return expand(v, std::move(ranges_copy)); + } + std::array expand(node_type const & v, range_vec_type && ranges) const + { + auto v_sp_rank = m_tree_rank(v.offset); + range_vec_type res(ranges.size()); + size_t i = 0; + for (auto & r : ranges) + { + auto sp_rank = m_tree_rank(v.offset + r[0]); + auto right_size = m_tree_rank(v.offset + r[1] + 1) - sp_rank; + auto left_size = (r[1] - r[0] + 1) - right_size; + auto right_sp = sp_rank - v_sp_rank; + auto left_sp = r[0] - right_sp; + r = {{left_sp, left_sp + left_size - 1}}; + res[i++] = {{right_sp, right_sp + right_size - 1}}; + } + return {{ranges, std::move(res)}}; + } + std::array expand(node_type const & v, range_type const & r) const + { + auto v_sp_rank = m_tree_rank(v.offset); + auto sp_rank = m_tree_rank(v.offset + r[0]); + auto right_size = m_tree_rank(v.offset + r[1] + 1) - sp_rank; + auto 
left_size = (r[1] - r[0] + 1) - right_size; + auto right_sp = sp_rank - v_sp_rank; + auto left_sp = r[0] - right_sp; + return {{{{left_sp, left_sp + left_size - 1}}, {{right_sp, right_sp + right_size - 1}}}}; + } + std::pair path(value_type c) const + { + return {m_max_level, c}; + } +private: + auto begin(node_type const & v) const -> decltype(m_tree.begin() + v.offset) + { + return m_tree.begin() + v.offset; + } + auto end(node_type const & v) const -> decltype(m_tree.begin() + v.offset + v.size) + { + return m_tree.begin() + v.offset + v.size; + } +}; +} +#endif +#ifndef INCLUDED_SDSL_WT_AP +#define INCLUDED_SDSL_WT_AP +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace sdsl +{ +template >, class t_wt_int = wm_int<>> +class wt_ap +{ + static_assert(std::is_same::type, wt_tag>::value, + "First template argument has to be a wavelet tree."); + static_assert(std::is_same::type, wt_tag>::value, + "Second template argument has to be a wavelet tree."); +public: + typedef int_vector<>::size_type size_type; + typedef int_vector<>::value_type value_type; + typedef int_vector<>::difference_type difference_type; + typedef random_access_const_iterator const_iterator; + typedef const_iterator iterator; + typedef t_wt_byte wt_byte_type; + typedef t_wt_int wt_int_type; + typedef wt_tag index_category; + typedef int_alphabet_tag alphabet_category; + enum + { + lex_ordered = 0 + }; +protected: + size_type m_size = 0; + value_type m_sigma = 0; + value_type m_singleton_class_cnt = 0; + value_type m_class_cnt = 0; + wt_byte_type m_char2class; + wt_byte_type m_class; + std::vector m_offset; +private: + inline std::tuple try_get_char_class_offset(value_type c) const + { + if (c >= m_char2class.size()) + { + return std::make_tuple(false, 0, 0); + } + auto offset_class = m_char2class.inverse_select(c); + if (offset_class.second == m_class_cnt) + { + return std::make_tuple(false, 0, 0); + } + return 
std::make_tuple(true, offset_class.second, offset_class.first); + } +public: + size_type const & sigma = m_sigma; + wt_ap() + {} + template + wt_ap(t_it begin, t_it end, std::string tmp_dir = ram_file_name("")) : m_size(std::distance(begin, end)) + { + const uint8_t wt_byte_width = wt_byte_type::alphabet_category::WIDTH; + const uint8_t wt_int_width = wt_int_type::alphabet_category::WIDTH; + value_type max_symbol = 0; + std::vector> char_freq; + value_type pseudo_entries = 0; + { + auto event = memory_monitor::event("char freq"); + for (auto it = begin; it != end; ++it) + { + value_type element = *it; + while (element >= max_symbol) + { + char_freq.emplace_back(0, max_symbol); + max_symbol++; + pseudo_entries++; + } + if (char_freq[element].first == 0) + { + pseudo_entries--; + } + char_freq[element].first++; + } + std::sort(char_freq.rbegin(), char_freq.rend()); + m_sigma = max_symbol - pseudo_entries; + } + m_singleton_class_cnt = std::min(max_symbol, (value_type)bits::hi(m_sigma)); + m_class_cnt = bits::hi(m_sigma - m_singleton_class_cnt + 1) + m_singleton_class_cnt; + std::vector>> temp_file_offset_buffers; + int_vector m_char2class_buffer(max_symbol, m_class_cnt, bits::hi(m_class_cnt + 1) + 1); + for (value_type i = 0; i < m_singleton_class_cnt; ++i) + { + m_char2class_buffer[char_freq[i].second] = i; + } + value_type current_symbol = m_singleton_class_cnt; + value_type class_size = 1; + { + auto event = memory_monitor::event("char2class"); + for (value_type i = m_singleton_class_cnt; i < m_class_cnt; ++i) + { + class_size <<= 1; + value_type offset = 0; + for (; offset < class_size && current_symbol < m_sigma; ++offset, ++current_symbol) + { + m_char2class_buffer[char_freq[current_symbol].second] = i; + } + std::string temp_file_offset = tmp_dir + "_wt_ap_offset_" + util::to_string(i - m_singleton_class_cnt) + + "_" + util::to_string(util::pid()) + "_" + util::to_string(util::id()); + temp_file_offset_buffers.emplace_back(temp_file_offset, + 
int_vector_buffer(temp_file_offset, + std::ios::out, + 1024 * 1024, + bits::hi(offset) + 1)); + } + char_freq.clear(); + construct_im(m_char2class, m_char2class_buffer); + } + std::string temp_file_class = + tmp_dir + "_wt_ap_class_" + util::to_string(util::pid()) + "_" + util::to_string(util::id()); + int_vector_buffer class_buffer(temp_file_class, + std::ios::out, + 1024 * 1024, + bits::hi(m_class_cnt) + 1); + { + auto event = memory_monitor::event("write class and offset"); + for (auto it = begin; it != end; ++it) + { + value_type ch = *it; + value_type cl = m_char2class_buffer[ch]; + class_buffer.push_back(cl); + if (cl >= m_singleton_class_cnt) + { + value_type offset = m_char2class.rank(ch, cl); + cl -= m_singleton_class_cnt; + temp_file_offset_buffers[cl].second.push_back(offset); + } + } + class_buffer.close(); + } + { + auto event = memory_monitor::event("class WT"); + int_vector_buffer class_buffer(temp_file_class); + m_class = wt_byte_type(class_buffer.begin(), class_buffer.end(), tmp_dir); + } + sdsl::remove(temp_file_class); + { + auto event = memory_monitor::event("offset WTs"); + m_offset.resize(m_class_cnt - m_singleton_class_cnt); + for (value_type i = 0; i < m_class_cnt - m_singleton_class_cnt; ++i) + { + auto & temp_file_offset_buffer = temp_file_offset_buffers[i]; + temp_file_offset_buffer.second.close(); + { + int_vector_buffer offset_buffer(temp_file_offset_buffer.first); + m_offset[i] = wt_int_type(offset_buffer.begin(), offset_buffer.end(), tmp_dir); + } + sdsl::remove(temp_file_offset_buffer.first); + } + } + } + wt_ap(wt_ap const & wt) : + m_size(wt.m_size), + m_sigma(wt.m_sigma), + m_singleton_class_cnt(wt.m_singleton_class_cnt), + m_class_cnt(wt.m_class_cnt), + m_char2class(wt.m_char2class), + m_class(wt.m_class), + m_offset(wt.m_offset) + {} + wt_ap(wt_ap && wt) + { + *this = std::move(wt); + } + wt_ap & operator=(wt_ap const & wt) + { + if (this != &wt) + { + wt_ap tmp(wt); + *this = std::move(tmp); + } + return *this; + } + wt_ap & 
operator=(wt_ap && wt) + { + if (this != &wt) + { + m_size = wt.m_size; + m_sigma = wt.m_sigma; + m_singleton_class_cnt = wt.m_singleton_class_cnt; + m_class_cnt = wt.m_class_cnt; + m_char2class = std::move(wt.m_char2class); + m_class = std::move(wt.m_class); + m_offset = std::move(wt.m_offset); + } + return *this; + } + size_type size() const + { + return m_size; + } + bool empty() const + { + return m_size == 0; + } + value_type operator[](size_type i) const + { + assert(i < size()); + auto textoffset_class = m_class.inverse_select(i); + auto cl = textoffset_class.second; + value_type offset = + cl < m_singleton_class_cnt ? 0 : m_offset[cl - m_singleton_class_cnt][textoffset_class.first]; + return m_char2class.select(offset + 1, cl); + }; + size_type rank(size_type i, value_type c) const + { + assert(i <= size()); + auto success_class_offset = try_get_char_class_offset(c); + if (!std::get<0>(success_class_offset)) + { + return 0; + } + auto cl = std::get<1>(success_class_offset); + auto offset = std::get<2>(success_class_offset); + size_type count = m_class.rank(i, cl); + return cl < m_singleton_class_cnt ? 
count : m_offset[cl - m_singleton_class_cnt].rank(count, offset); + }; + std::pair inverse_select(size_type i) const + { + assert(i < size()); + auto textoffset_class = m_class.inverse_select(i); + auto textoffset = textoffset_class.first; + auto cl = textoffset_class.second; + if (cl < m_singleton_class_cnt) + { + return std::make_pair(textoffset, m_char2class.select(1, cl)); + } + auto class_result = m_offset[cl - m_singleton_class_cnt].inverse_select(textoffset); + return std::make_pair(class_result.first, m_char2class.select(class_result.second + 1, cl)); + } + size_type select(size_type i, value_type c) const + { + assert(1 <= i and i <= rank(size(), c)); + auto success_class_offset = try_get_char_class_offset(c); + if (!std::get<0>(success_class_offset)) + { + return m_size; + } + auto cl = std::get<1>(success_class_offset); + auto offset = std::get<2>(success_class_offset); + size_type text_offset = + cl < m_singleton_class_cnt ? i : 1 + m_offset[cl - m_singleton_class_cnt].select(i, offset); + return m_class.select(text_offset, cl); + }; + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += write_member(m_size, out, child, "size"); + written_bytes += write_member(m_sigma, out, child, "sigma"); + written_bytes += write_member(m_singleton_class_cnt, out, child, "singleton_classes"); + written_bytes += write_member(m_class_cnt, out, child, "classes"); + written_bytes += m_char2class.serialize(out, child, "char2class"); + written_bytes += m_class.serialize(out, child, "class"); + for (value_type i = 0; i < m_offset.size(); ++i) + { + written_bytes += m_offset[i].serialize(out, child, "offset"); + } + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + read_member(m_size, in); + read_member(m_sigma, in); + 
read_member(m_singleton_class_cnt, in); + read_member(m_class_cnt, in); + m_char2class.load(in); + m_class.load(in); + value_type offset_size = m_class_cnt - m_singleton_class_cnt; + m_offset.resize(offset_size); + for (value_type i = 0; i < offset_size; ++i) + { + m_offset[i].load(in); + } + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_sigma)); + ar(CEREAL_NVP(m_singleton_class_cnt)); + ar(CEREAL_NVP(m_class_cnt)); + ar(CEREAL_NVP(m_char2class)); + ar(CEREAL_NVP(m_class)); + ar(CEREAL_NVP(m_offset)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_sigma)); + ar(CEREAL_NVP(m_singleton_class_cnt)); + ar(CEREAL_NVP(m_class_cnt)); + ar(CEREAL_NVP(m_char2class)); + ar(CEREAL_NVP(m_class)); + ar(CEREAL_NVP(m_offset)); + } + iterator begin() + { + return {this, 0}; + }; + const_iterator end() + { + return {this, size()}; + }; + iterator begin() const + { + return {this, 0}; + }; + const_iterator end() const + { + return {this, size()}; + }; + bool operator==(wt_ap const & other) const noexcept + { + return (m_size == other.m_size) && (m_sigma == other.m_sigma) + && (m_singleton_class_cnt == other.m_singleton_class_cnt) && (m_class_cnt == other.m_class_cnt) + && (m_char2class == other.m_char2class) && (m_class == other.m_class) && (m_offset == other.m_offset); + } + bool operator!=(wt_ap const & other) const noexcept + { + return !(*this == other); + } +}; +} +#endif +#ifndef INCLUDED_SDSL_WT_BLCD +#define INCLUDED_SDSL_WT_BLCD +#include +#include +#include +#include +namespace sdsl +{ +struct balanced_shape; +template > +using wt_blcd = wt_pc; +template +struct _balanced_shape +{ + typedef typename t_wt::size_type size_type; + typedef std::pair tPII; + enum + { + lex_ordered = 1 + }; + template + static void construct_tree(t_rac & C, std::vector & temp_nodes) + { + size_type c = 0; + std::vector symbols; + std::for_each(std::begin(C), + 
std::end(C), + [&](decltype(*std::begin(C)) & freq) + { + if (freq > 0) + { + symbols.push_back(c); + } + ++c; + }); + uint64_t sigma = symbols.size(); + if (sigma > 0) + { + _construct_tree(pc_node::undef, symbols, 0, sigma, C, temp_nodes); + pc_node root = temp_nodes[0]; + for (uint64_t i = 1; i < temp_nodes.size(); ++i) + { + temp_nodes[i - 1] = temp_nodes[i]; + temp_nodes[i - 1].parent = (temp_nodes[i - 1].parent + temp_nodes.size() - 1) % temp_nodes.size(); + temp_nodes[i - 1].child[0] -= (temp_nodes[i - 1].child[0] != pc_node::undef); + temp_nodes[i - 1].child[1] -= (temp_nodes[i - 1].child[1] != pc_node::undef); + } + root.child[0] -= (root.child[0] != pc_node::undef); + root.child[1] -= (root.child[1] != pc_node::undef); + temp_nodes[temp_nodes.size() - 1] = root; + } + } + template + static tPII _construct_tree(uint64_t parent, + std::vector const & symbols, + uint64_t lb, + uint64_t sigma, + t_rac const & C, + std::vector & temp_nodes) + { + if (sigma == 1) + { + uint64_t freq = C[symbols[lb]]; + temp_nodes.emplace_back(pc_node(freq, symbols[lb], parent, pc_node::undef, pc_node::undef)); + return tPII(freq, temp_nodes.size() - 1); + } + else + { + temp_nodes.emplace_back(pc_node(0, 0, parent, pc_node::undef, pc_node::undef)); + uint64_t node_id = temp_nodes.size() - 1; + uint64_t l_sigma = (sigma + 1) / 2; + tPII freq_nptr_0 = _construct_tree(node_id, symbols, lb, l_sigma, C, temp_nodes); + tPII freq_nptr_1 = _construct_tree(node_id, symbols, lb + l_sigma, sigma - l_sigma, C, temp_nodes); + uint64_t freq = freq_nptr_0.first + freq_nptr_1.first; + temp_nodes[node_id].freq = freq; + temp_nodes[node_id].child[0] = freq_nptr_0.second; + temp_nodes[node_id].child[1] = freq_nptr_1.second; + return tPII(freq, node_id); + } + } +}; +struct balanced_shape +{ + template + using type = _balanced_shape; +}; +} +#endif +#ifndef INCLUDED_SDSL_WT_EPR +#define INCLUDED_SDSL_WT_EPR +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#ifndef INCLUDED_SDSL_RANK_SUPPORT_INT_V +#define INCLUDED_SDSL_RANK_SUPPORT_INT_V +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef INCLUDED_SDSL_RANK_SUPPORT_INT +#define INCLUDED_SDSL_RANK_SUPPORT_INT +#include +#include +#include +#include +#include +#include +namespace sdsl +{ +class structure_tree_node; +} +#define likely(x) __builtin_expect((x), 1) +#define unlikely(x) __builtin_expect((x), 0) +namespace sdsl +{ +constexpr size_t floor_log2(size_t const n) +{ + return (n == 1) ? 0 : 1 + floor_log2(n >> 1); +} +constexpr size_t ceil_log2(size_t const n) +{ + return (n == 1) ? 0 : floor_log2(n - 1) + 1; +} +template +class rank_support_int +{ +public: + typedef typename int_vector<>::size_type size_type; + typedef typename int_vector<>::value_type value_type; + static_assert(alphabet_size > 2, "Rank support is only implemented on int_vectors with an alphabet size of > 2."); +protected: + template + static constexpr uintX_t bm_rec(const uintX_t w, const uint8_t length, const uint8_t max_length) + { + return (length >= max_length) ? 
w : bm_rec(w | (w << length), length << 1, max_length); + } + static std::array generate_mask_array() + { + std::array masks{}; + for (value_type v = 0; v < alphabet_size; ++v) + { + masks[v] = v; + for (uint8_t i = sigma_bits * 2; i < 64; i <<= 1) + masks[v] |= masks[v] << i; + } + uint64_t tmp_carry = masks[1]; + for (value_type v = 0; v < alphabet_size; ++v) + masks[v] |= tmp_carry << sigma_bits; + return masks; + } +protected: + static constexpr uint8_t sigma{alphabet_size}; + static constexpr uint8_t sigma_bits{ceil_log2(alphabet_size)}; + static constexpr uint8_t bits_per_word{(64 / sigma_bits) * sigma_bits}; + static constexpr uint64_t even_mask{bm_rec(bits::lo_set[sigma_bits], sigma_bits * 2, 64)}; + static constexpr uint64_t carry_select_mask{bm_rec(1ULL << sigma_bits, sigma_bits * 2, 64)}; + static const std::array masks; + int_vector<> const * m_v; +public: + rank_support_int(int_vector<> const * v = nullptr) + { + assert((v != nullptr) ? sigma_bits == v->width() : true); + m_v = v; + } + rank_support_int(rank_support_int const &) = default; + rank_support_int(rank_support_int &&) = default; + rank_support_int & operator=(rank_support_int const &) = default; + rank_support_int & operator=(rank_support_int &&) = default; + virtual ~rank_support_int() + {} + virtual size_type rank(const size_type i, const value_type v) const = 0; + virtual size_type operator()(const size_type idx, const value_type v) const = 0; + virtual size_type prefix_rank(const size_type i, const value_type v) const = 0; + virtual size_type serialize(std::ostream & out, structure_tree_node * v, const std::string name) const = 0; + virtual void load(std::istream & in, int_vector<> const * v = nullptr) = 0; + virtual void set_vector(int_vector<> const * v = nullptr) = 0; +protected: + static constexpr uint64_t mask_prefix(value_type const v, uint64_t const w_even, uint64_t const w_odd) noexcept + { + return ((masks[v] - w_even) & carry_select_mask) | (((masks[v] - w_odd) & 
carry_select_mask) << 1); + } + static constexpr uint64_t set_positions_prefix(const uint64_t w, const value_type v) noexcept + { + uint64_t const w_even = even_mask & w; + uint64_t const w_odd = even_mask & (w >> sigma_bits); + return mask_prefix(v, w_even, w_odd); + } + static constexpr uint64_t set_positions(const uint64_t w, const value_type v) noexcept + { + assert(v > 0); + uint64_t const w_even = even_mask & w; + uint64_t const w_odd = even_mask & (w >> sigma_bits); + uint64_t res = ((masks[v] - w_even) & ~(masks[v - 1] - w_even)) & carry_select_mask; + res |= (((masks[v] - w_odd) & ~(masks[v - 1] - w_odd)) & carry_select_mask) << 1; + return res; + } + template + static constexpr std::array + word_prefix_rank(const uint64_t word, const size_type bit_pos, const value_t... values) noexcept + { + uint64_t const mask = bits::lo_set[(bit_pos % bits_per_word) + 1]; + uint64_t const w_even = even_mask & word; + uint64_t const w_odd = even_mask & (word >> sigma_bits); + return {(bits::cnt(mask_prefix(values, w_even, w_odd) & mask))...}; + } + static constexpr uint32_t word_rank(const uint64_t word, const size_type bit_pos, const value_type v) noexcept + { + return bits::cnt(set_positions(word, v) & bits::lo_set[(bit_pos & 0x3F) + 1]); + } + static constexpr uint32_t full_word_prefix_rank(const uint64_t word, const value_type v) noexcept + { + return bits::cnt(set_positions_prefix(word, v)); + } + static constexpr uint32_t full_word_rank(const uint64_t word, const value_type v) noexcept + { + return bits::cnt(set_positions(word, v)); + } + static constexpr uint64_t extract_word(uint64_t const * data, const size_type word_position) noexcept + { + return *(data + word_position); + } +}; +template +const std::array rank_support_int::masks = generate_mask_array(); +} +#endif +namespace sdsl +{ +namespace detail +{ +template +class bit_compressed_word +{ +private: + static_assert(bits_per_value <= 64, "The maximum bit size is 64 for a value."); + static constexpr 
uint64_t max_size = (sizeof(uint64_t) << 3) / bits_per_value; + static constexpr uint64_t bit_mask = bits::lo_set[bits_per_value]; + uint64_t word{}; +public: + using size_type = size_t; + bit_compressed_word() = default; + bit_compressed_word(bit_compressed_word const &) = default; + bit_compressed_word(bit_compressed_word &&) = default; + bit_compressed_word & operator=(bit_compressed_word const &) = default; + bit_compressed_word & operator=(bit_compressed_word &&) = default; + ~bit_compressed_word() = default; + template + constexpr bit_compressed_word(it_t it, it_t end) noexcept + { + assign(it, end); + } + constexpr value_t operator[](size_t const index) const noexcept + { + assert(index < max_size); + uint64_t offset = index * bits_per_value; + return static_cast((word >> offset) & bit_mask); + } + template + constexpr void assign(it_t it, it_t end) noexcept + { + assert(static_cast(std::distance(it, end)) <= max_size); + for (size_t index = 0; it != end; ++it, ++index) + { + uint64_t offset = index * bits_per_value; + word = (word & ~(bit_mask << offset)) | uint64_t{*it} << offset; + } + } + constexpr operator uint64_t() const noexcept + { + return word; + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, const std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, sdsl::util::class_name(*this)); + size_type written_bytes = sdsl::serialize(word, out, child, "compressed_word"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + sdsl::load(word, in); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(word)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(word)); + } +}; +} +} +namespace sdsl +{ +template +class rank_support_int_v : public rank_support_int +{ +private: + using base_t = rank_support_int; + using base_t::bits_per_word; + using 
base_t::sigma; + using base_t::sigma_bits; + static constexpr uint64_t values_per_word{64ULL / sigma_bits}; + static constexpr uint64_t values_per_block{words_per_block * values_per_word}; + static constexpr uint64_t values_per_superblock{blocks_per_superblock * values_per_block}; + static constexpr uint64_t words_per_superblock{words_per_block * blocks_per_superblock}; + static constexpr uint64_t effective_alphabet_size = alphabet_size - 1; + struct superblock_entry; + std::vector superblocks{}; + typename base_t::size_type text_size{}; +public: + using typename base_t::size_type; + using typename base_t::value_type; + explicit rank_support_int_v(int_vector<> const * text_ptr = nullptr) : rank_support_int(nullptr) + { + static_assert(blocks_per_superblock > 1, "There must be at least two blocks per superblock!"); + if (text_ptr == nullptr || text_ptr->empty()) + return; + text_size = text_ptr->size(); + uint64_t const word_count = (text_size + values_per_word - 1) / values_per_word; + size_type const superblock_count = (word_count + words_per_superblock - 1) / words_per_superblock; + std::array buf_blocks{}; + std::array buf_superblocks{}; + superblocks.resize(superblock_count); + auto text_slice_it = text_ptr->begin(); + uint64_t word_id = 0; + for (auto entry_it = superblocks.begin(); entry_it != superblocks.end(); ++entry_it) + { + for (auto & compressed_word : entry_it->superblock_text) + { + auto text_slice_end = + std::next(text_slice_it, + std::min(std::distance(text_slice_it, text_ptr->end()), values_per_word)); + compressed_word.assign(text_slice_it, text_slice_end); + text_slice_it = text_slice_end; + } + auto superblock_it = entry_it->superblocks.begin(); + for (size_t letter_rank = 0; letter_rank < effective_alphabet_size; ++letter_rank, ++superblock_it) + { + buf_superblocks[letter_rank] += buf_blocks[letter_rank]; + *superblock_it = buf_superblocks[letter_rank]; + buf_blocks[letter_rank] = 0; + } + auto text_it = entry_it->superblock_text.begin(); + 
for (auto block_it = entry_it->blocks.begin(); word_id < word_count && block_it != entry_it->blocks.end(); + ++word_id, ++text_it) + { + for (size_t letter_rank = 0; letter_rank < effective_alphabet_size; ++letter_rank, ++block_it) + { + buf_blocks[letter_rank] += base_t::full_word_prefix_rank(*text_it, letter_rank); + *block_it = buf_blocks[letter_rank]; + } + } + if (word_id < word_count) + { + for (uint64_t letter = 0; letter < effective_alphabet_size; ++letter) + buf_blocks[letter] += base_t::full_word_prefix_rank(*text_it, letter); + ++word_id; + } + } + } + rank_support_int_v(rank_support_int_v const &) = default; + rank_support_int_v(rank_support_int_v &&) = default; + rank_support_int_v & operator=(rank_support_int_v const &) = default; + rank_support_int_v & operator=(rank_support_int_v &&) = default; + ~rank_support_int_v() = default; + size_type rank(const size_type position, const value_type v) const + { + switch (v) + { + case 0: + return prefix_rank_impl(position, v); + case sigma - 1: + return position - prefix_rank_impl(position, v - 1); + default: + return prefix_rank_impl(position, v); + } + } + inline size_type operator()(const size_type position, const value_type v) const + { + return rank(position, v); + } + size_type prefix_rank(const size_type position, const value_type v) const + { + assert(position <= text_size); + assert(v <= sigma); + if (unlikely(v == sigma - 1)) + return position; + return prefix_rank_impl(position, v); + } + size_type size() const + { + return text_size; + } + value_type value_at(const size_type position) const + { + assert(position < text_size); + return superblocks[to_superblock_position(position)].value_at(position); + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, const std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = sdsl::serialize(superblocks, out, child, "superblocks_vector"); 
+ written_bytes += write_member(text_size, out, child, "text_size"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in, int_vector<> const * ) + { + this->m_v = nullptr; + sdsl::load(superblocks, in); + read_member(text_size, in); + } + friend bool operator==(rank_support_int_v const & lhs, rank_support_int_v const & rhs) noexcept + { + return (lhs.superblocks == rhs.superblocks) && (lhs.text_size == rhs.text_size); + } + friend bool operator!=(rank_support_int_v const & lhs, rank_support_int_v const & rhs) noexcept + { + return !(lhs == rhs); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(superblocks)); + ar(CEREAL_NVP(text_size)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(superblocks)); + ar(CEREAL_NVP(text_size)); + } + void set_vector(int_vector<> const * ) + {} +private: + constexpr size_type to_superblock_position(size_t const position) const noexcept + { + return position / values_per_superblock; + } + template + size_type prefix_rank_impl(size_type const position, const value_type v) const + { + assert(position <= text_size); + if (unlikely(text_size == 0)) + return 0; + superblock_entry const & entry = superblocks[to_superblock_position(position)]; + return entry.template superblock_rank(v) + + entry.template block_rank(position, v) + + entry.template in_block_rank(position, v); + } +}; +template +struct rank_support_int_v::superblock_entry +{ + using size_type = typename base_t::size_type; + static constexpr size_t block_offset = effective_alphabet_size; + static constexpr size_t bits_per_block_value = ceil_log2(values_per_superblock); + using block_value_type = std::conditional_t>; + std::array superblocks; + std::array blocks; + std::array, words_per_superblock> superblock_text; + template + constexpr size_type superblock_rank(value_type const v) const noexcept + { + return superblocks[v] - 
((compute_prefix_delta) ? superblocks[v - 1] : 0); + } + template + constexpr size_type block_rank(size_t const position, value_type const v) const noexcept + { + size_type const block_id = block_position_in_superblock(position); + size_type const block_position = absolute_block_position(block_id) + v; + return (block_id != 0) * (blocks[block_position] - ((compute_prefix_delta) ? blocks[block_position - 1] : 0)); + } + template + constexpr size_type in_block_rank(size_t const position, value_type const v) const noexcept + { + size_type const bit_pos = absolute_bit_position(position); + uint64_t const word = superblock_text[absolute_word_position(bit_pos)]; + return (position % values_per_block != 0) * word_prefix_rank(word, bit_pos, v); + } + value_type value_at(size_type position) const noexcept + { + size_type bit_position = absolute_bit_position(position); + return superblock_text[absolute_word_position(bit_position)][position % values_per_word]; + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, const std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, sdsl::util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += sdsl::serialize(superblocks.size(), out, child, "prefix_superblock_counts"); + for (auto const & x : superblocks) + written_bytes += sdsl::serialize(x, out, child, "[]"); + written_bytes += sdsl::serialize(blocks.size(), out, child, "prefix_block_counts"); + for (auto const & x : blocks) + written_bytes += sdsl::serialize(x, out, child, "[]"); + written_bytes += sdsl::serialize(superblock_text.size(), out, child, "superblock_text"); + for (auto const & x : superblock_text) + written_bytes += sdsl::serialize(x, out, child, "[]"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + size_type array_size; + sdsl::load(array_size, in); + assert(array_size == superblocks.size()); + for (size_type idx 
= 0; idx < array_size; ++idx) + sdsl::load(superblocks[idx], in); + sdsl::load(array_size, in); + assert(array_size == blocks.size()); + for (size_type idx = 0; idx < array_size; ++idx) + sdsl::load(blocks[idx], in); + sdsl::load(array_size, in); + assert(array_size == superblock_text.size()); + for (size_type idx = 0; idx < array_size; ++idx) + sdsl::load(superblock_text[idx], in); + } + friend bool operator==(superblock_entry const & lhs, superblock_entry const & rhs) noexcept + { + return (lhs.superblocks == rhs.superblocks) && (lhs.blocks == rhs.blocks) + && (lhs.superblock_text == rhs.superblock_text); + } + friend bool operator!=(superblock_entry const & lhs, superblock_entry const & rhs) noexcept + { + return !(lhs == rhs); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(superblocks)); + ar(CEREAL_NVP(blocks)); + ar(CEREAL_NVP(superblock_text)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(superblocks)); + ar(CEREAL_NVP(blocks)); + ar(CEREAL_NVP(superblock_text)); + } +private: + static constexpr size_type block_position_in_superblock(size_t const position) noexcept + { + return (position / values_per_block) % blocks_per_superblock; + } + static constexpr size_type absolute_block_position(size_t const block_position) noexcept + { + return (block_position + (block_position == 0) - 1) * block_offset; + } + static constexpr size_type absolute_bit_position(size_t const position) noexcept + { + return (position % values_per_superblock) * sigma_bits; + } + static constexpr size_type absolute_word_position(size_t const bit_position) noexcept + { + return bit_position / bits_per_word; + } + template + static constexpr auto word_prefix_rank(const uint64_t word, const uint64_t bit_pos, const value_type v) -> + typename std::enable_if::type + { + auto && prefix_rank = base_t::word_prefix_rank(word, bit_pos, v - 1, v); + return prefix_rank[1] - prefix_rank[0]; + } + template + static 
constexpr auto word_prefix_rank(const uint64_t word, const uint64_t bit_pos, const value_type v) -> + typename std::enable_if::type + { + return base_t::word_prefix_rank(word, bit_pos, v)[0]; + } +}; +} +#endif +namespace sdsl +{ +template , class t_tree_strat = byte_tree<>> +class wt_epr +{ +public: + typedef typename t_tree_strat::template type tree_strat_type; + typedef int_vector<>::size_type size_type; + typedef int_vector<>::value_type value_type; + typedef random_access_const_iterator const_iterator; + typedef const_iterator iterator; + typedef typename int_vector<>::difference_type difference_type; + typedef wt_tag index_category; + typedef byte_alphabet_tag alphabet_category; + enum + { + lex_ordered = true + }; +private: + static constexpr bool has_inblock_text = std::is_same>::value; + size_type m_size = 0; + size_type m_sigma = 0; + int_vector<> m_bv; + rank_type m_bv_rank; + template + auto construct_init_rank_select(int_vector<> intermediate_bitvector) -> std::enable_if_t + { + m_bv_rank = rank_type{&intermediate_bitvector}; + } + template + auto construct_init_rank_select(int_vector<> intermediate_bitvector) -> std::enable_if_t + { + m_bv = std::move(intermediate_bitvector); + m_bv_rank = rank_type{&m_bv}; + } + template + auto value_at(size_type const position) const -> std::enable_if_t + { + assert(position < size()); + return m_bv_rank.value_at(position); + } + template + auto value_at(size_type const position) const -> std::enable_if_t + { + assert(position < size()); + return m_bv[position]; + } +public: + size_type const & sigma = m_sigma; + int_vector<> const & bv = m_bv; + wt_epr() = default; + template + wt_epr(t_it begin, t_it end) : m_size(std::distance(begin, end)) + { + if (0 == m_size) + return; + std::vector C; + calculate_character_occurences(begin, end, C); + calculate_effective_alphabet_size(C, m_sigma); + if (m_sigma > alphabet_size) + throw std::domain_error{"The given text uses an alphabet that is larger than the explicitly given 
" + "alphabet size."}; + int_vector<> intermediate_bitvector{}; + intermediate_bitvector.width(std::ceil(std::log2(m_sigma))); + intermediate_bitvector.resize(m_size); + std::copy(begin, end, intermediate_bitvector.begin()); + construct_init_rank_select(std::move(intermediate_bitvector)); + } + template + wt_epr(t_it begin, t_it end, std::string) : wt_epr(begin, end) + {} + wt_epr(wt_epr const & wt) : m_size(wt.m_size), m_sigma(wt.m_sigma), m_bv(wt.m_bv), m_bv_rank(wt.m_bv_rank) + { + m_bv_rank.set_vector(&m_bv); + } + wt_epr(wt_epr && wt) : + m_size(wt.m_size), + m_sigma(wt.m_sigma), + m_bv(std::move(wt.m_bv)), + m_bv_rank(std::move(wt.m_bv_rank)) + { + m_bv_rank.set_vector(&m_bv); + } + wt_epr & operator=(wt_epr const & wt) + { + if (this != &wt) + { + wt_epr tmp(wt); + *this = std::move(tmp); + } + return *this; + } + wt_epr & operator=(wt_epr && wt) + { + if (this != &wt) + { + m_size = wt.m_size; + m_sigma = wt.m_sigma; + m_bv = std::move(wt.m_bv); + m_bv_rank = std::move(wt.m_bv_rank); + m_bv_rank.set_vector(&m_bv); + } + return *this; + } + size_type size() const + { + return m_size; + } + bool empty() const + { + return m_size == 0; + } + auto operator[](size_type const i) const + { + assert(i < size()); + return value_at(i); + }; + size_type rank(size_type i, value_type c) const + { + assert(i <= size()); + return m_bv_rank.rank(i, c); + }; + std::pair inverse_select(size_type i) const + { + assert(i < size()); + value_type value = (*this)[i]; + return std::make_pair(m_bv_rank.rank(i, value), value); + } + template > + t_ret_type lex_count(size_type i, size_type j, value_type c) const + { + assert(i <= j and j <= size()); + size_type smaller = 0; + size_type prefix_i_c = m_bv_rank.prefix_rank(i, c); + size_type prefix_i_c_1 = 0; + size_type greater = j - i - m_bv_rank.prefix_rank(j, c) + prefix_i_c; + if (c > 0) + { + prefix_i_c_1 = m_bv_rank.prefix_rank(i, c - 1); + smaller = m_bv_rank.prefix_rank(j, c - 1) - prefix_i_c_1; + } + size_type rank = 
prefix_i_c - prefix_i_c_1; + return t_ret_type{rank, smaller, greater}; + } + template > + t_ret_type lex_smaller_count(size_type i, value_type c) const + { + assert(i <= size()); + size_type prefix_count_smaller = 0; + if (c > 0) + prefix_count_smaller = m_bv_rank.prefix_rank(i, c - 1); + return t_ret_type{m_bv_rank.prefix_rank(i, c) - prefix_count_smaller, prefix_count_smaller}; + } + const_iterator begin() const + { + return const_iterator(this, 0); + } + const_iterator end() const + { + return const_iterator(this, size()); + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += write_member(m_size, out, child, "size"); + written_bytes += write_member(m_sigma, out, child, "sigma"); + written_bytes += m_bv.serialize(out, child, "bv"); + written_bytes += m_bv_rank.serialize(out, child, "bv_rank"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + read_member(m_size, in); + read_member(m_sigma, in); + m_bv.load(in); + m_bv_rank.load(in, &m_bv); + } + friend bool operator==(wt_epr const & lhs, wt_epr const & rhs) noexcept + { + return (lhs.m_size == rhs.m_size) && (lhs.m_sigma == rhs.m_sigma) && (lhs.m_bv == rhs.m_bv) + && (lhs.m_bv_rank == rhs.m_bv_rank); + } + friend bool operator!=(wt_epr const & lhs, wt_epr const & rhs) noexcept + { + return !(lhs == rhs); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_sigma)); + ar(CEREAL_NVP(m_bv)); + ar(CEREAL_NVP(m_bv_rank)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_sigma)); + ar(CEREAL_NVP(m_bv)); + ar(CEREAL_NVP(m_bv_rank)); + m_bv_rank.set_vector(&m_bv); + } +}; +} +#endif +#ifndef INCLUDED_SDSL_WT_GMR +#define 
INCLUDED_SDSL_WT_GMR +#include +#include +#include +#include +#include +#include +#include +#include +namespace sdsl +{ +template , + class t_bv = bit_vector, + class t_rank = typename t_bv::rank_1_type> +class inv_multi_perm_support +{ +public: + typedef t_rac iv_type; + typedef typename iv_type::size_type size_type; + typedef typename iv_type::value_type value_type; + typedef typename iv_type::difference_type difference_type; + typedef t_bv bit_vector_type; + typedef t_rank rank_type; + typedef random_access_const_iterator const_iterator; +private: + iv_type const * m_perm = nullptr; + uint64_t m_chunksize; + int_vector<> m_back_pointer; + bit_vector_type m_marked; + rank_type m_marked_rank; +public: + inv_multi_perm_support(){}; + inv_multi_perm_support(iv_type const * perm, int_vector<> & iv, uint64_t chunksize) : + m_perm(perm), + m_chunksize(chunksize) + { + bit_vector marked(iv.size(), 0); + bit_vector done(m_chunksize, 0); + size_type max_back_pointer = 0; + for (size_type i = 0, off = 0; i < iv.size(); ++i) + { + if (i == off + chunksize) + { + off = i; + util::set_to_value(done, 0); + } + if (!done[i - off]) + { + done[i - off] = 1; + size_type back_pointer = i, j = i, j_new = 0; + uint64_t steps = 0, all_steps = 0; + while ((j_new = (iv[j] + off)) != i) + { + j = j_new; + done[j - off] = 1; + ++steps; + ++all_steps; + if (t_s == steps) + { + max_back_pointer = std::max(max_back_pointer, back_pointer - off); + marked[j] = 1; + steps = 0; + back_pointer = j; + } + } + if (all_steps > t_s) + { + marked[i] = 1; + max_back_pointer = std::max(max_back_pointer, back_pointer - off); + } + } + } + m_marked = t_bv(std::move(marked)); + util::init_support(m_marked_rank, &m_marked); + util::set_to_value(done, 0); + size_type n_bp = m_marked_rank(iv.size()); + m_back_pointer = int_vector<>(n_bp, 0, bits::hi(max_back_pointer) + 1); + for (size_type i = 0, off = 0; i < iv.size(); ++i) + { + if (i == off + chunksize) + { + off = i; + util::set_to_value(done, 0); + } + 
if (!done[i - off]) + { + done[i - off] = 1; + size_type back_pointer = i, j = i, j_new = 0; + uint64_t steps = 0, all_steps = 0; + while ((j_new = (iv[j] + off)) != i) + { + j = j_new; + done[j - off] = 1; + ++steps; + ++all_steps; + if (t_s == steps) + { + m_back_pointer[m_marked_rank(j)] = back_pointer - off; + steps = 0; + back_pointer = j; + } + } + if (all_steps > t_s) + { + m_back_pointer[m_marked_rank(i)] = back_pointer - off; + } + } + } + } + inv_multi_perm_support(inv_multi_perm_support const & p) : + m_perm(p.m_perm), + m_chunksize(p.m_chunksize), + m_back_pointer(p.m_back_pointer), + m_marked(p.m_marked), + m_marked_rank(p.m_marked_rank) + { + m_marked_rank.set_vector(&m_marked); + } + inv_multi_perm_support(inv_multi_perm_support && p) + { + *this = std::move(p); + } + inv_multi_perm_support & operator=(inv_multi_perm_support const & p) + { + if (this != &p) + { + m_perm = p.m_perm; + m_chunksize = p.m_chunksize; + m_back_pointer = p.m_back_pointer; + m_marked = p.m_marked; + m_marked_rank = p.m_marked_rank; + m_marked_rank.set_vector(&m_marked); + } + return *this; + } + inv_multi_perm_support & operator=(inv_multi_perm_support && p) + { + if (this != &p) + { + m_perm = std::move(p.m_perm); + m_chunksize = std::move(p.m_chunksize); + m_back_pointer = std::move(p.m_back_pointer); + m_marked = std::move(p.m_marked); + m_marked_rank = std::move(p.m_marked_rank); + m_marked_rank.set_vector(&m_marked); + } + return *this; + } + size_type size() const + { + return nullptr == m_perm ? 
0 : m_perm->size(); + } + bool empty() const + { + return size() == 0; + } + value_type operator[](size_type i) const + { + size_type off = (i / m_chunksize) * m_chunksize; + size_type j = i, j_new = 0; + while ((j_new = ((*m_perm)[j]) + off) != i) + { + if (m_marked[j]) + { + j = m_back_pointer[m_marked_rank(j)] + off; + while ((j_new = ((*m_perm)[j]) + off) != i) + j = j_new; + } + else + { + j = j_new; + } + } + return j; + } + const_iterator begin() const + { + return const_iterator(this, 0); + } + const_iterator end() const + { + return const_iterator(this, size()); + } + void set_vector(iv_type const * v) + { + m_perm = v; + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += write_member(m_chunksize, out, child, "chunksize"); + written_bytes += m_back_pointer.serialize(out, child, "back_pointer"); + written_bytes += m_marked.serialize(out, child, "marked"); + written_bytes += m_marked_rank.serialize(out, child, "marked_rank"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in, iv_type const * v = nullptr) + { + set_vector(v); + read_member(m_chunksize, in); + m_back_pointer.load(in); + m_marked.load(in); + m_marked_rank.load(in, &m_marked); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_chunksize)); + ar(CEREAL_NVP(m_back_pointer)); + ar(CEREAL_NVP(m_marked)); + ar(CEREAL_NVP(m_marked_rank)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_chunksize)); + ar(CEREAL_NVP(m_back_pointer)); + ar(CEREAL_NVP(m_marked)); + ar(CEREAL_NVP(m_marked_rank)); + m_marked_rank.set_vector(&m_marked); + } + bool operator==(inv_multi_perm_support const & other) const noexcept + { + return (m_chunksize == other.m_chunksize) && 
(m_back_pointer == other.m_back_pointer) + && (m_marked == other.m_marked) && (m_marked_rank == other.m_marked_rank); + } + bool operator!=(inv_multi_perm_support const & other) const noexcept + { + return !(*this == other); + } +}; +template +void _transform_to_compressed(int_vector<> & iv, + typename std::enable_if>::value), t_rac>::type & rac, + const std::string filename) +{ + std::string tmp_file_name = tmp_file(filename, "_compress_int_vector"); + store_to_file(iv, tmp_file_name); + util::clear(iv); + int_vector_buffer<> buf(tmp_file_name, std::ios::in, 1024 * 1024, iv.width()); + rac = t_rac(buf); + buf.close(true); +} +template +void _transform_to_compressed(int_vector<> & iv, + typename std::enable_if>::value, t_rac>::type & rac, + const std::string) +{ + rac = std::move(iv); +} +template , + class t_bitvector = bit_vector, + class t_select = typename t_bitvector::select_1_type, + class t_select_zero = typename t_bitvector::select_0_type> +class wt_gmr_rs +{ +public: + typedef int_vector<>::size_type size_type; + typedef int_vector<>::value_type value_type; + typedef typename t_bitvector::difference_type difference_type; + typedef random_access_const_iterator const_iterator; + typedef const_iterator iterator; + typedef wt_tag index_category; + typedef int_alphabet_tag alphabet_category; + enum + { + lex_ordered = 0 + }; +private: + t_bitvector m_bv_blocks; + t_rac m_e; + t_select m_bv_blocks_select1; + t_select_zero m_bv_blocks_select0; + uint64_t m_size; + uint64_t m_block_size = 0; + uint64_t m_blocks; + uint64_t m_sigma = 0; +public: + size_type const & sigma = m_sigma; + wt_gmr_rs() = default; + template + wt_gmr_rs(t_it begin, t_it end, std::string tmp_dir = ram_file_name("")) : m_size(std::distance(begin, end)) + { + for (auto it = begin; it != end; ++it) + { + value_type value = *it; + if (m_block_size < value) + m_block_size = value; + } + ++m_block_size; + m_blocks = (m_size + m_block_size - 1) / m_block_size; + bit_vector b(m_size + m_block_size 
* m_blocks + 1, 0); + int_vector<> symbols(m_block_size, 0, bits::hi(m_size) + 1); + { + int_vector<> tmp(m_block_size * m_blocks, 0, bits::hi(m_block_size) + 1); + uint64_t j = 0, offset = 0; + for (auto it = begin; it != end; ++it, ++j) + { + if (j == m_block_size) + { + ++offset; + j = 0; + } + ++tmp[(*it) * m_blocks + offset]; + } + for (uint64_t i = 0; i < symbols.size(); ++i) + { + for (uint64_t j = m_blocks * i; j < (i + 1) * m_blocks; ++j) + { + symbols[i] += tmp[j]; + } + } + for (uint64_t i = 0, l = 1; i < tmp.size(); ++i, ++l) + { + for (uint64_t j = 0; j < tmp[i]; ++j) + b[l++] = 1; + } + bool write = true; + uint64_t blocks = 0; + for (uint64_t i = 1; i < b.size(); ++i) + { + if (blocks == m_blocks) + { + blocks = 0; + write = true; + } + if (b[i]) + { + if (write) + { + ++m_sigma; + write = false; + } + } + else + ++blocks; + } + m_bv_blocks = t_bitvector(std::move(b)); + } + int_vector<> positions(m_size, 0, bits::hi(m_block_size) + 1); + for (uint64_t i = 0, tmp = 0, sum = 0; i < m_block_size; ++i) + { + tmp = symbols[i]; + symbols[i] = sum; + sum += tmp; + } + for (auto it = begin; it != end;) + { + for (uint64_t j = 0; j < m_block_size and it != end; ++it, ++j) + { + positions[symbols[*it]++] = j; + } + } + _transform_to_compressed(positions, m_e, tmp_dir); + util::init_support(m_bv_blocks_select0, &m_bv_blocks); + util::init_support(m_bv_blocks_select1, &m_bv_blocks); + } + wt_gmr_rs(wt_gmr_rs const & wt) : + m_bv_blocks(wt.m_bv_blocks), + m_e(wt.m_e), + m_bv_blocks_select1(wt.m_bv_blocks_select1), + m_bv_blocks_select0(wt.m_bv_blocks_select0), + m_size(wt.m_size), + m_block_size(wt.m_block_size), + m_blocks(wt.m_blocks), + m_sigma(wt.m_sigma) + { + m_bv_blocks_select1.set_vector(&m_bv_blocks); + m_bv_blocks_select0.set_vector(&m_bv_blocks); + } + wt_gmr_rs(wt_gmr_rs && wt) : + m_bv_blocks(std::move(wt.m_bv_blocks)), + m_e(std::move(wt.m_e)), + m_bv_blocks_select1(std::move(wt.m_bv_blocks_select1)), + 
m_bv_blocks_select0(std::move(wt.m_bv_blocks_select0)), + m_size(wt.m_size), + m_block_size(wt.m_block_size), + m_blocks(wt.m_blocks), + m_sigma(wt.m_sigma) + { + m_bv_blocks_select1.set_vector(&m_bv_blocks); + m_bv_blocks_select0.set_vector(&m_bv_blocks); + } + wt_gmr_rs & operator=(wt_gmr_rs const & wt) + { + wt_gmr_rs tmp(wt); + *this = std::move(tmp); + return *this; + } + wt_gmr_rs & operator=(wt_gmr_rs && wt) + { + m_bv_blocks = std::move(wt.m_bv_blocks); + m_e = std::move(wt.m_e); + m_bv_blocks_select1 = std::move(wt.m_bv_blocks_select1); + m_bv_blocks_select1.set_vector(&m_bv_blocks); + m_bv_blocks_select0 = std::move(wt.m_bv_blocks_select0); + m_bv_blocks_select0.set_vector(&m_bv_blocks); + m_size = wt.m_size; + m_block_size = wt.m_block_size; + m_blocks = wt.m_blocks; + m_sigma = wt.m_sigma; + return *this; + } + size_type size() const + { + return m_size; + } + bool empty() const + { + return m_size == 0; + } + value_type operator[](size_type i) const + { + assert(i < m_size); + size_type block = i / m_block_size + 1, val = i % m_block_size, search_begin, search_end, j; + while (true) + { + j = m_bv_blocks_select0(block) + 1; + search_begin = j - block; + if (m_bv_blocks[j]) + { + search_end = m_bv_blocks_select0(block + 1) - (block); + if (search_end - search_begin < 50) + { + while (search_begin < search_end and m_e[search_begin] <= val) + { + if (m_e[search_begin] == val) + { + return (block - 1) / m_blocks; + } + ++search_begin; + } + } + else + { + if (std::binary_search(m_e.begin() + search_begin, m_e.begin() + search_end, val)) + { + return (block - 1) / m_blocks; + } + } + } + block += m_blocks; + } + } + size_type rank(size_type i, value_type c) const + { + if (0 == i or c > m_block_size - 1) + { + return 0; + } + size_type offset = 0; + size_type ones_before_cblock = m_bv_blocks_select0(c * m_blocks + 1) - c * m_blocks; + auto begin = m_e.begin() + m_bv_blocks_select0(c * m_blocks + (i - 1) / m_block_size + 1) + - (c * m_blocks + (i - 1) / 
m_block_size + 1) + 1; + auto end = m_e.begin() + m_bv_blocks_select0(c * m_blocks + (i - 1) / m_block_size + 2) + - (c * m_blocks + (i - 1) / m_block_size + 1); + size_type val = (i - 1) % m_block_size; + if (end - begin < 50) + { + offset = std::find_if(begin, + end, + [&val](auto const && x) + { + return x > val; + }) + - begin; + } + else + { + offset = std::lower_bound(begin, end, val + 1) - begin; + } + return (begin - m_e.begin()) + offset - ones_before_cblock; + } + std::pair inverse_select(size_type i) const + { + assert(i < m_size); + size_type block = i / m_block_size + 1, val = i % m_block_size, offset = 0, search_begin, search_end, j; + while (true) + { + j = m_bv_blocks_select0(block) + 1; + search_begin = j - block; + if (m_bv_blocks[j]) + { + search_end = m_bv_blocks_select0(block + 1) - (block); + offset = 0; + if (search_end - search_begin < 50) + { + while (search_begin < search_end and m_e[search_begin] <= val) + { + if (m_e[search_begin] == val) + { + value_type c = (block - 1) / m_blocks; + size_type ones_before_cblock = m_bv_blocks_select0(c * m_blocks + 1) - (c * m_blocks); + size_type r = search_begin - ones_before_cblock; + return std::make_pair(r, c); + } + ++search_begin; + } + } + else + { + offset = std::lower_bound(m_e.begin() + search_begin, m_e.begin() + search_end, val) - m_e.begin(); + if (offset < search_end) + { + if (m_e[offset] == val) + { + value_type c = (block - 1) / m_blocks; + size_type ones_before_cblock = m_bv_blocks_select0(c * m_blocks + 1) - (c * m_blocks); + size_type r = offset - ones_before_cblock; + return std::make_pair(r, c); + } + } + } + } + block += m_blocks; + } + } + size_type select(size_type i, value_type c) const + { + size_type k = m_bv_blocks_select0(c * m_blocks + 1) - (c * m_blocks) + i; + return (m_bv_blocks_select1(k) - k) * m_block_size + m_e[k - 1] - c * m_blocks * m_block_size; + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + 
structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += write_member(m_size, out, child, "size"); + written_bytes += write_member(m_block_size, out, child, "block_size"); + written_bytes += write_member(m_blocks, out, child, "blocks"); + written_bytes += write_member(m_sigma, out, child, "sigma"); + written_bytes += m_e.serialize(out, child, "E"); + written_bytes += m_bv_blocks.serialize(out, child, "bv_blocks"); + written_bytes += m_bv_blocks_select0.serialize(out, child, "bv_blocks_select0"); + written_bytes += m_bv_blocks_select1.serialize(out, child, "bv_blocks_select1"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + read_member(m_size, in); + read_member(m_block_size, in); + read_member(m_blocks, in); + read_member(m_sigma, in); + m_e.load(in); + m_bv_blocks.load(in); + m_bv_blocks_select0.load(in, &m_bv_blocks); + m_bv_blocks_select1.load(in, &m_bv_blocks); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_block_size)); + ar(CEREAL_NVP(m_blocks)); + ar(CEREAL_NVP(m_sigma)); + ar(CEREAL_NVP(m_e)); + ar(CEREAL_NVP(m_bv_blocks)); + ar(CEREAL_NVP(m_bv_blocks_select0)); + ar(CEREAL_NVP(m_bv_blocks_select1)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_block_size)); + ar(CEREAL_NVP(m_blocks)); + ar(CEREAL_NVP(m_sigma)); + ar(CEREAL_NVP(m_e)); + ar(CEREAL_NVP(m_bv_blocks)); + ar(CEREAL_NVP(m_bv_blocks_select0)); + m_bv_blocks_select0.set_vector(&m_bv_blocks); + ar(CEREAL_NVP(m_bv_blocks_select1)); + m_bv_blocks_select1.set_vector(&m_bv_blocks); + } + iterator begin() + { + return {this, 0}; + }; + const_iterator end() + { + return {this, size()}; + }; + iterator begin() const + { + return {this, 0}; + }; + const_iterator end() const + { + return {this, size()}; + }; + bool 
operator==(wt_gmr_rs const & other) const noexcept + { + return (m_size == other.m_size) && (m_block_size == other.m_block_size) && (m_blocks == other.m_blocks) + && (m_sigma == other.m_sigma) && (m_e == other.m_e) && (m_bv_blocks == other.m_bv_blocks) + && (m_bv_blocks_select0 == other.m_bv_blocks_select0) && (m_bv_blocks_select1 == other.m_bv_blocks_select1); + } + bool operator!=(wt_gmr_rs const & other) const noexcept + { + return !(*this == other); + } +}; +template , + class t_inverse_support = inv_multi_perm_support<32, t_rac>, + class t_bitvector = bit_vector, + class t_select = typename t_bitvector::select_1_type, + class t_select_zero = typename t_bitvector::select_0_type> +class wt_gmr +{ +public: + typedef typename t_rac::size_type size_type; + typedef typename t_rac::value_type value_type; + typedef typename t_bitvector::difference_type difference_type; + typedef random_access_const_iterator const_iterator; + typedef const_iterator iterator; + typedef wt_tag index_category; + typedef int_alphabet_tag alphabet_category; + enum + { + lex_ordered = 0 + }; +private: + t_bitvector m_bv_blocks; + t_bitvector m_bv_chunks; + t_rac m_perm; + t_inverse_support m_ips; + t_select m_bv_blocks_select1, m_bv_chunks_select1; + t_select_zero m_bv_blocks_select0, m_bv_chunks_select0; + uint64_t m_size; + uint64_t m_max_symbol = 0; + uint64_t m_chunks; + uint64_t m_chunksize; + uint64_t m_sigma = 0; +public: + size_type const & sigma = m_sigma; + wt_gmr() = default; + template + wt_gmr(t_it begin, t_it end, std::string tmp_dir = ram_file_name("")) : m_size(std::distance(begin, end)) + { + for (auto it = begin; it != end; ++it) + { + value_type value = *it; + if (m_max_symbol < value) + m_max_symbol = value; + } + ++m_max_symbol; + m_chunksize = (1ULL << (bits::hi(m_max_symbol - 1) + 1)); + m_chunks = (m_size + m_chunksize - 1) / m_chunksize; + { + bit_vector b(m_size + m_max_symbol * m_chunks + 1, 0); + int_vector<> tmp(m_max_symbol * m_chunks, 0, bits::hi(m_max_symbol - 
1) + 2); + uint64_t offset = 0, j = 0; + for (auto it = begin; it != end; ++it, ++j) + { + if (j == m_chunksize) + { + ++offset; + j = 0; + } + ++tmp[(*it) * m_chunks + offset]; + } + for (uint64_t i = 0, l = 1; i < tmp.size(); ++i, ++l) + for (uint64_t j = 0; j < tmp[i]; ++j) + b[l++] = 1; + bool write = true; + uint64_t blocks = 0; + for (uint64_t i = 1; i < b.size(); ++i) + { + if (blocks == m_chunks) + { + blocks = 0; + write = true; + } + if (b[i]) + { + if (write) + { + ++m_sigma; + write = false; + } + } + else + ++blocks; + } + m_bv_blocks = t_bitvector(std::move(b)); + } + { + uint64_t x_pos = 0; + bit_vector x(m_size + m_chunks * m_max_symbol + 1, 0); + int_vector<> perm(m_size, 0, bits::hi(m_max_symbol - 1) + 1); + for (uint64_t i = 0; i < m_chunks; ++i) + { + int_vector<> symbols(m_max_symbol, 0, bits::hi(m_max_symbol - 1) + 2); + for (uint64_t j = i * m_chunksize; j < (i + 1) * m_chunksize and j < m_size; ++j) + { + ++symbols[*(begin + j)]; + } + for (uint64_t j = 0; j < m_max_symbol; ++j, ++x_pos) + for (uint64_t k = 0; k < symbols[j]; ++k) + x[++x_pos] = 1; + for (uint64_t j = 0, tmp = 0, sum = 0; j < m_max_symbol; ++j) + { + tmp = symbols[j]; + symbols[j] = sum; + sum += tmp; + } + for (uint64_t j = i * m_chunksize, k = 0; j < (i + 1) * m_chunksize and j < m_size; ++j, ++k) + { + perm[i * m_chunksize + (symbols[*(begin + j)]++)] = k; + } + } + m_bv_chunks = t_bitvector(std::move(x)); + m_ips = t_inverse_support(&m_perm, perm, m_chunksize); + _transform_to_compressed(perm, m_perm, tmp_dir); + m_ips.set_vector(&m_perm); + } + util::init_support(m_bv_chunks_select1, &m_bv_chunks); + util::init_support(m_bv_chunks_select0, &m_bv_chunks); + util::init_support(m_bv_blocks_select1, &m_bv_blocks); + util::init_support(m_bv_blocks_select0, &m_bv_blocks); + } + wt_gmr(wt_gmr const & wt) : + m_bv_blocks(wt.m_bv_blocks), + m_bv_chunks(wt.m_bv_chunks), + m_perm(wt.m_perm), + m_ips(wt.m_ips), + m_bv_blocks_select1(wt.m_bv_blocks_select1), + 
m_bv_chunks_select1(wt.m_bv_chunks_select1), + m_bv_blocks_select0(wt.m_bv_blocks_select0), + m_bv_chunks_select0(wt.m_bv_chunks_select0), + m_size(wt.m_size), + m_max_symbol(wt.m_max_symbol), + m_chunks(wt.m_chunks), + m_chunksize(wt.m_chunksize), + m_sigma(wt.m_sigma) + { + m_ips.set_vector(&m_perm); + m_bv_blocks_select1.set_vector(&m_bv_blocks); + m_bv_chunks_select1.set_vector(&m_bv_chunks); + m_bv_blocks_select0.set_vector(&m_bv_blocks); + m_bv_chunks_select0.set_vector(&m_bv_chunks); + } + wt_gmr(wt_gmr && wt) : + m_bv_blocks(std::move(wt.m_bv_blocks)), + m_bv_chunks(std::move(wt.m_bv_chunks)), + m_perm(std::move(wt.m_perm)), + m_ips(std::move(wt.m_ips)), + m_bv_blocks_select1(std::move(wt.m_bv_blocks_select1)), + m_bv_chunks_select1(std::move(wt.m_bv_chunks_select1)), + m_bv_blocks_select0(std::move(wt.m_bv_blocks_select0)), + m_bv_chunks_select0(std::move(wt.m_bv_chunks_select0)), + m_size(wt.m_size), + m_max_symbol(wt.m_max_symbol), + m_chunks(wt.m_chunks), + m_chunksize(wt.m_chunksize), + m_sigma(wt.m_sigma) + { + m_ips.set_vector(&m_perm); + m_bv_blocks_select1.set_vector(&m_bv_blocks); + m_bv_chunks_select1.set_vector(&m_bv_chunks); + m_bv_blocks_select0.set_vector(&m_bv_blocks); + m_bv_chunks_select0.set_vector(&m_bv_chunks); + } + wt_gmr & operator=(wt_gmr const & wt) + { + wt_gmr tmp(wt); + *this = std::move(tmp); + return *this; + } + wt_gmr & operator=(wt_gmr && wt) + { + m_bv_blocks = std::move(wt.m_bv_blocks); + m_bv_chunks = std::move(wt.m_bv_chunks); + m_perm = std::move(wt.m_perm); + m_ips = std::move(wt.m_ips); + m_ips.set_vector(&m_perm); + m_bv_blocks_select1 = std::move(wt.m_bv_blocks_select1); + m_bv_blocks_select1.set_vector(&m_bv_blocks); + m_bv_chunks_select1 = std::move(wt.m_bv_chunks_select1); + m_bv_chunks_select1.set_vector(&m_bv_chunks); + m_bv_blocks_select0 = std::move(wt.m_bv_blocks_select0); + m_bv_blocks_select0.set_vector(&m_bv_blocks); + m_bv_chunks_select0 = std::move(wt.m_bv_chunks_select0); + 
m_bv_chunks_select0.set_vector(&m_bv_chunks); + m_size = wt.m_size; + m_max_symbol = wt.m_max_symbol; + m_chunks = wt.m_chunks; + m_chunksize = wt.m_chunksize; + m_sigma = wt.m_sigma; + return *this; + } + size_type size() const + { + return m_size; + } + bool empty() const + { + return m_size == 0; + } + value_type operator[](size_type i) const + { + assert(i < size()); + uint64_t chunk = i / m_chunksize; + uint64_t x = m_ips[i]; + return m_bv_chunks_select1(x + 1) - x - (chunk * m_max_symbol) - 1; + } + size_type rank(size_type i, value_type c) const + { + assert(i <= size()); + if (0 == i or c > m_max_symbol - 1) + { + return 0; + } + uint64_t chunk = (i - 1) / m_chunksize; + uint64_t ones_before_c = m_bv_blocks_select0(c * m_chunks + 1) - (c * m_chunks + 1) + 1; + uint64_t c_ones_before_chunk = + m_bv_blocks_select0(c * m_chunks + chunk + 1) - (c * m_chunks + chunk + 1) + 1 - ones_before_c; + uint64_t c_ones_in_chunk = 0; + auto begin = + m_perm.begin() + m_bv_chunks_select0(chunk * m_max_symbol + 1 + c) - (chunk * m_max_symbol + 1 + c) + 1; + auto end = + m_perm.begin() + m_bv_chunks_select0(chunk * m_max_symbol + 2 + c) - (chunk * m_max_symbol + 2 + c) + 1; + size_type val = (i - 1) % m_chunksize; + if (end - begin < 50) + { + c_ones_in_chunk = std::find_if(begin, + end, + [&val](auto const && x) + { + return x > val; + }) + - begin; + } + else + { + c_ones_in_chunk = std::lower_bound(begin, end, val + 1) - begin; + } + return c_ones_before_chunk + c_ones_in_chunk; + } + std::pair inverse_select(size_type i) const + { + assert(i < size()); + uint64_t chunk = i / m_chunksize; + uint64_t x = m_ips[i]; + uint64_t tmp = m_bv_chunks_select1(x + 1); + uint64_t c = tmp - x - (chunk * m_max_symbol) - 1; + uint64_t ones_before_c = m_bv_blocks_select0(c * m_chunks + 1) - (c * m_chunks + 1) + 1; + uint64_t c_before_chunk = + m_bv_blocks_select0(c * m_chunks + chunk + 1) - (c * m_chunks + chunk + 1) + 1 - ones_before_c; + uint64_t c_in_chunk = tmp - m_bv_chunks_select0(c 
+ 1 + chunk * m_max_symbol) - 1; + return std::make_pair(c_before_chunk + c_in_chunk, c); + } + size_type select(size_type i, value_type c) const + { + assert(1 <= i and i <= rank(size(), c)); + uint64_t ones_before_c = m_bv_blocks_select0(c * m_chunks + 1) - (c * m_chunks); + uint64_t chunk = m_bv_blocks_select1(ones_before_c + i) - ones_before_c - (c * m_chunks + 1) - i + 1; + uint64_t c_ones_before_chunk = + m_bv_blocks_select0(c * m_chunks + chunk + 1) - (c * m_chunks + chunk) - ones_before_c; + uint64_t pi_pos = m_bv_chunks_select0(chunk * m_max_symbol + c + 1) + (i - c_ones_before_chunk) + - chunk * m_max_symbol - c - 1; + return m_perm[pi_pos] + chunk * m_chunksize; + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += write_member(m_size, out, child, "size"); + written_bytes += write_member(m_max_symbol, out, child, "max_symbol"); + written_bytes += write_member(m_chunks, out, child, "chunks"); + written_bytes += write_member(m_chunksize, out, child, "chunksize"); + written_bytes += write_member(m_sigma, out, child, "sigma"); + written_bytes += m_bv_blocks.serialize(out, child, "bv_blocks"); + written_bytes += m_bv_blocks_select0.serialize(out, child, "bv_blocks_select0"); + written_bytes += m_bv_blocks_select1.serialize(out, child, "bv_blocks_select1"); + written_bytes += m_bv_chunks.serialize(out, child, "bv_chunks"); + written_bytes += m_bv_chunks_select0.serialize(out, child, "bv_chunks_select0"); + written_bytes += m_bv_chunks_select1.serialize(out, child, "bv_chunks_select1"); + written_bytes += m_perm.serialize(out, child, "permutation"); + written_bytes += m_ips.serialize(out, child, "inverse_permutation_support"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + read_member(m_size, 
in); + read_member(m_max_symbol, in); + read_member(m_chunks, in); + read_member(m_chunksize, in); + read_member(m_sigma, in); + m_bv_blocks.load(in); + m_bv_blocks_select0.load(in, &m_bv_blocks); + m_bv_blocks_select1.load(in, &m_bv_blocks); + m_bv_chunks.load(in); + m_bv_chunks_select0.load(in, &m_bv_chunks); + m_bv_chunks_select1.load(in, &m_bv_chunks); + m_perm.load(in); + m_ips.load(in, &m_perm); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_max_symbol)); + ar(CEREAL_NVP(m_chunks)); + ar(CEREAL_NVP(m_chunksize)); + ar(CEREAL_NVP(m_sigma)); + ar(CEREAL_NVP(m_bv_blocks)); + ar(CEREAL_NVP(m_bv_blocks_select0)); + ar(CEREAL_NVP(m_bv_blocks_select1)); + ar(CEREAL_NVP(m_bv_chunks)); + ar(CEREAL_NVP(m_bv_chunks_select0)); + ar(CEREAL_NVP(m_bv_chunks_select1)); + ar(CEREAL_NVP(m_perm)); + ar(CEREAL_NVP(m_ips)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_max_symbol)); + ar(CEREAL_NVP(m_chunks)); + ar(CEREAL_NVP(m_chunksize)); + ar(CEREAL_NVP(m_sigma)); + ar(CEREAL_NVP(m_bv_blocks)); + ar(CEREAL_NVP(m_bv_blocks_select0)); + m_bv_blocks_select0.set_vector(&m_bv_blocks); + ar(CEREAL_NVP(m_bv_blocks_select1)); + m_bv_blocks_select1.set_vector(&m_bv_blocks); + ar(CEREAL_NVP(m_bv_chunks)); + ar(CEREAL_NVP(m_bv_chunks_select0)); + m_bv_chunks_select0.set_vector(&m_bv_chunks); + ar(CEREAL_NVP(m_bv_chunks_select1)); + m_bv_chunks_select1.set_vector(&m_bv_chunks); + ar(CEREAL_NVP(m_perm)); + ar(CEREAL_NVP(m_ips)); + m_ips.set_vector(&m_perm); + } + iterator begin() + { + return {this, 0}; + }; + const_iterator end() + { + return {this, size()}; + }; + iterator begin() const + { + return {this, 0}; + }; + const_iterator end() const + { + return {this, size()}; + }; + bool operator==(wt_gmr const & other) const noexcept + { + return (m_size == other.m_size) && (m_max_symbol == other.m_max_symbol) && (m_chunks == other.m_chunks) + && 
(m_chunksize == other.m_chunksize) && (m_sigma == other.m_sigma) && (m_bv_blocks == other.m_bv_blocks) + && (m_bv_blocks_select0 == other.m_bv_blocks_select0) && (m_bv_blocks_select1 == other.m_bv_blocks_select1) + && (m_bv_chunks == other.m_bv_chunks) && (m_bv_chunks_select0 == other.m_bv_chunks_select0) + && (m_bv_chunks_select1 == other.m_bv_chunks_select1) && (m_perm == other.m_perm) && (m_ips == other.m_ips); + } + bool operator!=(wt_gmr const & other) const noexcept + { + return !(*this == other); + } +}; +} +#endif +#ifndef INCLUDED_SDSL_WT_HUTU +#define INCLUDED_SDSL_WT_HUTU +#include +#include +#include +namespace sdsl +{ +struct hutu_shape; +template > +using wt_hutu = wt_pc; +template +struct _hutu_shape +{ + typedef typename t_wt::size_type size_type; + enum + { + lex_ordered = 1 + }; + template + struct heap_node + { + t_element * item; + heap_node *left, *right, *parent; + int64_t rank; + heap_node(t_element * it = nullptr) : item(it), left(nullptr), right(nullptr), parent(nullptr), rank(0) + {} + bool operator<(heap_node const & other) + { + return *item < *(other.item); + } + }; + template + class l_heap + { + private: + heap_node * m_root; + void fix_node(heap_node * item) + { + if (item != nullptr) + { + if (!item->left || !item->right) + { + if (item->rank != 0) + { + item->rank = 0; + if (item->parent) + fix_node(item->parent); + } + } + else + { + int64_t nn = (item->left->rank > item->right->rank) ? 
item->right->rank : item->left->rank; + if (item->rank != nn && item->parent != 0) + { + item->rank = nn; + fix_node(item->parent); + } + } + } + } + void free_node(heap_node * item) + { + if (item->left) + { + free_node(item->left); + delete item->left; + item->left = nullptr; + } + if (item->right) + { + free_node(item->right); + delete item->right; + item->right = nullptr; + } + } + heap_node * merge(heap_node * h1, heap_node * h2) + { + if (!h1) + return h2; + if (!h2) + return h1; + if (*(h1->item) < *(h2->item)) + return merge1(h1, h2); + else + return merge1(h2, h1); + } + heap_node * merge1(heap_node * h1, heap_node * h2) + { + if (!h1->left) + { + h1->left = h2; + h2->parent = h1; + } + else + { + h1->right = merge(h1->right, h2); + if (h1->right) + { + h1->right->parent = h1; + } + if ((h1->left->rank) < (h1->right->rank)) + { + heap_node * tmp = h1->left; + h1->left = h1->right; + h1->right = tmp; + } + h1->rank = h1->right->rank + 1; + } + return h1; + } + public: + l_heap() : m_root(nullptr) + {} + bool empty() const + { + return (m_root == nullptr); + } + heap_node * find_min() const + { + return m_root; + } + heap_node * find_snd_min() const + { + if (m_root == nullptr) + return nullptr; + if (m_root->left == nullptr) + return m_root->right; + if (m_root->right == nullptr) + return m_root->left; + if (m_root->left->operator<(*m_root->right)) + return m_root->left; + else + return m_root->right; + } + heap_node * insert(t_element * x) + { + heap_node * n = new heap_node(x); + l_heap lh; + lh.m_root = n; + merge(&lh); + return n; + } + void delete_min() + { + heap_node * old_root = m_root; + m_root = merge(m_root->left, m_root->right); + if (m_root) + m_root->parent = nullptr; + delete old_root; + } + void delete_element(heap_node * item) + { + if (item != nullptr) + { + if (m_root == item) + { + delete_min(); + } + else + { + heap_node * h1 = merge(item->left, item->right); + if (h1) + h1->parent = item->parent; + if (item == item->parent->left) + { + 
item->parent->left = h1; + } + else if (item == item->parent->right) + { + item->parent->right = h1; + } + fix_node(item->parent); + delete item; + } + } + } + void merge(l_heap * rhs) + { + m_root = merge(m_root, rhs->m_root); + rhs->m_root = nullptr; + } + void free_memory() + { + if (m_root != nullptr) + { + free_node(m_root); + delete m_root; + m_root = nullptr; + } + } + }; + struct ht_node; + struct m_node + { + size_type min_sum; + int64_t i; + int64_t j; + heap_node * qel; + l_heap * myhpq; + ht_node * lt; + ht_node * rt; + m_node() : qel(0), myhpq(0), lt(0), rt(0) + {} + bool operator<(const m_node other) + { + if (min_sum != other.min_sum) + { + return min_sum < other.min_sum; + } + if (i != other.i) + { + return i < other.i; + } + return j < other.j; + } + bool operator>(const m_node other) + { + return other < *this; + } + }; + struct ht_node + { + int64_t pos; + uint64_t c; + size_type w; + bool t; + int64_t level; + m_node * mpql; + m_node * mpqr; + heap_node * ql; + heap_node * qr; + ht_node * left; + ht_node * right; + ht_node() : mpql(0), mpqr(0), ql(0), qr(0), left(nullptr), right(nullptr) + {} + bool operator<(ht_node const & other) + { + if (w != other.w) + { + return w < other.w; + } + return pos < other.pos; + } + bool operator>(ht_node const & other) + { + return other < *this; + } + }; + template + static void construct_tree(t_rac & C, std::vector & temp_nodes) + { + std::vector node_vector; + for (size_t i = 0; i < C.size(); i++) + { + if (C[i]) + { + ht_node n; + n.c = (uint64_t)i; + n.w = C[i]; + n.t = true; + n.pos = node_vector.size(); + node_vector.push_back(n); + } + } + if (node_vector.size() == 1) + { + temp_nodes.emplace_back(pc_node(node_vector[0].w, (size_type)node_vector[0].c)); + return; + } + size_type sigma = node_vector.size(); + std::vector T(sigma); + std::vector A(sigma); + std::vector> HPQ(sigma); + l_heap MPQ; + T[0] = node_vector[0]; + A[0] = &T[0]; + for (size_type i = 1; i < sigma; i++) + { + T[i] = node_vector[i]; + 
A[i] = &T[i]; + T[i - 1].qr = HPQ[i - 1].insert(&T[i - 1]); + T[i].ql = HPQ[i - 1].insert(&T[i]); + m_node * m = new m_node(); + m->min_sum = T[i - 1].w + T[i].w; + m->i = i - 1; + m->j = i; + m->lt = &T[i - 1]; + m->rt = &T[i]; + m->myhpq = &HPQ[i - 1]; + m->qel = MPQ.insert(m); + T[i - 1].mpqr = m; + T[i].mpql = m; + } + for (size_type k = 1; k < sigma; k++) + { + m_node * m = MPQ.find_min()->item; + ht_node * l = A[m->i]; + ht_node * r = A[m->j]; + int64_t lpos = m->i; + int64_t rpos = m->j; + l_heap * n_hpq = nullptr; + ht_node * n_rt = nullptr; + ht_node * n_lt = nullptr; + m_node * n_m = new m_node(); + if (l->t) + { + if (l->mpql) + l->mpql->myhpq->delete_element(l->ql); + l->ql = nullptr; + if (l->mpqr) + l->mpqr->myhpq->delete_element(l->qr); + l->qr = nullptr; + } + else + { + m->myhpq->delete_element(l->ql); + l->ql = nullptr; + } + if (r->t) + { + if (r->mpql) + r->mpql->myhpq->delete_element(r->ql); + l->ql = nullptr; + if (r->mpqr) + r->mpqr->myhpq->delete_element(r->qr); + r->qr = nullptr; + } + else + { + m->myhpq->delete_element(r->ql); + r->ql = nullptr; + } + if (l->t && r->t) + { + l_heap * h1 = nullptr; + l_heap * h2 = nullptr; + l_heap * h3 = nullptr; + if (l->mpql) + { + n_lt = l->mpql->lt; + if (n_lt == l) + n_lt = nullptr; + if (n_lt) + n_lt->mpqr = n_m; + h1 = l->mpql->myhpq; + h2 = l->mpqr->myhpq; + h1->merge(h2); + MPQ.delete_element(l->mpql->qel); + MPQ.delete_element(l->mpqr->qel); + delete l->mpql; + delete l->mpqr; + } + else + { + h1 = l->mpqr->myhpq; + h2 = l->mpqr->myhpq; + n_lt = nullptr; + MPQ.delete_element(l->mpqr->qel); + delete l->mpqr; + } + if (r->mpqr) + { + n_rt = r->mpqr->rt; + if (n_rt == r) + n_rt = nullptr; + if (n_rt) + n_rt->mpql = n_m; + h3 = r->mpqr->myhpq; + h1->merge(h3); + MPQ.delete_element(r->mpqr->qel); + delete r->mpqr; + n_hpq = h1; + if (n_rt) + n_rt->mpql = n_m; + } + else + { + n_rt = nullptr; + n_hpq = h1; + } + } + else if (l->t) + { + if (l->mpql) + { + n_lt = l->mpql->lt; + if (n_lt) + n_lt->mpqr = 
n_m; + n_rt = l->mpqr->rt; + if (n_rt) + n_rt->mpql = n_m; + l->mpql->myhpq->merge(l->mpqr->myhpq); + n_hpq = l->mpql->myhpq; + MPQ.delete_element(l->mpql->qel); + MPQ.delete_element(l->mpqr->qel); + delete l->mpql; + delete l->mpqr; + } + else + { + n_lt = nullptr; + n_rt = l->mpqr->rt; + if (n_rt) + n_rt->mpql = n_m; + n_hpq = l->mpqr->myhpq; + MPQ.delete_element(l->mpqr->qel); + delete l->mpqr; + } + } + else if (r->t) + { + if (r->mpqr) + { + n_lt = r->mpql->lt; + if (n_lt) + n_lt->mpqr = n_m; + n_rt = r->mpqr->rt; + if (n_rt) + n_rt->mpql = n_m; + r->mpql->myhpq->merge(r->mpqr->myhpq); + n_hpq = r->mpql->myhpq; + MPQ.delete_element(r->mpql->qel); + MPQ.delete_element(r->mpqr->qel); + delete r->mpql; + delete r->mpqr; + } + else + { + n_lt = r->mpql->lt; + if (n_lt) + n_lt->mpqr = n_m; + n_rt = nullptr; + n_hpq = r->mpql->myhpq; + MPQ.delete_element(r->mpql->qel); + delete r->mpql; + } + } + else + { + MPQ.delete_element(m->qel); + n_hpq = m->myhpq; + n_lt = m->lt; + n_rt = m->rt; + if (n_lt) + n_lt->mpqr = n_m; + if (n_rt) + n_rt->mpql = n_m; + delete m; + } + ht_node * new_node = new ht_node(); + new_node->c = ' '; + new_node->w = l->w + r->w; + new_node->t = false; + new_node->pos = lpos; + new_node->left = l; + new_node->right = r; + new_node->ql = n_hpq->insert(new_node); + A[lpos] = new_node; + A[rpos] = nullptr; + ht_node * tmp_min = n_hpq->find_min()->item; + heap_node * tmpsnd = n_hpq->find_snd_min(); + if (tmpsnd) + { + ht_node * tmp_snd = n_hpq->find_snd_min()->item; + n_m->min_sum = tmp_min->w + tmp_snd->w; + if (tmp_min->pos < tmp_snd->pos) + { + n_m->i = tmp_min->pos; + n_m->j = tmp_snd->pos; + } + else + { + n_m->i = tmp_snd->pos; + n_m->j = tmp_min->pos; + } + n_m->qel = MPQ.insert(n_m); + n_m->myhpq = n_hpq; + n_m->lt = n_lt; + n_m->rt = n_rt; + } + else + { + n_hpq->free_memory(); + delete n_m; + } + } + assign_level(A[0], 0); + std::vector stack(sigma, nullptr); + for (size_type i = 0; i < sigma; i++) + { + 
temp_nodes.emplace_back(pc_node(T[i].w, (size_type)T[i].c)); + T[i].pos = i; + } + int64_t spointer = -1; + uint64_t qpointer = 0; + while (qpointer < sigma or spointer >= 1LL) + { + if (spointer >= 1LL and (stack[spointer]->level == stack[spointer - 1]->level)) + { + ht_node * n_node = new ht_node(); + n_node->t = false; + n_node->left = stack[spointer - 1]; + n_node->right = stack[spointer]; + n_node->level = stack[spointer]->level - 1; + n_node->w = stack[spointer]->w + stack[spointer - 1]->w; + n_node->c = '|'; + n_node->pos = temp_nodes.size(); + temp_nodes[stack[spointer - 1]->pos].parent = temp_nodes.size(); + temp_nodes[stack[spointer]->pos].parent = temp_nodes.size(); + temp_nodes.emplace_back( + pc_node(n_node->w, 0, pc_node::undef, stack[spointer - 1]->pos, stack[spointer]->pos)); + if (!stack[spointer - 1]->t) + delete stack[spointer - 1]; + if (!stack[spointer]->t) + delete stack[spointer]; + stack[--spointer] = n_node; + } + else + { + stack[++spointer] = &T[qpointer++]; + } + } + delete stack[0]; + } + static void assign_level(ht_node * n, int64_t lvl) + { + if (n) + { + n->level = lvl; + assign_level(n->left, lvl + 1); + assign_level(n->right, lvl + 1); + if (!n->t) + { + delete n; + } + } + } +}; +struct hutu_shape +{ + template + using type = _hutu_shape; +}; +} +#endif +#ifndef INCLUDED_SDSL_WT_RLMN +#define INCLUDED_SDSL_WT_RLMN +#include +#include +#include +#include +#include +#include +#include +namespace sdsl +{ +template +struct wt_rlmn_trait +{ + enum + { + width = 0 + }; + typedef int_vector<> C_type; + typedef int_vector<> C_bf_rank_type; + static std::map temp_C() + { + return std::map(); + } + static C_type init_C(std::map & C, uint64_t size) + { + uint64_t max_symbol = (--C.end())->first; + return C_type(max_symbol + 1, 0, bits::hi(size) + 1); + } + static C_bf_rank_type init_C_bf_rank(C_type const & C, uint64_t size) + { + return C_bf_rank_type(C.size(), 0, bits::hi(size) + 1); + } +}; +template <> +struct wt_rlmn_trait +{ + enum + { 
+ width = 8 + }; + typedef int_vector<64> C_type; + typedef int_vector<64> C_bf_rank_type; + static int_vector<64> temp_C() + { + return int_vector<64>(256, 0); + } + static C_type init_C(C_type & C, uint64_t) + { + return C; + } + static C_bf_rank_type init_C_bf_rank(C_type const &, uint64_t) + { + return int_vector<64>(256, 0); + } +}; +template , + class t_rank = typename t_bitvector::rank_1_type, + class t_select = typename t_bitvector::select_1_type, + class t_wt = wt_huff<>> +class wt_rlmn +{ +public: + typedef t_wt wt_type; + typedef int_vector<>::size_type size_type; + typedef typename t_wt::value_type value_type; + typedef typename t_bitvector::difference_type difference_type; + typedef random_access_const_iterator const_iterator; + typedef const_iterator iterator; + typedef t_bitvector bit_vector_type; + typedef t_rank rank_support_type; + typedef t_select select_support_type; + typedef wt_tag index_category; + typedef typename t_wt::alphabet_category alphabet_category; + enum + { + lex_ordered = false + }; + enum + { + width = wt_rlmn_trait::width + }; + typedef typename wt_rlmn_trait::C_type C_type; + typedef typename wt_rlmn_trait::C_bf_rank_type C_bf_rank_type; +private: + size_type m_size = 0; + bit_vector_type m_bl; + bit_vector_type m_bf; + wt_type m_wt; + rank_support_type m_bl_rank; + rank_support_type m_bf_rank; + select_support_type m_bl_select; + select_support_type m_bf_select; + C_type m_C; + C_bf_rank_type m_C_bf_rank; +public: + size_type const & sigma = m_wt.sigma; + wt_rlmn() = default; + template + wt_rlmn(t_it begin, t_it end, std::string tmp_dir = ram_file_name("")) : m_size(std::distance(begin, end)) + { + std::string temp_file = + tmp_dir + +"_wt_rlmn_" + util::to_string(util::pid()) + "_" + util::to_string(util::id()); + { + if (0 == m_size) + return; + int_vector_buffer condensed_wt(temp_file, std::ios::out); + bit_vector bl = bit_vector(m_size, 0); + auto C = wt_rlmn_trait::temp_C(); + value_type last_c = (value_type)0; + 
size_type j = 0; + for (auto it = begin; it != end; ++it, ++j) + { + value_type c = *it; + if (last_c != c or it == begin) + { + bl[j] = 1; + condensed_wt.push_back(c); + } + ++C[c]; + last_c = c; + } + condensed_wt.close(); + m_C = wt_rlmn_trait::init_C(C, m_size); + for (size_type i = 0, prefix_sum = 0; i < m_C.size(); ++i) + { + m_C[i] = prefix_sum; + prefix_sum += C[i]; + } + C_type lf_map = m_C; + bit_vector bf = bit_vector(m_size + 1, 0); + bf[m_size] = 1; + j = 0; + for (auto it = begin; it != end; ++it, ++j) + { + value_type c = *it; + if (bl[j]) + { + bf[lf_map[c]] = 1; + } + ++lf_map[c]; + } + { + int_vector_buffer temp_bwt_buf(temp_file); + m_wt = wt_type(temp_bwt_buf.begin(), temp_bwt_buf.end(), tmp_dir); + } + sdsl::remove(temp_file); + m_bl = bit_vector_type(std::move(bl)); + m_bf = bit_vector_type(std::move(bf)); + } + util::init_support(m_bl_rank, &m_bl); + util::init_support(m_bf_rank, &m_bf); + util::init_support(m_bf_select, &m_bf); + util::init_support(m_bl_select, &m_bl); + m_C_bf_rank = wt_rlmn_trait::init_C_bf_rank(m_C, m_size); + for (size_type i = 0; i < m_C.size(); ++i) + { + m_C_bf_rank[i] = m_bf_rank(m_C[i]); + } + } + wt_rlmn(wt_rlmn const & wt) : + m_size(wt.m_size), + m_bl(wt.m_bl), + m_bf(wt.m_bf), + m_wt(wt.m_wt), + m_bl_rank(wt.m_bl_rank), + m_bf_rank(wt.m_bf_rank), + m_bl_select(wt.m_bl_select), + m_bf_select(wt.m_bf_select), + m_C(wt.m_C), + m_C_bf_rank(wt.m_C_bf_rank) + { + m_bl_rank.set_vector(&m_bl); + m_bf_rank.set_vector(&m_bf); + m_bl_select.set_vector(&m_bl); + m_bf_select.set_vector(&m_bf); + } + wt_rlmn(wt_rlmn && wt) : + m_size(wt.m_size), + m_bl(std::move(wt.m_bl)), + m_bf(std::move(wt.m_bf)), + m_wt(std::move(wt.m_wt)), + m_bl_rank(std::move(wt.m_bl_rank)), + m_bf_rank(std::move(wt.m_bf_rank)), + m_bl_select(std::move(wt.m_bl_select)), + m_bf_select(std::move(wt.m_bf_select)), + m_C(std::move(wt.m_C)), + m_C_bf_rank(std::move(wt.m_C_bf_rank)) + { + m_bl_rank.set_vector(&m_bl); + m_bf_rank.set_vector(&m_bf); + 
m_bl_select.set_vector(&m_bl); + m_bf_select.set_vector(&m_bf); + } + wt_rlmn & operator=(wt_rlmn const & wt) + { + if (this != &wt) + { + wt_rlmn tmp(wt); + *this = std::move(tmp); + } + return *this; + } + wt_rlmn & operator=(wt_rlmn && wt) + { + if (this != &wt) + { + m_size = std::move(wt.m_size); + m_bl = std::move(wt.m_bl); + m_bf = std::move(wt.m_bf); + m_wt = std::move(wt.m_wt); + m_bl_rank = std::move(wt.m_bl_rank); + m_bl_rank.set_vector(&m_bl); + m_bf_rank = std::move(wt.m_bf_rank); + m_bf_rank.set_vector(&m_bf); + m_bl_select = std::move(wt.m_bl_select); + m_bl_select.set_vector(&m_bl); + m_bf_select = std::move(wt.m_bf_select); + m_bf_select.set_vector(&m_bf); + m_C = std::move(wt.m_C); + m_C_bf_rank = std::move(wt.m_C_bf_rank); + } + return *this; + } + size_type size() const + { + return m_size; + } + bool empty() const + { + return 0 == m_size; + } + value_type operator[](size_type i) const + { + assert(i < size()); + return m_wt[m_bl_rank(i + 1) - 1]; + }; + size_type rank(size_type i, value_type c) const + { + assert(i <= size()); + if (i == 0) + return 0; + size_type wt_ex_pos = m_bl_rank(i); + size_type c_runs = m_wt.rank(wt_ex_pos, c); + if (c_runs == 0) + return 0; + if (m_wt[wt_ex_pos - 1] == c) + { + size_type c_run_begin = m_bl_select(wt_ex_pos); + return m_bf_select(m_C_bf_rank[c] + c_runs) - m_C[c] + i - c_run_begin; + } + else + { + return m_bf_select(m_C_bf_rank[c] + c_runs + 1) - m_C[c]; + } + }; + std::pair inverse_select(size_type i) const + { + assert(i < size()); + if (i == 0) + { + return std::make_pair(0, m_wt[0]); + } + size_type wt_ex_pos = m_bl_rank(i + 1); + auto rc = m_wt.inverse_select(wt_ex_pos - 1); + size_type c_runs = rc.first + 1; + value_type c = rc.second; + if (c_runs == 0) + return std::make_pair(0, c); + if (m_wt[wt_ex_pos - 1] == c) + { + size_type c_run_begin = m_bl_select(wt_ex_pos); + return std::make_pair(m_bf_select(m_C_bf_rank[c] + c_runs) - m_C[c] + i - c_run_begin, c); + } + else + { + return 
std::make_pair(m_bf_select(m_C_bf_rank[c] + c_runs + 1) - m_C[c], c); + } + } + size_type select(size_type i, value_type c) const + { + assert(i > 0); + assert(i <= rank(size(), c)); + size_type c_runs = m_bf_rank(m_C[c] + i) - m_C_bf_rank[c]; + size_type offset = m_C[c] + i - 1 - m_bf_select(c_runs + m_C_bf_rank[c]); + return m_bl_select(m_wt.select(c_runs, c) + 1) + offset; + }; + const_iterator begin() const + { + return const_iterator(this, 0); + } + const_iterator end() const + { + return const_iterator(this, size()); + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += write_member(m_size, out, child, "size"); + written_bytes += m_bl.serialize(out, child, "bl"); + written_bytes += m_bf.serialize(out, child, "bf"); + written_bytes += m_wt.serialize(out, child, "wt"); + written_bytes += m_bl_rank.serialize(out, child, "bl_rank"); + written_bytes += m_bf_rank.serialize(out, child, "bf_rank"); + written_bytes += m_bl_select.serialize(out, child, "bl_select"); + written_bytes += m_bf_select.serialize(out, child, "bf_select"); + written_bytes += m_C.serialize(out, child, "C"); + written_bytes += m_C_bf_rank.serialize(out, child, "C_bf_rank"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + read_member(m_size, in); + m_bl.load(in); + m_bf.load(in); + m_wt.load(in); + m_bl_rank.load(in, &m_bl); + m_bf_rank.load(in, &m_bf); + m_bl_select.load(in, &m_bl); + m_bf_select.load(in, &m_bf); + m_C.load(in); + m_C_bf_rank.load(in); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_bl)); + ar(CEREAL_NVP(m_bf)); + ar(CEREAL_NVP(m_wt)); + ar(CEREAL_NVP(m_bl_rank)); + ar(CEREAL_NVP(m_bf_rank)); + ar(CEREAL_NVP(m_bl_select)); + 
ar(CEREAL_NVP(m_bf_select)); + ar(CEREAL_NVP(m_C)); + ar(CEREAL_NVP(m_C_bf_rank)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_bl)); + ar(CEREAL_NVP(m_bf)); + ar(CEREAL_NVP(m_wt)); + ar(CEREAL_NVP(m_bl_rank)); + m_bl_rank.set_vector(&m_bl); + ar(CEREAL_NVP(m_bf_rank)); + m_bf_rank.set_vector(&m_bf); + ar(CEREAL_NVP(m_bl_select)); + m_bl_select.set_vector(&m_bl); + ar(CEREAL_NVP(m_bf_select)); + m_bf_select.set_vector(&m_bf); + ar(CEREAL_NVP(m_C)); + ar(CEREAL_NVP(m_C_bf_rank)); + } + bool operator==(wt_rlmn const & other) const noexcept + { + return (m_size == other.m_size) && (m_bl == other.m_bl) && (m_bf == other.m_bf) && (m_wt == other.m_wt) + && (m_bl_rank == other.m_bl_rank) && (m_bf_rank == other.m_bf_rank) && (m_bl_select == other.m_bl_select) + && (m_bf_select == other.m_bf_select) && (m_C == other.m_C) && (m_C_bf_rank == other.m_C_bf_rank); + } + bool operator!=(wt_rlmn const & other) const noexcept + { + return !(*this == other); + } +}; +} +#endif +namespace sdsl +{ +struct balanced_shape; +struct huff_shape; +struct hutu_shape; +template +using wt_hutu_int = wt_pc>; +template +using wt_huff_int = wt_pc>; +template +using wt_blcd_int = wt_pc>; +} +#endif +namespace sdsl +{ +template , + uint32_t t_dens = 32, + uint32_t t_inv_dens = 64, + class t_sa_sample_strat = sa_order_sa_sampling<>, + class t_isa_sample_strat = isa_sampling<>> +using csa_wt_int = csa_wt>; +template , + uint32_t t_dens = 32, + uint32_t t_inv_dens = 64, + class t_sa_sample_strat = sa_order_sa_sampling<>, + class t_isa_sample_strat = isa_sampling<> + > +using csa_sada_int = csa_sada>; +} +#endif +#ifndef INCLUDED_SDSL_SUFFIX_TREES +#define INCLUDED_SDSL_SUFFIX_TREES +#ifndef INCLUDED_SDSL_CST_FULLY +#define INCLUDED_SDSL_CST_FULLY +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef INCLUDED_SDSL_BP_SUPPORT_SADA +#define INCLUDED_SDSL_BP_SUPPORT_SADA +#include +#include +#ifndef 
INCLUDED_SDSL_BP_SUPPORT_ALGORITHM
+#define INCLUDED_SDSL_BP_SUPPORT_ALGORITHM
+#include // NOTE(review): header names after #include (and template parameter lists below) look stripped in this chunk - confirm against upstream.
+#include
+#include
+#include
+#include
+#include
+#ifndef INCLUDED_SDSL_SORTED_STACK_SUPPORT
+#define INCLUDED_SDSL_SORTED_STACK_SUPPORT
+#include
+#include
+#include
+#include
+namespace sdsl
+{
+class sorted_stack_support // Stack of strictly increasing integers, kept as a bitmap inside 64-bit words.
+{
+public:
+    typedef int_vector<64>::size_type size_type;
+private:
+    size_type m_n; // Upper bound for pushed values (checked by the assert in push()).
+    size_type m_cnt; // Number of elements currently on the stack.
+    size_type m_top; // Current top, stored as value + 1 (top() returns m_top - 1); 0 after construction.
+    int_vector<64> m_stack; // 63 payload bits per word; bit 63 flags a word that holds a saved m_top instead of payload.
+    inline size_type block_nr(size_type x) // Index of the word that holds bit x.
+    {
+        return x / 63;
+    };
+    inline size_type block_pos(size_type x) // Bit offset of x inside its word.
+    {
+        return x % 63;
+    };
+public:
+    sorted_stack_support(size_type n);
+    sorted_stack_support(sorted_stack_support const &) = default;
+    sorted_stack_support(sorted_stack_support &&) = default;
+    sorted_stack_support & operator=(sorted_stack_support const &) = default;
+    sorted_stack_support & operator=(sorted_stack_support &&) = default;
+    bool empty() const
+    {
+        return 0 == m_cnt;
+    };
+    size_type top() const;
+    void pop();
+    void push(size_type x);
+    size_type size() const
+    {
+        return m_cnt;
+    };
+    size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const;
+    void load(std::istream & in);
+    template
+    void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const;
+    template
+    void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar);
+    bool operator==(sorted_stack_support const & other) const noexcept;
+    bool operator!=(sorted_stack_support const & other) const noexcept;
+};
+inline sorted_stack_support::sorted_stack_support(size_type n) : m_n(n), m_cnt(0), m_top(0), m_stack()
+{
+    m_stack = int_vector<64>(block_nr(m_n + 1) + 1, 0); // Enough words to hold bit m_n + 1 (values are stored shifted by one).
+    m_stack[0] = 1; // Bit 0 is permanently set, so word 0 is never zero and pop() resolves to m_top == 0 once all pushed bits are cleared.
+}
+inline sorted_stack_support::size_type sorted_stack_support::top() const
+{
+    assert(empty() == false);
+    return m_top - 1; // Undo the +1 shift applied in push().
+}
+inline void sorted_stack_support::push(size_type x)
+{
+    assert((empty() or top() < x) and x <= m_n); // Values must arrive in strictly increasing order and must not exceed m_n.
+    x += 1; // Store shifted by one; position 0 is taken by the sentinel bit.
+    ++m_cnt;
+    size_type bn = block_nr(x);
+    m_stack[bn] ^= (1ULL << block_pos(x)); // XOR the bit for x into its word.
+    if (bn > 0 and m_stack[bn - 1] == 0) // The word below is empty: use it to remember the previous top.
+    {
+        m_stack[bn - 1] = 0x8000000000000000ULL | m_top; // Bit 63 marks the word as a saved top rather than payload.
+    }
+    m_top = x;
+}
+inline void sorted_stack_support::pop() // Removes the top element; does nothing when the stack is empty.
+{
+    if (!empty())
+    {
+        --m_cnt;
+        size_type bn = block_nr(m_top);
+        uint64_t w = m_stack[bn];
+        assert((w >> 63) == 0); // The word holding the top is expected to be payload, not a saved top.
+        w ^= (1ULL << block_pos(m_top)); // Clear the bit of the element being removed.
+        m_stack[bn] = w;
+        if (w > 0) // Other elements remain in the same word.
+        {
+            m_top = bn * 63 + bits::hi(w); // bits::hi presumably yields the index of the highest set bit - confirm.
+        }
+        else
+        {
+            assert(bn > 0);
+            w = m_stack[bn - 1];
+            if ((w >> 63) == 0) // Payload word: the new top is taken from it.
+            {
+                assert(w > 0);
+                m_top = (bn - 1) * 63 + bits::hi(w);
+            }
+            else // Saved-top word written by push(): restore m_top from it and free the word.
+            {
+                m_stack[bn - 1] = 0;
+                m_top = w & 0x7FFFFFFFFFFFFFFFULL;
+            }
+        }
+    }
+}
+inline sorted_stack_support::size_type
+sorted_stack_support::serialize(std::ostream & out, structure_tree_node * v, std::string name) const
+{
+    structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this));
+    size_type written_bytes = 0;
+    written_bytes += write_member(m_n, out); // Stream order is m_n, m_top, m_cnt, m_stack; load() reads the same order.
+    written_bytes += write_member(m_top, out);
+    written_bytes += write_member(m_cnt, out);
+    written_bytes += m_stack.serialize(out);
+    structure_tree::add_size(child, written_bytes);
+    return written_bytes;
+}
+inline void sorted_stack_support::load(std::istream & in)
+{
+    read_member(m_n, in); // Must mirror the order used in serialize().
+    read_member(m_top, in);
+    read_member(m_cnt, in);
+    m_stack.load(in);
+}
+template
+void sorted_stack_support::CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const
+{
+    ar(CEREAL_NVP(m_n)); // Archive order is m_n, m_cnt, m_top, m_stack; this differs from the binary serialize() order.
+    ar(CEREAL_NVP(m_cnt));
+    ar(CEREAL_NVP(m_top));
+    ar(CEREAL_NVP(m_stack));
+}
+template
+void sorted_stack_support::CEREAL_LOAD_FUNCTION_NAME(archive_t & ar)
+{
+    ar(CEREAL_NVP(m_n)); // Same member order as the save function above.
+    ar(CEREAL_NVP(m_cnt));
+    ar(CEREAL_NVP(m_top));
+    ar(CEREAL_NVP(m_stack));
+}
+inline bool sorted_stack_support::operator==(sorted_stack_support const & other) const noexcept
+{
+    return (m_n == other.m_n) && (m_cnt == other.m_cnt) && (m_top == other.m_top) && (m_stack == other.m_stack);
+}
+inline bool sorted_stack_support::operator!=(sorted_stack_support const & other) const noexcept
+{
+    return !(*this
== other);
+}
+}
+#endif
+namespace sdsl
+{
+// Lookup tables over all 256 possible 8-bit parenthesis words w (bit set = opening, bit clear = closing).
+// The near_* scan routines in this header use them to advance one byte at a time.
+// NOTE(review): the template parameter lists on `excess` and on the `data` definition, and the element types of
+// the std::stack / std::map declarations further down, look stripped in this chunk; they are reproduced exactly
+// as found - confirm against the upstream header.
+template
+struct excess
+{
+    struct impl
+    {
+        // near_fwd_pos[(x + 8) << 8 | w]: smallest bit position p in [0, 7] at which the running excess of w,
+        // scanned upwards from bit 0 (+1 per set bit, -1 per clear bit), equals x; 8 if x is never reached.
+        // near_bwd_pos[(x + 8) << 8 | w]: same idea scanned downwards from bit 7 with the signs flipped
+        // (-1 per set bit, +1 per clear bit).
+        //
+        // Both tables need one 256-entry row for every x in [-8, 8], i.e. 17 rows. near_fwd_excess and
+        // near_bwd_excess index them with (excess_v << 8) + byte for excess_v in [0, 16]. The previous 16-row
+        // declaration left row 16 (x == 8) past the end of the array, so those lookups read out of bounds.
+        uint8_t near_fwd_pos[(8 - (-8) + 1) * 256];
+        uint8_t near_bwd_pos[(8 - (-8) + 1) * 256];
+        int8_t word_sum[256];               // Total excess of w (opening minus closing).
+        int8_t min[256];                    // Minimum prefix excess of w.
+        int8_t min_pos_max[256];            // Largest position at which that minimum is attained.
+        uint32_t min_match_pos_packed[256]; // Nibble k-1: first position where the prefix excess hits -k; 9 if never.
+        uint32_t max_match_pos_packed[256]; // Nibble k-1: same, scanning from bit 7 downwards with flipped signs.
+        uint16_t min_open_excess_info[256]; // Bits 0-7: min excess at an opening + 8 (17 if none); 8-11: its position; 12-15: popcount.
+        impl()
+        {
+            // Fill every row that callers can address, including x == 8.
+            for (int32_t x = -8; x <= 8; ++x)
+            {
+                for (uint16_t w = 0; w < 256; ++w)
+                {
+                    uint16_t i = (x + 8) << 8 | w;
+                    near_fwd_pos[i] = 8;
+                    int8_t p = 0;
+                    int8_t excess = 0;
+                    do
+                    {
+                        excess += 1 - 2 * ((w & (1 << p)) == 0);
+                        if (excess == x)
+                        {
+                            near_fwd_pos[i] = p;
+                            break;
+                        }
+                        ++p;
+                    }
+                    while (p < 8);
+                    near_bwd_pos[i] = 8;
+                    p = 7;
+                    excess = 0;
+                    do
+                    {
+                        excess += 1 - 2 * ((w & (1 << p)) > 0);
+                        if (excess == x)
+                        {
+                            near_bwd_pos[i] = p;
+                            break;
+                        }
+                        --p;
+                    }
+                    while (p > -1);
+                }
+            }
+            // Scratch vectors: one 32-bit cell that is temporarily viewed as eight 4-bit cells.
+            int_vector<> packed_mins(1, 0, 32);
+            int_vector<> packed_maxs(1, 0, 32);
+            for (uint16_t w = 0; w < 256; ++w)
+            {
+                int8_t excess = 0;
+                int8_t rev_excess = 0;
+                int32_t min_excess_of_open = 17;
+                int32_t min_excess_of_open_pos = 0;
+                uint32_t ones = 0;
+                min[w] = 8;
+                // Every nibble starts out as 9, meaning "not reached".
+                packed_mins[0] = 0x99999999U;
+                packed_maxs[0] = 0x99999999U;
+                packed_mins.width(4);
+                packed_maxs.width(4);
+                for (uint16_t p = 0; p < 8; ++p)
+                {
+                    ones += (w & (1 << p)) != 0;
+                    excess += 1 - 2 * ((w & (1 << p)) == 0);
+                    if (excess <= min[w])
+                    {
+                        min[w] = excess;
+                        min_pos_max[w] = p;
+                    }
+                    if (excess < 0 and packed_mins[-excess - 1] == 9)
+                    {
+                        packed_mins[-excess - 1] = p;
+                    }
+                    if (w & (1 << p) and excess + 8 <= min_excess_of_open)
+                    {
+                        min_excess_of_open = excess + 8;
+                        min_excess_of_open_pos = p;
+                    }
+                    rev_excess += 1 - 2 * ((w & (1 << (7 - p))) > 0);
+                    if (rev_excess < 0 and packed_maxs[-rev_excess - 1] == 9)
+                    {
+                        packed_maxs[-rev_excess - 1] = 7 - p;
+                    }
+                }
+                word_sum[w] = excess;
+                packed_mins.width(32);
+                min_match_pos_packed[w] = packed_mins[0];
+                packed_maxs.width(32);
+                max_match_pos_packed[w] = packed_maxs[0];
+                min_open_excess_info[w] = (min_excess_of_open) | (min_excess_of_open_pos << 8) | (ones << 12);
+            }
+        }
+    };
+    static impl data;
+};
+template
+typename excess::impl excess::data;
+// Marks pioneers of the parentheses sequence bp for the given block size.
+// For every block and every other block that holds the partner of one of its closing parentheses, the last such
+// (opening, closing) pair gets both positions set in the returned bitmap.
+// bp is expected to be balanced; unbalanced input is caught only by the asserts.
+inline bit_vector calculate_pioneers_bitmap(bit_vector const & bp, uint64_t block_size)
+{
+    bit_vector pioneer_bitmap(bp.size(), 0);
+    std::stack opening_parenthesis;
+    uint64_t blocks = (bp.size() + block_size - 1) / block_size;
+    for (uint64_t block_nr = 0; block_nr < blocks; ++block_nr)
+    {
+        // Keyed by the block of the opening parenthesis; later pairs overwrite earlier ones.
+        std::map block_and_position;
+        std::map matching_position;
+        for (uint64_t i = 0, j = block_nr * block_size; i < block_size and j < bp.size(); ++i, ++j)
+        {
+            if (bp[j])
+            {
+                opening_parenthesis.push(j);
+            }
+            else
+            {
+                // top() on an empty std::stack is undefined; calculate_pioneers_bitmap_succinct asserts likewise.
+                assert(!opening_parenthesis.empty());
+                uint64_t position = opening_parenthesis.top();
+                uint64_t blockpos = position / block_size;
+                opening_parenthesis.pop();
+                block_and_position[blockpos] = position;
+                matching_position[blockpos] = j;
+            }
+        }
+        // Keys are sorted, so the walk stops at the current block: pairs inside one block are not pioneers.
+        for (std::map::const_iterator it = block_and_position.begin(),
+                                      end = block_and_position.end(),
+                                      mit = matching_position.begin();
+             it != end and it->first != block_nr;
+             ++it, ++mit)
+        {
+            pioneer_bitmap[it->second] = 1;
+            pioneer_bitmap[mit->second] = 1;
+        }
+    }
+    assert(opening_parenthesis.empty());
+    return pioneer_bitmap;
+}
+// Same result as calculate_pioneers_bitmap, computed in one pass with a sorted_stack_support instead of
+// a std::stack plus per-block maps.
+inline bit_vector calculate_pioneers_bitmap_succinct(bit_vector const & bp, uint64_t block_size)
+{
+    bit_vector pioneer_bitmap(bp.size(), 0);
+    sorted_stack_support opening_parenthesis(bp.size());
+    uint64_t cur_pioneer_block = 0, last_start = 0, last_j = 0, first_index_in_block = 0;
+    // new_block counts down the positions left in the current block.
+    for (uint64_t j = 0, new_block = block_size; j < bp.size(); ++j, --new_block)
+    {
+        if (!(new_block))
+        {
+            cur_pioneer_block = j / block_size;
+            first_index_in_block = j;
+            new_block = block_size;
+        }
+        if (bp[j])
+        {
+            // Skip an adjacent "()" inside the same block: it can never be a pioneer.
+            // The j + 1 bound keeps the look-ahead inside bp when the last bit is an (unbalanced) opening one.
+            if (new_block > 1 and j + 1 < bp.size() and !bp[j + 1])
+            {
+                ++j;
+                --new_block;
+                continue;
+            }
+            opening_parenthesis.push(j);
+        }
+        else
+        {
+            assert(!opening_parenthesis.empty());
+            uint64_t start = opening_parenthesis.top();
+            opening_parenthesis.pop();
+            if
(start < first_index_in_block)
+            {
+                // The partner lies in an earlier block. If it is in the same block as the pioneer recorded
+                // last, this pair supersedes that one.
+                if ((start / block_size) == cur_pioneer_block)
+                {
+                    pioneer_bitmap[last_start] = pioneer_bitmap[last_j] = 0;
+                }
+                pioneer_bitmap[start] = pioneer_bitmap[j] = 1;
+                cur_pioneer_block = start / block_size;
+                last_start = start;
+                last_j = j;
+            }
+        }
+    }
+    assert(opening_parenthesis.empty());
+    return pioneer_bitmap;
+}
+// Fills matches so that matches[i] is the position of the parenthesis matching position i of bp.
+// bp is expected to be balanced.
+// NOTE(review): the template parameter lists and the std::stack element type look stripped in this chunk;
+// they are reproduced exactly as found - confirm against the upstream header.
+template
+void calculate_matches(bit_vector const & bp, int_vector & matches)
+{
+    // Cells are just wide enough to hold bp.size().
+    matches = int_vector(bp.size(), 0, bits::hi(bp.size()) + 1);
+    std::stack opening_parenthesis;
+    for (uint64_t i = 0; i < bp.size(); ++i)
+    {
+        if (bp[i])
+        {
+            opening_parenthesis.push(i);
+        }
+        else
+        {
+            assert(!opening_parenthesis.empty());
+            uint64_t position = opening_parenthesis.top();
+            opening_parenthesis.pop();
+            matches[i] = position;
+            assert(matches[i] == position);
+            matches[position] = i;
+            assert(matches[position] == i);
+        }
+    }
+    assert(opening_parenthesis.empty());
+}
+// Fills enclose: for an opening parenthesis at i, enclose[i] is the position of the closest still-open
+// parenthesis before it, or bp.size() if there is none; for a closing parenthesis at i, enclose[i] is
+// the position of its matching opening parenthesis. bp is expected to be balanced.
+template
+void calculate_enclose(bit_vector const & bp, int_vector & enclose)
+{
+    enclose = int_vector(bp.size(), 0, bits::hi(bp.size()) + 1);
+    std::stack opening_parenthesis;
+    for (uint64_t i = 0; i < bp.size(); ++i)
+    {
+        if (bp[i])
+        {
+            if (!opening_parenthesis.empty())
+            {
+                uint64_t position = opening_parenthesis.top();
+                enclose[i] = position;
+                assert(enclose[i] == position);
+            }
+            else
+                enclose[i] = bp.size();
+            opening_parenthesis.push(i);
+        }
+        else
+        {
+            // top() on an empty std::stack is undefined; assert here just as calculate_matches does.
+            assert(!opening_parenthesis.empty());
+            uint64_t position = opening_parenthesis.top();
+            enclose[i] = position;
+            opening_parenthesis.pop();
+        }
+    }
+    assert(opening_parenthesis.empty());
+}
+// Searches forward from the opening parenthesis at i for its match, staying within i's block.
+inline uint64_t near_find_close(bit_vector const & bp, const uint64_t i, const uint64_t block_size)
+{
+    typedef bit_vector::difference_type difference_type;
+    difference_type excess_v = 1;
+    const uint64_t end = ((i + 1) / block_size + 1) * block_size;
+    const uint64_t l = (((i + 1) + 7) / 8) * 8;
+    const uint64_t r = (end / 8) * 8;
+    // Bit-by-bit up to the next byte boundary (or the end of the block, whichever comes first).
+    for (uint64_t j = i + 1; j < std::min(end, l); ++j)
+    {
+        if (bp[j])
+            ++excess_v;
+        else
+        {
+
--excess_v; + if (excess_v == 0) + { + return j; + } + } + } + uint64_t const * b = bp.data(); + for (uint64_t j = l; j < r; j += 8) + { + if (excess_v <= 8) + { + assert(excess_v > 0); + uint32_t x = excess<>::data.min_match_pos_packed[((*(b + (j >> 6))) >> (j & 0x3F)) & 0xFF]; + uint8_t p = (x >> ((excess_v - 1) << 2)) & 0xF; + if (p < 9) + { + return j + p; + } + } + excess_v += excess<>::data.word_sum[((*(b + (j >> 6))) >> (j & 0x3F)) & 0xFF]; + } + for (uint64_t j = std::max(l, r); j < end; ++j) + { + if (bp[j]) + ++excess_v; + else + { + --excess_v; + if (excess_v == 0) + { + return j; + } + } + } + return i; +} +inline uint64_t near_find_closing(bit_vector const & bp, uint64_t i, uint64_t closings, const uint64_t block_size) +{ + typedef bit_vector::difference_type difference_type; + difference_type excess_v = 0; + difference_type succ_excess = -closings; + const uint64_t end = (i / block_size + 1) * block_size; + const uint64_t l = (((i) + 7) / 8) * 8; + const uint64_t r = (end / 8) * 8; + for (uint64_t j = i; j < std::min(end, l); ++j) + { + if (bp[j]) + ++excess_v; + else + { + --excess_v; + if (excess_v == succ_excess) + { + return j; + } + } + } + uint64_t const * b = bp.data(); + for (uint64_t j = l; j < r; j += 8) + { + if (excess_v - succ_excess <= 8) + { + uint32_t x = excess<>::data.min_match_pos_packed[((*(b + (j >> 6))) >> (j & 0x3F)) & 0xFF]; + uint8_t p = (x >> (((excess_v - succ_excess) - 1) << 2)) & 0xF; + if (p < 9) + { + return j + p; + } + } + excess_v += excess<>::data.word_sum[((*(b + (j >> 6))) >> (j & 0x3F)) & 0xFF]; + } + for (uint64_t j = std::max(l, r); j < end; ++j) + { + if (bp[j]) + ++excess_v; + else + { + --excess_v; + if (excess_v == succ_excess) + { + return j; + } + } + } + return i - 1; +} +inline uint64_t +near_fwd_excess(bit_vector const & bp, uint64_t i, bit_vector::difference_type rel, const uint64_t block_size) +{ + typedef bit_vector::difference_type difference_type; + difference_type excess_v = rel; + const uint64_t 
end = (i / block_size + 1) * block_size; + const uint64_t l = (((i) + 7) / 8) * 8; + const uint64_t r = (end / 8) * 8; + for (uint64_t j = i; j < std::min(end, l); ++j) + { + excess_v += 1 - 2 * bp[j]; + if (!excess_v) + { + return j; + } + } + excess_v += 8; + uint64_t const * b = bp.data(); + for (uint64_t j = l; j < r; j += 8) + { + if (excess_v >= 0 and excess_v <= 16) + { + uint32_t x = excess<>::data.near_fwd_pos[(excess_v << 8) + (((*(b + (j >> 6))) >> (j & 0x3F)) & 0xFF)]; + if (x < 8) + { + return j + x; + } + } + excess_v -= excess<>::data.word_sum[((*(b + (j >> 6))) >> (j & 0x3F)) & 0xFF]; + } + excess_v -= 8; + for (uint64_t j = std::max(l, r); j < end; ++j) + { + excess_v += 1 - 2 * bp[j]; + if (!excess_v) + { + return j; + } + } + return i - 1; +} +inline uint64_t near_rmq(bit_vector const & bp, uint64_t l, uint64_t r, bit_vector::difference_type & min_rel_ex) +{ + typedef bit_vector::difference_type difference_type; + const uint64_t l8 = (((l + 1) + 7) / 8) * 8; + const uint64_t r8 = (r / 8) * 8; + difference_type excess_v = 0; + difference_type min_pos = l; + min_rel_ex = 0; + for (uint64_t j = l + 1; j < std::min(l8, r + 1); ++j) + { + if (bp[j]) + ++excess_v; + else + { + --excess_v; + if (excess_v <= min_rel_ex) + { + min_rel_ex = excess_v; + min_pos = j; + } + } + } + uint64_t const * b = bp.data(); + for (uint64_t j = l8; j < r8; j += 8) + { + int8_t x = excess<>::data.min[(((*(b + (j >> 6))) >> (j & 0x3F)) & 0xFF)]; + if ((excess_v + x) <= min_rel_ex) + { + min_rel_ex = excess_v + x; + min_pos = j + excess<>::data.min_pos_max[(((*(b + (j >> 6))) >> (j & 0x3F)) & 0xFF)]; + } + excess_v += excess<>::data.word_sum[((*(b + (j >> 6))) >> (j & 0x3F)) & 0xFF]; + } + for (uint64_t j = std::max(l8, r8); j < r + 1; ++j) + { + if (bp[j]) + ++excess_v; + else + { + --excess_v; + if (excess_v <= min_rel_ex) + { + min_rel_ex = excess_v; + min_pos = j; + } + } + } + return min_pos; +} +inline uint64_t +near_bwd_excess(bit_vector const & bp, uint64_t i, 
bit_vector::difference_type rel, const uint64_t block_size) +{ + typedef bit_vector::difference_type difference_type; + difference_type excess_v = rel; + const difference_type begin = ((difference_type)(i) / block_size) * block_size; + const difference_type r = ((difference_type)(i) / 8) * 8; + const difference_type l = ((difference_type)((begin + 7) / 8)) * 8; + for (difference_type j = i + 1; j >= std::max(r, begin); --j) + { + if (bp[j]) + ++excess_v; + else + --excess_v; + if (!excess_v) + return j - 1; + } + excess_v += 8; + uint64_t const * b = bp.data(); + for (difference_type j = r - 8; j >= l; j -= 8) + { + if (excess_v >= 0 and excess_v <= 16) + { + uint32_t x = excess<>::data.near_bwd_pos[(excess_v << 8) + (((*(b + (j >> 6))) >> (j & 0x3F)) & 0xFF)]; + if (x < 8) + { + return j + x - 1; + } + } + excess_v += excess<>::data.word_sum[((*(b + (j >> 6))) >> (j & 0x3F)) & 0xFF]; + } + excess_v -= 8; + for (difference_type j = std::min(l, r); j > begin; --j) + { + if (bp[j]) + ++excess_v; + else + --excess_v; + if (!excess_v) + return j - 1; + } + if (0 == begin and -1 == rel) + { + return -1; + } + return i + 1; +} +inline uint64_t near_find_open(bit_vector const & bp, uint64_t i, const uint64_t block_size) +{ + typedef bit_vector::difference_type difference_type; + difference_type excess_v = -1; + const difference_type begin = ((difference_type)(i - 1) / block_size) * block_size; + const difference_type r = ((difference_type)(i - 1) / 8) * 8; + const difference_type l = ((difference_type)((begin + 7) / 8)) * 8; + for (difference_type j = i - 1; j >= std::max(r, begin); --j) + { + if (bp[j]) + { + if (++excess_v == 0) + { + return j; + } + } + else + --excess_v; + } + uint64_t const * b = bp.data(); + for (difference_type j = r - 8; j >= l; j -= 8) + { + if (excess_v >= -8) + { + assert(excess_v < 0); + uint32_t x = excess<>::data.max_match_pos_packed[((*(b + (j >> 6))) >> (j & 0x3F)) & 0xFF]; + uint8_t p = (x >> ((-excess_v - 1) << 2)) & 0xF; + if (p < 9) + 
{ + return j + p; + } + } + excess_v += excess<>::data.word_sum[((*(b + (j >> 6))) >> (j & 0x3F)) & 0xFF]; + } + for (difference_type j = std::min(l, r) - 1; j >= begin; --j) + { + if (bp[j]) + { + if (++excess_v == 0) + { + return j; + } + } + else + --excess_v; + } + return i; +} +inline uint64_t near_find_opening(bit_vector const & bp, uint64_t i, const uint64_t openings, const uint64_t block_size) +{ + typedef bit_vector::difference_type difference_type; + difference_type excess_v = 0; + difference_type succ_excess = openings; + const difference_type begin = ((difference_type)(i) / block_size) * block_size; + const difference_type r = ((difference_type)(i) / 8) * 8; + const difference_type l = ((difference_type)((begin + 7) / 8)) * 8; + for (difference_type j = i; j >= std::max(r, begin); --j) + { + if (bp[j]) + { + if (++excess_v == succ_excess) + { + return j; + } + } + else + --excess_v; + } + uint64_t const * b = bp.data(); + for (difference_type j = r - 8; j >= l; j -= 8) + { + if (succ_excess - excess_v <= 8) + { + assert(succ_excess - excess_v > 0); + uint32_t x = excess<>::data.max_match_pos_packed[((*(b + (j >> 6))) >> (j & 0x3F)) & 0xFF]; + uint8_t p = (x >> ((succ_excess - excess_v - 1) << 2)) & 0xF; + if (p < 9) + { + return j + p; + } + } + excess_v += excess<>::data.word_sum[((*(b + (j >> 6))) >> (j & 0x3F)) & 0xFF]; + } + for (difference_type j = std::min(l, r) - 1; j >= begin; --j) + { + if (bp[j]) + { + if (++excess_v == succ_excess) + { + return j; + } + } + else + --excess_v; + } + return i + 1; +} +inline uint64_t near_enclose(bit_vector const & bp, uint64_t i, const uint64_t block_size) +{ + uint64_t opening_parentheses = 1; + for (uint64_t j = i; j + block_size - 1 > i and j > 0; --j) + { + if (bp[j - 1]) + { + ++opening_parentheses; + if (opening_parentheses == 2) + { + return j - 1; + } + } + else + --opening_parentheses; + } + return i; +} +inline uint64_t near_rmq_open(bit_vector const & bp, const uint64_t begin, const uint64_t end) +{ 
+ typedef bit_vector::difference_type difference_type; + difference_type min_excess = end - begin + 1, ex = 0; + uint64_t result = end; + const uint64_t l = ((begin + 7) / 8) * 8; + const uint64_t r = (end / 8) * 8; + for (uint64_t k = begin; k < std::min(end, l); ++k) + { + if (bp[k]) + { + ++ex; + if (ex <= min_excess) + { + result = k; + min_excess = ex; + } + } + else + { + --ex; + } + } + uint64_t const * b = bp.data(); + for (uint64_t k = l; k < r; k += 8) + { + uint16_t x = excess<>::data.min_open_excess_info[((*(b + (k >> 6))) >> (k & 0x3F)) & 0xFF]; + int8_t ones = (x >> 12); + if (ones) + { + int8_t min_ex = (x & 0xFF) - 8; + if (ex + min_ex <= min_excess) + { + result = k + ((x >> 8) & 0xF); + min_excess = ex + min_ex; + } + } + ex += ((ones << 1) - 8); + } + for (uint64_t k = std::max(r, l); k < end; ++k) + { + if (bp[k]) + { + ++ex; + if (ex <= min_excess) + { + result = k; + min_excess = ex; + } + } + else + { + --ex; + } + } + if (min_excess <= ex) + return result; + return end; +} +} +#endif +#ifndef NDEBUG +# include +#endif +#include +#include +namespace sdsl +{ +template , + class t_select = select_support_mcl<>> +class bp_support_sada +{ +public: + typedef bit_vector::size_type size_type; + typedef bit_vector::difference_type difference_type; + typedef int_vector<> sml_block_array_type; + typedef int_vector<> med_block_array_type; + typedef t_rank rank_type; + typedef t_select select_type; +private: + static_assert(0 < t_sml_blk, "bp_support_sada: t_sml_blk should be greater than 0!"); + bit_vector const * m_bp = nullptr; + rank_type m_bp_rank; + select_type m_bp_select; + sml_block_array_type m_sml_block_min_max; + med_block_array_type m_med_block_min_max; + size_type m_size = 0; + size_type m_sml_blocks = 0; + size_type m_med_blocks = 0; + size_type m_med_inner_blocks = 0; +#ifdef USE_CACHE + mutable fast_cache find_close_cache; + mutable fast_cache find_open_cache; + mutable fast_cache select_cache; +#endif + static inline size_type 
sml_block_idx(size_type i) + { + return i / t_sml_blk; + } + static inline size_type med_block_idx(size_type i) + { + return i / (t_sml_blk * t_med_deg); + } + static inline bool is_root(size_type v) + { + return v == 0; + } + static inline bool is_left_child(size_type v) + { + assert(!is_root(v)); + return v % 2; + } + static inline bool is_right_child(size_type v) + { + assert(!is_root(v)); + return !(v % 2); + } + static inline size_type parent(size_type v) + { + assert(!is_root(v)); + return (v - 1) / 2; + } + static inline size_type left_child(size_type v) + { + return 2 * v + 1; + } + static inline size_type right_child(size_type v) + { + return 2 * v + 2; + } + inline bool node_exists(size_type v) const + { + return v < (m_med_inner_blocks + m_med_blocks); + } + static inline size_type right_sibling(size_type v) + { + return ++v; + } + static inline size_type left_sibling(size_type v) + { + return --v; + } + inline bool is_leaf(size_type v) const + { + return v >= m_med_inner_blocks; + } + inline difference_type min_value(size_type v) const + { + return m_size - ((difference_type)m_med_block_min_max[2 * v]); + } + inline difference_type max_value(size_type v) const + { + return m_med_block_min_max[2 * v + 1] - m_size; + } + inline difference_type sml_min_value(size_type sml_block) const + { + return (1 - ((difference_type)m_sml_block_min_max[sml_block << 1])); + } + inline difference_type sml_max_value(size_type sml_block) const + { + return (difference_type)m_sml_block_min_max[(sml_block << 1) + 1] - 1; + } + void print_node(size_type v) const + { + std::cout << "v = " << v << " (" << min_value(v) << ", " << max_value(v) << ")"; + if (is_leaf(v)) + { + std::cout << " range: [" << (v - m_med_inner_blocks) * t_med_deg * t_sml_blk << "," + << (v - m_med_inner_blocks + 1) * t_med_deg * t_sml_blk - 1 << "]"; + } + std::cout << std::endl; + } + size_type fwd_excess(size_type i, difference_type rel) const + { + size_type j; + if ((j = near_fwd_excess(*m_bp, i + 1, 
rel, t_sml_blk)) > i) + { + return j; + } + difference_type desired_excess = excess(i) + rel; + if ((j = fwd_excess_in_med_block(sml_block_idx(i) + 1, desired_excess)) != size()) + { + return j; + } + if (med_block_idx(i) == m_med_blocks) + return size(); + size_type v = m_med_inner_blocks + med_block_idx(i); + while (!is_root(v)) + { + if (is_left_child(v)) + { + v = right_sibling(v); + if (min_value(v) <= desired_excess and desired_excess <= max_value(v)) + break; + } + v = parent(v); + } + if (!is_root(v)) + { + while (!is_leaf(v)) + { + v = left_child(v); + if (!(min_value(v) <= desired_excess and desired_excess <= max_value(v))) + { + v = right_sibling(v); + assert((min_value(v) <= desired_excess and desired_excess <= max_value(v))); + } + } + return fwd_excess_in_med_block((v - m_med_inner_blocks) * t_med_deg, desired_excess); + } + return size(); + } + size_type bwd_excess(size_type i, difference_type rel) const + { + size_type j; + if (i == 0) + { + return rel == 0 ? -1 : size(); + } + if ((j = near_bwd_excess(*m_bp, i - 1, rel, t_sml_blk)) < i or j == (size_type)-1) + { + return j; + } + difference_type desired_excess = excess(i) + rel; + if ((j = bwd_excess_in_med_block(sml_block_idx(i) - 1, desired_excess)) != size()) + { + return j; + } + if (med_block_idx(i) == 0) + { + if (desired_excess == 0) + return -1; + return size(); + } + size_type v = m_med_inner_blocks + med_block_idx(i); + while (!is_root(v)) + { + if (is_right_child(v)) + { + v = left_sibling(v); + if (min_value(v) <= desired_excess and desired_excess <= max_value(v)) + break; + } + v = parent(v); + } + if (!is_root(v)) + { + while (!is_leaf(v)) + { + v = right_child(v); + if (!(min_value(v) <= desired_excess and desired_excess <= max_value(v))) + { + v = left_sibling(v); + assert((min_value(v) <= desired_excess and desired_excess <= max_value(v))); + } + } + return bwd_excess_in_med_block((v - m_med_inner_blocks) * t_med_deg + (t_med_deg - 1), desired_excess); + } + else if (desired_excess 
== 0) + { + return -1; + } + return size(); + } + size_type bwd_excess_in_med_block(size_type sml_block_idx, difference_type desired_excess) const + { + size_type first_sml_block_in_med_block = (med_block_idx(sml_block_idx * t_sml_blk)) * t_med_deg; + while ((sml_block_idx + 1) and sml_block_idx >= first_sml_block_in_med_block) + { + difference_type ex = (sml_block_idx == 0) ? 0 : excess(sml_block_idx * t_sml_blk - 1); + difference_type min_ex = ex + (1 - ((difference_type)m_sml_block_min_max[2 * sml_block_idx])); + difference_type max_ex = ex + (m_sml_block_min_max[2 * sml_block_idx + 1] - 1); + if (min_ex <= desired_excess and desired_excess <= max_ex) + { + size_type j = near_bwd_excess(*m_bp, + (sml_block_idx + 1) * t_sml_blk - 1, + desired_excess - excess((sml_block_idx + 1) * t_sml_blk), + t_sml_blk); + return j; + } + --sml_block_idx; + } + if (sml_block_idx == 0 and desired_excess == 0) + return -1; + return size(); + } + size_type fwd_excess_in_med_block(size_type sml_block_idx, difference_type desired_excess) const + { + size_type first_sml_block_nr_in_next_med_block = (med_block_idx(sml_block_idx * t_sml_blk) + 1) * t_med_deg; + if (first_sml_block_nr_in_next_med_block > m_sml_blocks) + first_sml_block_nr_in_next_med_block = m_sml_blocks; + assert(sml_block_idx > 0); + while (sml_block_idx < first_sml_block_nr_in_next_med_block) + { + difference_type ex = excess(sml_block_idx * t_sml_blk - 1); + difference_type min_ex = ex + (1 - ((difference_type)m_sml_block_min_max[2 * sml_block_idx])); + difference_type max_ex = ex + m_sml_block_min_max[2 * sml_block_idx + 1] - 1; + if (min_ex <= desired_excess and desired_excess <= max_ex) + { + size_type j = near_fwd_excess(*m_bp, sml_block_idx * t_sml_blk, desired_excess - ex, t_sml_blk); + return j; + } + ++sml_block_idx; + } + return size(); + } +public: + rank_type const & bp_rank = m_bp_rank; + select_type const & bp_select = m_bp_select; + sml_block_array_type const & sml_block_min_max = m_sml_block_min_max; + 
med_block_array_type const & med_block_min_max = m_med_block_min_max; + bp_support_sada() + {} + bp_support_sada(bp_support_sada const & v) : + m_bp(v.m_bp), + m_bp_rank(v.m_bp_rank), + m_bp_select(v.m_bp_select), + m_sml_block_min_max(v.m_sml_block_min_max), + m_med_block_min_max(v.m_med_block_min_max), + m_size(v.m_size), + m_sml_blocks(v.m_sml_blocks), + m_med_blocks(v.m_med_blocks), + m_med_inner_blocks(v.m_med_inner_blocks) + { + m_bp_rank.set_vector(m_bp); + m_bp_select.set_vector(m_bp); + } + bp_support_sada(bp_support_sada && bp_support) + { + *this = std::move(bp_support); + } + bp_support_sada & operator=(bp_support_sada && bp_support) + { + if (this != &bp_support) + { + m_bp = std::move(bp_support.m_bp); + m_bp_rank = std::move(bp_support.m_bp_rank); + m_bp_rank.set_vector(m_bp); + m_bp_select = std::move(bp_support.m_bp_select); + m_bp_select.set_vector(m_bp); + m_sml_block_min_max = std::move(bp_support.m_sml_block_min_max); + m_med_block_min_max = std::move(bp_support.m_med_block_min_max); + m_size = std::move(bp_support.m_size); + m_sml_blocks = std::move(bp_support.m_sml_blocks); + m_med_blocks = std::move(bp_support.m_med_blocks); + m_med_inner_blocks = std::move(bp_support.m_med_inner_blocks); + } + return *this; + } + bp_support_sada & operator=(bp_support_sada const & v) + { + if (this != &v) + { + bp_support_sada tmp(v); + *this = std::move(tmp); + } + return *this; + } + explicit bp_support_sada(bit_vector const * bp) : + m_bp(bp), + m_size(bp == nullptr ? 
0 : bp->size()), + m_sml_blocks((m_size + t_sml_blk - 1) / t_sml_blk), + m_med_blocks((m_size + t_sml_blk * t_med_deg - 1) / (t_sml_blk * t_med_deg)), + m_med_inner_blocks(0) + { + /* nothing to build for a null or empty vector */ if (bp == nullptr or bp->size() == 0) + return; + util::init_support(m_bp_rank, bp); + util::init_support(m_bp_select, bp); + /* m_med_inner_blocks becomes (smallest power of two >= m_med_blocks) - 1 */ m_med_inner_blocks = 1; + while (m_med_inner_blocks < m_med_blocks) + { + m_med_inner_blocks <<= 1; + assert(m_med_inner_blocks != 0); + } + --m_med_inner_blocks; + assert((m_med_inner_blocks == 0) or (m_med_inner_blocks % 2 == 1)); + m_sml_block_min_max = int_vector<>(2 * m_sml_blocks, 0, bits::hi(t_sml_blk + 2) + 1); + m_med_block_min_max = int_vector<>(2 * (m_med_blocks + m_med_inner_blocks), 0, bits::hi(2 * m_size + 2) + 1); + difference_type min_ex = 1, max_ex = -1, curr_rel_ex = 0, curr_abs_ex = 0; + /* single pass over the bits: curr_rel_ex is the excess relative to the start of the current small block, curr_abs_ex the excess accumulated over all completed blocks */ for (size_type i = 0; i < m_size; ++i) + { + if ((*bp)[i]) + ++curr_rel_ex; + else + --curr_rel_ex; + if (curr_rel_ex > max_ex) + max_ex = curr_rel_ex; + if (curr_rel_ex < min_ex) + min_ex = curr_rel_ex; + /* end of a small block (or of the vector): flush its min/max */ if ((i + 1) % t_sml_blk == 0 or i + 1 == m_size) + { + size_type sidx = i / t_sml_blk; + /* small-block values are stored shifted: -(min_ex - 1) and max_ex + 1 */ m_sml_block_min_max[2 * sidx] = -(min_ex - 1); + m_sml_block_min_max[2 * sidx + 1] = max_ex + 1; + size_type v = m_med_inner_blocks + sidx / t_med_deg; + /* medium-block leaves store the negated absolute minimum and the absolute maximum, both offset by m_size */ if ((difference_type)(-(curr_abs_ex + min_ex) + m_size) > ((difference_type)m_med_block_min_max[2 * v])) + { + assert(curr_abs_ex + min_ex <= min_value(v)); + m_med_block_min_max[2 * v] = -(curr_abs_ex + min_ex) + m_size; + } + if ((difference_type)(curr_abs_ex + max_ex + m_size) > (difference_type)m_med_block_min_max[2 * v + 1]) + m_med_block_min_max[2 * v + 1] = curr_abs_ex + max_ex + m_size; + curr_abs_ex += curr_rel_ex; + min_ex = 1; + max_ex = -1; + curr_rel_ex = 0; + } + } + /* propagate min/max from the last node towards the root of the medium-block tree */ for (size_type v = m_med_block_min_max.size() / 2 - 1; !is_root(v); --v) + { + size_type p = parent(v); + if (min_value(v) < min_value(p)) + m_med_block_min_max[2 * p] = m_med_block_min_max[2 * v]; + if (max_value(v) > max_value(p)) + 
m_med_block_min_max[2 * p + 1] = m_med_block_min_max[2 * v + 1]; + } + } + /* Re-points this object and its rank/select supports at bp. */ void set_vector(bit_vector const * bp) + { + m_bp = bp; + m_bp_rank.set_vector(bp); + m_bp_select.set_vector(bp); + } + /* Returns 2 * m_bp_rank(i + 1) - (i + 1). */ inline difference_type excess(size_type i) const + { + return (m_bp_rank(i + 1) << 1) - i - 1; + } + /* Returns m_bp_rank(i + 1). */ size_type rank(size_type i) const + { + return m_bp_rank(i + 1); + } + /* Returns m_bp_select(i); when USE_CACHE is defined the result is looked up in / written to select_cache. */ size_type select(size_type i) const + { +#ifdef USE_CACHE + size_type a = 0; + if (select_cache.exists(i, a)) + { + return a; + } + else + { + a = m_bp_select(i); + select_cache.write(i, a); + return a; + } +#endif + return m_bp_select(i); + } + /* If bit i is 0 returns i itself; otherwise returns fwd_excess(i, -1) (optionally cached). */ size_type find_close(size_type i) const + { + assert(i < m_size); + if (!(*m_bp)[i]) + { + return i; + } +#ifdef USE_CACHE + size_type a = 0; + if (find_close_cache.exists(i, a)) + { + return a; + } + else + { + a = fwd_excess(i, -1); + find_close_cache.write(i, a); + return a; + } +#endif + return fwd_excess(i, -1); + } + /* If bit i is 1 returns i itself; otherwise returns bwd_excess(i, 0) + 1, or size() when bwd_excess reports size(). */ size_type find_open(size_type i) const + { + assert(i < m_size); + if ((*m_bp)[i]) + { + return i; + } +#ifdef USE_CACHE + size_type a = 0; + if (find_open_cache.exists(i, a)) + { + return a; + } + else + { + size_type bwd_ex = bwd_excess(i, 0); + if (bwd_ex == size()) + a = size(); + else + a = bwd_ex + 1; + find_open_cache.write(i, a); + return a; + } +#endif + size_type bwd_ex = bwd_excess(i, 0); + if (bwd_ex == size()) + return size(); + else + return bwd_ex + 1; + } + /* For a 0 bit delegates to find_open(i); for a 1 bit returns bwd_excess(i, -2) + 1, or size() when bwd_excess reports size(). */ size_type enclose(size_type i) const + { + assert(i < m_size); + if (!(*m_bp)[i]) + { + return find_open(i); + } + size_type bwd_ex = bwd_excess(i, -2); + if (bwd_ex == size()) + return size(); + else + return bwd_ex + 1; + } + /* Requires bits i and j to be 1. Returns size() when find_close(i) + 1 >= j, otherwise rmq_open(find_close(i) + 1, j). */ size_type rr_enclose(const size_type i, const size_type j) const + { + assert(j < m_size); + assert((*m_bp)[i] == 1 and (*m_bp)[j] == 1); + const size_type mip1 = find_close(i) + 1; + if (mip1 >= j) + return size(); + return rmq_open(mip1, j); + } + /* Returns size() for an empty range (l >= r); otherwise derives the answer from rmq(l, r - 1). */ size_type rmq_open(const size_type l, const size_type r) const + { + assert(r < m_bp->size()); + if (l >= 
r) + return size(); + size_type res = rmq(l, r - 1); + assert(res >= l and res <= r - 1); + /* a 1 bit at the minimum position is returned directly */ if ((*m_bp)[res] == 1) + { + assert(find_close(res) >= r); + return res; + } + else + { + /* otherwise continue with the position right after the minimum */ res = res + 1; + if (res < r) + { + assert((*m_bp)[res] == 1); + size_type ec = enclose(res); + if (ec < l or ec == size()) + { + assert(find_close(res) >= r); + return res; + } + else + { + assert(find_close(ec) >= r); + return ec; + } + } + else if (res == r) + { + size_type ec = enclose(res); + if (ec >= l) + { + assert(ec == size() or excess(ec) == excess(res - 1)); + return ec; + } + } + } + return size(); + } + /* Scans small blocks l_sblock..r_sblock. Whenever a block's minimum excess is <= min_ex, min_ex is updated (it is an in/out parameter) and the block index remembered, so ties resolve to the right-most block. Returns that index, or (size_type)-1 if no block qualified. Block 0 is handled separately because the general case reads excess(i * t_sml_blk - 1). */ size_type median_block_rmq(size_type l_sblock, size_type r_sblock, bit_vector::difference_type & min_ex) const + { + assert(l_sblock <= r_sblock + 1); + size_type pos_min_block = (size_type)-1; + difference_type e = 0; + if (l_sblock == 0) + { + if (sml_min_value(0) <= min_ex) + { + pos_min_block = 0; + min_ex = sml_min_value(0); + } + l_sblock = 1; + } + for (size_type i = l_sblock; i <= r_sblock; ++i) + { + if ((e = (excess(i * t_sml_blk - 1) + sml_min_value(i))) <= min_ex) + { + pos_min_block = i; + min_ex = e; + } + } + return pos_min_block; + } + /* Range-minimum query over positions [l, r]. When l and r share a small block the answer comes straight from near_rmq(); otherwise the candidate is refined across the partial left block, whole small blocks, the medium-block tree and the partial right block. */ size_type rmq(size_type l, size_type r) const + { + assert(l <= r); + size_type sbl = sml_block_idx(l); + size_type sbr = sml_block_idx(r); + difference_type min_rel_ex = 0; + if (sbl == sbr) + { + return near_rmq(*m_bp, l, r, min_rel_ex); + } + else + { + difference_type min_ex = 0; + size_type min_pos = 0; + /* pos_type records what min_pos currently denotes: a bit position, a small-block index, or a node of the medium-block tree */ enum min_pos_type + { + POS, + SMALL_BLOCK_POS, + MEDIUM_BLOCK_POS + }; + enum min_pos_type pos_type = POS; + /* partial small block starting at l */ min_pos = near_rmq(*m_bp, l, (sbl + 1) * t_sml_blk - 1, min_rel_ex); + assert(min_pos >= l); + min_ex = excess(l) + min_rel_ex; + size_type mbl = med_block_idx(l); + size_type mbr = med_block_idx(r); + assert(mbl <= mbr); + /* remaining whole small blocks of the left-most medium block */ size_type temp = median_block_rmq(sbl + 1, + std::min((mbl + 1) * t_med_deg - 1, sbr - 1), + min_ex); + if (temp != (size_type)-1) + { + assert(temp * t_sml_blk >= l and temp * t_sml_blk <= 
r); + min_pos = temp; + assert(min_pos < m_sml_blocks); + pos_type = SMALL_BLOCK_POS; + } +#if 0 + for (size_type v=mbl+1+m_med_inner_blocks; v < mbr + m_med_inner_blocks; ++v) { + assert(is_leaf(v)); + if (min_value(v) <= min_ex) { + min_ex = min_value(v); + min_pos = v; + assert(min_pos-m_med_inner_blocks >= 0 and min_pos < m_med_blocks-m_med_inner_blocks); + pos_type = MEDIUM_BLOCK_POS; + } + } +#else + /* medium blocks strictly between mbl and mbr: walk the min/max tree instead of the linear scan disabled above */ if (mbr - mbl > 1) + { + size_type v = mbl + 1 + m_med_inner_blocks; + size_type rcb = mbl + 1; + size_type h = 0; + /* upward phase: v starts at the leaf of medium block mbl + 1; rcb and h are advanced together with v until rcb reaches mbr - 1 or beyond */ while (rcb < mbr - 1) + { + if (min_value(v) <= min_ex) + { + min_ex = min_value(v); + min_pos = v; + pos_type = MEDIUM_BLOCK_POS; + } + if (is_right_child(v)) + { + h += 1; + rcb += (1ULL << h); + v = right_sibling(parent(v)); + } + else + { + rcb += (1ULL << h); + h += 1; + v = parent(v); + } + } + if (rcb <= mbr - 1 and min_value(v) <= min_ex) + { + min_ex = min_value(v); + min_pos = v; + pos_type = MEDIUM_BLOCK_POS; + } + assert(node_exists(v)); + assert(rcb >= mbr - 1); + /* downward phase: step back down until rcb is exactly mbr - 1, testing each node visited with rcb <= mbr - 1 */ while (rcb != mbr - 1) + { + assert(h != (size_type)-1); + if (rcb > mbr - 1) + { + h = h - 1; + rcb = rcb - (1ULL << h); + v = left_child(v); + } + else + { + h = h - 1; + rcb = rcb + (1ULL << h); + v = right_sibling(right_child(v)); + } + if (rcb <= mbr - 1 and min_value(v) <= min_ex) + { + min_ex = min_value(v); + min_pos = v; + pos_type = MEDIUM_BLOCK_POS; + } + } + /* descend from the winning node to a leaf, trying the right child first and falling back to its left sibling when it does not exist or its minimum exceeds min_ex */ if (pos_type == MEDIUM_BLOCK_POS) + { + while (!is_leaf(min_pos)) + { + min_pos = right_child(min_pos); + if (!node_exists(min_pos) or min_value(min_pos) > min_ex) + min_pos = left_sibling(min_pos); + } + } + } +#endif + /* whole small blocks of the right-most medium block that precede sbr */ temp = median_block_rmq(std::max(mbr * t_med_deg, sbl + 1), sbr - 1, min_ex); + if (temp != (size_type)-1) + { + assert(temp * t_sml_blk >= l and temp * t_sml_blk <= r); + min_pos = temp; + pos_type = SMALL_BLOCK_POS; + } + /* partial small block ending at r; a tie (<=) is resolved in favour of this right-most candidate */ temp = near_rmq(*m_bp, sbr * t_sml_blk, r, min_rel_ex); + if ((excess(sbr * t_sml_blk) + min_rel_ex) <= min_ex) + { + assert(temp >= l and temp <= r); + return temp; + } + 
if (pos_type == MEDIUM_BLOCK_POS) + { + /* convert the tree leaf into a medium-block number, then find the small block inside it that attains min_ex */ min_pos = min_pos - m_med_inner_blocks; + temp = median_block_rmq(min_pos * t_med_deg, (min_pos + 1) * t_med_deg - 1, min_ex); + assert(temp != (size_type)-1); + assert(temp * t_sml_blk >= l and temp * t_sml_blk <= r); + min_pos = temp; + pos_type = SMALL_BLOCK_POS; + } + if (pos_type == SMALL_BLOCK_POS) + { + /* resolve the small-block index to a bit position */ min_pos = near_rmq(*m_bp, min_pos * t_sml_blk, (min_pos + 1) * t_sml_blk - 1, min_rel_ex); + assert(min_pos >= l and min_pos <= r); + } + return min_pos; + } + } + /* Naive variant of rr_enclose: starting from enclose(j), repeatedly applies enclose() while the result is neither m_size nor <= find_close(i), and returns the last position visited. Returns m_size when enclose(j) is m_size or lies before i. */ size_type rr_enclose_naive(size_type i, size_type j) const + { + assert(j > i and j < m_size); + assert((*m_bp)[i] == 1 and (*m_bp)[j] == 1); + size_type mi = find_close(i); + assert(mi > i and mi < j); + assert(find_close(j) > j); + size_type k = enclose(j); + if (k == m_size or k < i) + return m_size; + size_type kk; + do + { + kk = k; + k = enclose(k); + } + while (k != m_size and k > mi); + return kk; + } + /* Returns enclose(rr_enclose(i, j)), falling back to enclose(j) when rr_enclose reports size(). */ size_type double_enclose(size_type i, size_type j) const + { + assert(j > i); + assert((*m_bp)[i] == 1 and (*m_bp)[j] == 1); + size_type k = rr_enclose(i, j); + if (k == size()) + return enclose(j); + else + return enclose(k); + } + /* Returns 0 for i == 0; otherwise i - m_bp_select(m_bp_rank(i)) - 1, or i when m_bp_rank(i) is 0. */ size_type preceding_closing_parentheses(size_type i) const + { + assert(i < m_size); + if (!i) + return 0; + size_type ones = m_bp_rank(i); + if (ones) + { + assert(m_bp_select(ones) < i); + return i - m_bp_select(ones) - 1; + } + else + { + return i; + } + } + /* Returns bwd_excess(i, -d - 1) + 1, or size() when bwd_excess reports size(). */ size_type level_anc(size_type i, size_type d) const + { + assert(i < m_size); + size_type bwd_ex = bwd_excess(i, -d - 1); + if (bwd_ex == size()) + return size(); + else + return bwd_ex + 1; + } + /* Returns m_size. */ size_type size() const + { + return m_size; + } + /* Writes the four counters, both supports and both min/max arrays to out and returns the sum of the reported byte counts; the bit vector itself is not written. */ size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += write_member(m_size, out, child, "size"); + written_bytes += 
write_member(m_sml_blocks, out, child, "sml_block_cnt"); + written_bytes += write_member(m_med_blocks, out, child, "med_block_cnt"); + written_bytes += write_member(m_med_inner_blocks, out, child, "med_inner_blocks"); + written_bytes += m_bp_rank.serialize(out, child, "bp_rank"); + written_bytes += m_bp_select.serialize(out, child, "bp_select"); + written_bytes += m_sml_block_min_max.serialize(out, child, "sml_blocks"); + written_bytes += m_med_block_min_max.serialize(out, child, "med_blocks"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + /* Reads the members in the order serialize() writes them and attaches the supports to bp. bp is dereferenced by the assert, so it must not be null in builds with assertions enabled. */ void load(std::istream & in, bit_vector const * bp) + { + m_bp = bp; + read_member(m_size, in); + assert(m_size == bp->size()); + read_member(m_sml_blocks, in); + read_member(m_med_blocks, in); + read_member(m_med_inner_blocks, in); + m_bp_rank.load(in, m_bp); + m_bp_select.load(in, m_bp); + m_sml_block_min_max.load(in); + m_med_block_min_max.load(in); + } + template + /* Archives all members except the m_bp pointer. */ void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_sml_blocks)); + ar(CEREAL_NVP(m_med_blocks)); + ar(CEREAL_NVP(m_med_inner_blocks)); + ar(CEREAL_NVP(m_bp_rank)); + ar(CEREAL_NVP(m_bp_select)); + ar(CEREAL_NVP(m_sml_block_min_max)); + ar(CEREAL_NVP(m_med_block_min_max)); + } + template + /* Restores the archived members. NOTE(review): unlike load(), this does not set m_bp or re-attach the rank/select supports - presumably the caller must invoke set_vector() afterwards; confirm. */ void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_sml_blocks)); + ar(CEREAL_NVP(m_med_blocks)); + ar(CEREAL_NVP(m_med_inner_blocks)); + ar(CEREAL_NVP(m_bp_rank)); + ar(CEREAL_NVP(m_bp_select)); + ar(CEREAL_NVP(m_sml_block_min_max)); + ar(CEREAL_NVP(m_med_block_min_max)); + } + /* Member-wise equality; the m_bp pointer is not part of the comparison. */ bool operator==(bp_support_sada const & other) const noexcept + { + return (m_bp_rank == other.m_bp_rank) && (m_bp_select == other.m_bp_select) + && (m_sml_block_min_max == other.m_sml_block_min_max) && (m_med_block_min_max == other.m_med_block_min_max) + && (m_size == other.m_size) && (m_sml_blocks == other.m_sml_blocks) && (m_med_blocks == other.m_med_blocks) + && 
(m_med_inner_blocks == other.m_med_inner_blocks); + } + bool operator!=(bp_support_sada const & other) const noexcept + { + return !(*this == other); + } +}; +} +#endif +#ifndef INCLUDED_SDSL_CST_ITERATORS +#define INCLUDED_SDSL_CST_ITERATORS +#include +#include +#include +namespace sdsl +{ +template +class cst_dfs_const_forward_iterator +{ /* Depth-first forward iterator over the nodes of a Cst. Inner nodes are reported twice (see visit()). The first cache_size ancestors of the current node are kept in a heap-allocated array so that parent() can avoid calling Cst::parent(). */ +public: + using iterator_category = std::forward_iterator_tag; + using value_type = typename Cst::node_type; + using difference_type = std::ptrdiff_t; + using pointer = value_type *; + using reference = value_type &; + typedef const value_type const_reference; + typedef typename Cst::size_type size_type; + typedef cst_dfs_const_forward_iterator iterator; + typedef typename Cst::node_type node_type; +private: + Cst const * m_cst; + node_type m_v; + bool m_visited; + bool m_valid; + /* owning pointer to the ancestor cache; nullptr when no cache is used */ node_type * m_stack_cache; + /* current depth counter; initialised here because parent()/first_child() modify it even when no cache was allocated, which previously read an indeterminate value */ uint32_t m_stack_size = 0; + /* Pops one level: returns the cached ancestor when the depth is inside the cache, otherwise asks the tree. */ inline node_type parent() + { + --m_stack_size; + if (m_stack_cache != nullptr and m_stack_size < cache_size) + { + return m_stack_cache[m_stack_size]; + } + else + return m_cst->parent(m_v); + } + /* Pushes the current node (if it still fits into the cache) and returns its first child. */ inline node_type first_child() + { + if (m_stack_cache != nullptr and m_stack_size < cache_size) + m_stack_cache[m_stack_size] = m_v; + m_stack_size++; + return m_cst->select_child(m_v, 1); + } + cst_dfs_const_forward_iterator() : m_cst(nullptr), m_visited(false), m_valid(false), m_stack_cache(nullptr) + {} +public: + /* Creates an iterator positioned at node. The ancestor cache is only allocated when the traversal starts unvisited and valid at the root; a null cst yields an invalid iterator. */ cst_dfs_const_forward_iterator(Cst const * cst, const value_type node, bool visited = false, bool valid = true) : + m_visited(visited), + m_valid(valid), + m_stack_cache(nullptr) + { + m_cst = cst; + m_v = node; + if (m_cst == nullptr) + { + m_valid = false; + } + else if (m_v == m_cst->root() and !m_visited and m_valid) + { + m_stack_cache = new node_type[cache_size]; + m_stack_size = 0; + } + } + ~cst_dfs_const_forward_iterator() + { + if (m_stack_cache != nullptr) + { + delete[] m_stack_cache; + } + } + /* Deep copy: the implicit member-wise copy would leave two iterators owning the same m_stack_cache array, so both destructors would delete[] it. Only the entries that have been written (index below both m_stack_size and cache_size) are copied. */ cst_dfs_const_forward_iterator(cst_dfs_const_forward_iterator const & it) : + m_cst(it.m_cst), + m_v(it.m_v), + m_visited(it.m_visited), + m_valid(it.m_valid), + m_stack_cache(nullptr), + m_stack_size(it.m_stack_size) + { + if (it.m_stack_cache != nullptr) + { + m_stack_cache = new node_type[cache_size]; + for (uint32_t k = 0; k < cache_size and k < m_stack_size; ++k) + m_stack_cache[k] = it.m_stack_cache[k]; + } + } + /* Copy assignment with the same deep-copy semantics; the new cache is built before the old one is released. */ cst_dfs_const_forward_iterator & operator=(cst_dfs_const_forward_iterator const & it) + { + if (this != &it) + { + node_type * cache_copy = nullptr; + if (it.m_stack_cache != nullptr) + { + cache_copy = new node_type[cache_size]; + for (uint32_t k = 0; k < cache_size and k < it.m_stack_size; ++k) + cache_copy[k] = it.m_stack_cache[k]; + } + delete[] m_stack_cache; + m_stack_cache = cache_copy; + m_cst = it.m_cst; + m_v = it.m_v; + m_visited = it.m_visited; + m_valid = it.m_valid; + m_stack_size = it.m_stack_size; + } + return *this; + } + /* Returns 1 on the first visit of the current node and 2 on the second. */ uint8_t visit() const + { + return 1 + (uint8_t)m_visited; + } + void skip_subtree() 
+ { + /* marking the node as visited makes the next increment continue with its sibling/parent instead of its first child */ if (m_valid) + { + if (!m_visited) + { + m_visited = true; + } + } + } + /* Returns the current node by value. */ const_reference operator*() const + { + return m_v; + } + /* Advances the traversal. On a first visit an inner node is followed by its first child and a leaf by its sibling; on a second visit the sibling is tried. A sibling() result equal to root() is treated as "no further sibling" and the walk returns to the parent, which is then on its second visit. Incrementing at the root on its second visit invalidates the iterator. */ iterator & operator++() + { + if (!m_valid) + return *this; + if (m_v == m_cst->root() and m_visited) + { + m_valid = false; + return *this; + } + value_type w; + if (!m_visited) + { + if (m_cst->is_leaf(m_v)) + { + w = m_cst->sibling(m_v); + if (w == m_cst->root()) + { + w = parent(); + m_visited = true; + } + } + else + { + w = first_child(); + } + } + else + { + w = m_cst->sibling(m_v); + if (w == m_cst->root()) + { + w = parent(); + } + else + { + m_visited = false; + } + } + m_v = w; + return *this; + } + /* Post-increment returns void, so no copy of the iterator is produced. */ void operator++(int) + { + ++(*this); + } + /* Equal when visited flag, validity, current node and tree pointer all match. */ bool operator==(iterator const & it) const + { + return (it.m_visited == m_visited) + and (it.m_valid == m_valid) + and (it.m_v == m_v) + and (it.m_cst == m_cst); + } + bool operator!=(iterator const & it) const + { + return !(*this == it); + } +}; +template +class cst_bottom_up_const_forward_iterator +{ /* Forward iterator that reports each node after the nodes of its subtree (see operator++). */ +public: + using iterator_category = std::forward_iterator_tag; + using value_type = typename Cst::node_type; + using difference_type = std::ptrdiff_t; + using pointer = value_type *; + using reference = value_type &; + typedef const value_type const_reference; + typedef typename Cst::size_type size_type; + typedef cst_bottom_up_const_forward_iterator iterator; +private: + Cst const * m_cst; + typename Cst::node_type m_v; + bool m_valid; +public: + /* Default-constructed iterators are invalid; m_v is left default-initialised. */ cst_bottom_up_const_forward_iterator() : m_cst(nullptr), m_valid(false) + {} + /* Positions the iterator at node; a null cst forces the iterator to be invalid. */ cst_bottom_up_const_forward_iterator(Cst const * cst, const value_type node, bool valid = true) : m_valid(valid) + { + m_cst = cst; + m_v = node; + if (m_cst == nullptr) + m_valid = false; + } + const_reference operator*() const + { + return m_v; + } + /* Next node: the leftmost leaf below the sibling, or the parent when sibling() returns root(). Incrementing at the root invalidates the iterator. */ iterator & operator++() + { + if (!m_valid) + return *this; + if (m_v == m_cst->root()) + { + m_valid = false; + return *this; + } + value_type w = m_cst->sibling(m_v); + if (w == m_cst->root()) + { + m_v = 
m_cst->parent(m_v); + } + else + { + m_v = m_cst->leftmost_leaf(w); + } + return *this; + } + /* Post-increment: returns a copy taken before advancing. */ iterator operator++(int) + { + iterator it = *this; + ++(*this); + return it; + } + /* Equal when validity, current node and tree pointer all match. */ bool operator==(iterator const & it) const + { + return (it.m_valid == m_valid) + and (it.m_v == m_v) + and (it.m_cst == m_cst); + } + bool operator!=(iterator const & it) const + { + return !(*this == it); + } +}; +template > +class cst_bfs_iterator +{ /* Breadth-first forward iterator; the nodes still to be reported are held in a queue of type Queue. */ +public: + using iterator_category = std::forward_iterator_tag; + using value_type = typename Cst::node_type; + using difference_type = std::ptrdiff_t; + using pointer = value_type *; + using reference = value_type &; + typedef const value_type const_reference; + typedef typename Cst::size_type size_type; + typedef cst_bfs_iterator iterator; + typedef Queue queue_type; +private: + Cst const * m_cst; + queue_type m_queue; + bool m_valid; +public: + /* Seeds the queue with node unless cst is null or end_it is set (an end iterator has an empty queue). */ cst_bfs_iterator(Cst const * cst, const value_type node, bool valid = true, bool end_it = false) + { + m_cst = cst; + m_valid = valid; + if (m_cst != nullptr and !end_it) + { + m_queue.push(node); + } + } + /* Number of nodes currently queued. */ size_type size() const + { + return m_queue.size(); + } + /* Returns the front of the queue; the queue must not be empty. */ const_reference operator*() const + { + return m_queue.front(); + } + /* Pops the front node and enqueues all of its children: select_child(v, 1) followed by sibling() until root() is returned. Incrementing with an empty queue invalidates the iterator. */ iterator & operator++() + { + if (!m_valid) + return *this; + if (m_queue.empty()) + { + m_valid = false; + return *this; + } + value_type v = m_queue.front(); + m_queue.pop(); + value_type child = m_cst->select_child(v, 1); + while (m_cst->root() != child) + { + m_queue.push(child); + child = m_cst->sibling(child); + } + return *this; + } + iterator operator++(int) + { + iterator it = *this; + ++(*this); + return it; + } + /* Equal when queue sizes, validity and tree pointer match and, for non-empty queues, the front and back elements agree; elements between front and back are not compared. */ bool operator==(iterator const & it) const + { + if (m_queue.size() != it.m_queue.size()) + { + return false; + } + if (m_queue.empty()) + { + return it.m_valid == m_valid and it.m_cst == m_cst; + } + return (it.m_valid == m_valid) + and (it.m_cst == m_cst) + and (it.m_queue.front() == m_queue.front()) + and (it.m_queue.back() == m_queue.back()); + } 
+ bool operator!=(iterator const & it) const + { + return !(*this == it); + } +}; +} +#endif +#ifndef INCLUDED_SDSL_CST_SADA +#define INCLUDED_SDSL_CST_SADA +#include +#include +#include +#include +#include +#ifndef INCLUDED_SDSL_LCP +#define INCLUDED_SDSL_LCP +#include +#include +#include +namespace sdsl +{ +struct cache_config; +template +void construct_lcp(t_lcp & lcp, t_cst const & cst, cache_config & config) +{ + /* tag dispatch: an object of t_lcp::lcp_category selects one of the four overloads below */ typename t_lcp::lcp_category tag; + construct_lcp(lcp, cst, config, tag); +} +template +void construct_lcp(t_lcp & lcp, t_cst const &, cache_config & config, lcp_plain_tag) +{ + /* plain LCP: built from the cache configuration alone */ lcp = t_lcp(config); +} +template +void construct_lcp(t_lcp & lcp, t_cst const & cst, cache_config & config, lcp_permuted_tag) +{ + /* permuted LCP: additionally receives a pointer to the tree's csa */ lcp = t_lcp(config, &(cst.csa)); +} +template +void construct_lcp(t_lcp & lcp, t_cst const & cst, cache_config & config, lcp_tree_compressed_tag) +{ + /* tree-compressed LCP: additionally receives a pointer to the tree itself */ lcp = t_lcp(config, &cst); +} +template +void construct_lcp(t_lcp & lcp, t_cst const & cst, cache_config & config, lcp_tree_and_lf_compressed_tag) +{ + lcp = t_lcp(config, &cst); +} +template +void copy_lcp(t_lcp & lcp, t_lcp const & lcp_c, t_cst const & cst) +{ + /* tag dispatch: copies lcp_c into lcp; the non-plain overloads then re-attach the csa/cst pointer to cst */ typename t_lcp::lcp_category tag; + copy_lcp(lcp, lcp_c, cst, tag); +} +template +void copy_lcp(t_lcp & lcp, t_lcp const & lcp_c, t_cst const &, lcp_plain_tag) +{ + lcp = lcp_c; +} +template +void copy_lcp(t_lcp & lcp, t_lcp const & lcp_c, t_cst const & cst, lcp_permuted_tag) +{ + lcp = lcp_c; + lcp.set_csa(&(cst.csa)); +} +template +void copy_lcp(t_lcp & lcp, t_lcp const & lcp_c, t_cst const & cst, lcp_tree_compressed_tag) +{ + lcp = lcp_c; + lcp.set_cst(&cst); +} +template +void copy_lcp(t_lcp & lcp, t_lcp const & lcp_c, t_cst const & cst, lcp_tree_and_lf_compressed_tag) +{ + lcp = lcp_c; + lcp.set_cst(&cst); +} +template +void move_lcp(t_lcp && lcp, t_lcp && lcp_c, t_cst const & cst) +{ + /* tag dispatch for the move variants; the category is taken from the reference-stripped type */ typename std::remove_reference::type::lcp_category tag; + move_lcp(std::forward(lcp), std::forward(lcp_c), cst, tag); +} +template +void 
move_lcp(t_lcp && lcp, t_lcp && lcp_c, t_cst const &, lcp_plain_tag) +{ + /* plain LCP: a move is all that is needed */ lcp = std::move(lcp_c); +} +template +void move_lcp(t_lcp && lcp, t_lcp && lcp_c, t_cst const & cst, lcp_permuted_tag) +{ + /* permuted LCP: after the move, point it at the csa of the destination tree */ lcp = std::move(lcp_c); + lcp.set_csa(&(cst.csa)); +} +template +void move_lcp(t_lcp && lcp, t_lcp && lcp_c, t_cst const & cst, lcp_tree_compressed_tag) +{ + /* tree-compressed LCP: after the move, point it at the destination tree */ lcp = std::move(lcp_c); + lcp.set_cst(&cst); +} +template +void move_lcp(t_lcp && lcp, t_lcp && lcp_c, t_cst const & cst, lcp_tree_and_lf_compressed_tag) +{ + lcp = std::move(lcp_c); + lcp.set_cst(&cst); +} +template +void load_lcp(t_lcp & lcp, std::istream & in, t_cst const & cst) +{ + /* tag dispatch: loads lcp from in, passing the csa or cst pointer the category requires */ typename t_lcp::lcp_category tag; + load_lcp(lcp, in, cst, tag); +} +template +void load_lcp(t_lcp & lcp, std::istream & in, t_cst const &, lcp_plain_tag) +{ + lcp.load(in); +} +template +void load_lcp(t_lcp & lcp, std::istream & in, t_cst const & cst, lcp_permuted_tag) +{ + lcp.load(in, &(cst.csa)); +} +template +void load_lcp(t_lcp & lcp, std::istream & in, t_cst const & cst, lcp_tree_compressed_tag) +{ + lcp.load(in, &cst); +} +template +void load_lcp(t_lcp & lcp, std::istream & in, t_cst const & cst, lcp_tree_and_lf_compressed_tag) +{ + lcp.load(in, &cst); +} +template +void set_lcp_pointer(t_lcp & lcp, t_cst const & cst) +{ + /* tag dispatch: re-attaches the csa/cst pointer of lcp; a no-op for the plain category */ typename t_lcp::lcp_category tag; + set_lcp_pointer(lcp, cst, tag); +} +template +void set_lcp_pointer(t_lcp &, t_cst const &, lcp_plain_tag) +{} +template +void set_lcp_pointer(t_lcp & lcp, t_cst const & cst, lcp_permuted_tag) +{ + lcp.set_csa(&(cst.csa)); +} +template +void set_lcp_pointer(t_lcp & lcp, t_cst const & cst, lcp_tree_compressed_tag) +{ + lcp.set_cst(&cst); +} +template +void set_lcp_pointer(t_lcp & lcp, t_cst const & cst, lcp_tree_and_lf_compressed_tag) +{ + lcp.set_cst(&cst); +} +} +#endif +#ifndef INCLUDED_SDSL_LCP_SUPPORT_SADA +#define INCLUDED_SDSL_LCP_SUPPORT_SADA +#include +#include +#include +namespace sdsl +{ +template , class t_bitvec = bit_vector, class t_select = typename 
t_bitvec::select_1_type> +class _lcp_support_sada +{ /* LCP representation made of a bit vector (m_data) plus a select support on it; values are looked up through the attached CSA (see operator[]). Tagged lcp_permuted_tag. */ +public: + typedef typename t_csa::value_type value_type; + typedef random_access_const_iterator<_lcp_support_sada> const_iterator; + typedef const_iterator iterator; + typedef const value_type const_reference; + typedef const_reference reference; + typedef const_reference * pointer; + typedef const pointer const_pointer; + typedef int_vector<>::size_type size_type; + typedef ptrdiff_t difference_type; + typedef t_bitvec bit_vector_type; + typedef t_csa csa_type; + typedef t_select select_type; + typedef lcp_permuted_tag lcp_category; + enum + { + fast_access = 0, + text_order = 1, + sa_order = 0 + }; + template + struct type + { + typedef _lcp_support_sada lcp_type; + }; +private: + /* non-owning pointer to the CSA used for lookups */ csa_type const * m_csa = nullptr; + bit_vector_type m_data; + select_type m_select_support; +public: + /* public reference to the private m_csa pointer */ t_csa const *& csa = m_csa; + _lcp_support_sada() + {} + /* Copy constructor: the copied select support is re-pointed at this object's own m_data. */ _lcp_support_sada(_lcp_support_sada const & lcp_c) : + m_csa(lcp_c.m_csa), + m_data(lcp_c.m_data), + m_select_support(lcp_c.m_select_support) + { + m_select_support.set_vector(&m_data); + } + /* Move constructor: delegates to move assignment. */ _lcp_support_sada(_lcp_support_sada && lcp_c) + { + *this = std::move(lcp_c); + } + /* Copy assignment: copy-construct a temporary, then move-assign from it. */ _lcp_support_sada & operator=(_lcp_support_sada const & lcp_c) + { + if (this != &lcp_c) + { + _lcp_support_sada tmp(lcp_c); + *this = std::move(tmp); + } + return *this; + } + /* Move assignment: moves the members and re-points the select support at this object's m_data. */ _lcp_support_sada & operator=(_lcp_support_sada && lcp_c) + { + if (this != &lcp_c) + { + m_csa = std::move(lcp_c.m_csa); + m_data = std::move(lcp_c.m_data); + m_select_support = std::move(lcp_c.m_select_support); + m_select_support.set_vector(&m_data); + } + return *this; + } + /* Builds the structure from the cached LCP array and the cached ISA, constructing the ISA first when its cache file does not exist. */ _lcp_support_sada(cache_config & config, t_csa const * f_csa) + { + typedef typename t_csa::size_type size_type; + set_csa(f_csa); + if (!cache_file_exists(conf::KEY_ISA, config)) + { + construct_isa(config); + } + /* the LCP array is loaded completely into memory */ int_vector<> lcp; + load_from_file(lcp, cache_file_name(conf::KEY_LCP, config)); + std::string isa_file = cache_file_name(conf::KEY_ISA, config); 
+ int_vector_buffer<> isa_buf(isa_file); + size_type n = lcp.size(); + bit_vector data = bit_vector(2 * n, 0); + size_type data_cnt = 0; + /* for i = 0..n-1, with l = lcp[isa_buf[i]]: skip l + 1 - old_l zero bits, then write a single one bit */ for (size_type i = 0, l = 0, old_l = 1; i < n; ++i) + { + l = lcp[isa_buf[i]]; + data_cnt += l + 1 - old_l; + data[data_cnt++] = 1; + old_l = l; + } + /* trim the bit vector to the bits actually written */ data.resize(data_cnt); + data.shrink_to_fit(); + m_data = bit_vector_type(data); + util::init_support(m_select_support, &m_data); + } + /* Attaches the CSA used by size(), empty() and operator[]. */ void set_csa(t_csa const * f_csa) + { + m_csa = f_csa; + } + /* Forwards to the attached CSA; m_csa is dereferenced without a null check. */ size_type size() const + { + return m_csa->size(); + } + static size_type max_size() + { + return t_csa::max_size(); + } + /* Forwards to the attached CSA; m_csa is dereferenced without a null check. */ bool empty() const + { + return m_csa->empty(); + } + const_iterator begin() const + { + return const_iterator(this, 0); + } + const_iterator end() const + { + return const_iterator(this, size()); + } + /* Returns select(j + 1) - 2 * j on m_data, where j = (*m_csa)[i]. */ inline value_type operator[](size_type i) const + { + size_type j = (*m_csa)[i]; + size_type s = m_select_support.select(j + 1); + return s - (j << 1); + } + /* Writes m_data and the select support; the CSA pointer is not serialized. Returns the number of bytes written. */ size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += m_data.serialize(out, child, "data"); + written_bytes += m_select_support.serialize(out, child, "select_support"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + /* Loads m_data and the select support from in and attaches ccsa (null by default). */ void load(std::istream & in, t_csa const * ccsa = nullptr) + { + m_csa = ccsa; + m_data.load(in); + m_select_support.load(in, &m_data); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_data)); + ar(CEREAL_NVP(m_select_support)); + } + template + /* Restores both members and re-points the select support at m_data; m_csa is left untouched. */ void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_data)); + ar(CEREAL_NVP(m_select_support)); + m_select_support.set_vector(&m_data); + } + /* Compares m_data and the select support; the CSA pointer is not compared. */ bool operator==(_lcp_support_sada const & other) const noexcept + { + return (m_data == other.m_data) && (m_select_support == 
other.m_select_support); + } + bool operator!=(_lcp_support_sada const & other) const noexcept + { + return !(*this == other); + } +}; +template +struct lcp_support_sada +{ + template + using type = _lcp_support_sada; +}; +} +#endif +#ifndef INCLUDED_SDSL_SUFFIX_TREE_HELPER +#define INCLUDED_SDSL_SUFFIX_TREE_HELPER +#include +#include +#include +#include +#include +#ifndef INCLUDED_SDSL_SORTED_MULTI_STACK_SUPPORT +#define INCLUDED_SDSL_SORTED_MULTI_STACK_SUPPORT +#include +#include +#include +#include +namespace sdsl +{ +class sorted_multi_stack_support +{ /* Stack backed by two bitmaps: m_stack marks which values are present, using 63 positions per 64-bit word (see block_nr/block_pos), and m_duplication_stack keeps one flag bit per pushed element marking repeated values. */ +public: + typedef int_vector<64>::size_type size_type; +private: + /* value passed to the constructor; it sizes both bitmaps */ size_type m_n; + /* number of elements on the stack, duplicates included */ size_type m_cnt; + /* position of the current top in m_stack; top() returns m_top - 1 */ size_type m_top; + int_vector<64> m_stack; + int_vector<64> m_duplication_stack; + /* index of the 64-bit word that holds position x */ inline size_type block_nr(size_type x) + { + return x / 63; + }; + /* bit offset of position x inside its word (0..62) */ inline size_type block_pos(size_type x) + { + return x % 63; + }; +public: + sorted_multi_stack_support(size_type n); + sorted_multi_stack_support(sorted_multi_stack_support const &) = default; + sorted_multi_stack_support(sorted_multi_stack_support &&) = default; + sorted_multi_stack_support & operator=(sorted_multi_stack_support const &) = default; + sorted_multi_stack_support & operator=(sorted_multi_stack_support &&) = default; + bool empty() const + { + return 0 == m_cnt; + }; + size_type top() const; + bool pop(); + bool push(size_type x); + size_type size() const + { + return m_cnt; + }; + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const; + void load(std::istream & in); + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const; + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar); +}; +inline sorted_multi_stack_support::sorted_multi_stack_support(size_type n) : + m_n(n), + m_cnt(0), + m_top(0), + m_stack(), + m_duplication_stack() +{ + /* one word per 63 positions, sized for positions 0..n+1 */ m_stack = int_vector<64>(block_nr(m_n + 1) + 1, 0); + /* position 0 is set from the start; it matches the initial m_top of 0 */ m_stack[0] = 1; + /* one flag bit per stack slot, 64 per word */ m_duplication_stack = int_vector<64>((m_n >> 6) + 1, 0); +} +inline 
sorted_multi_stack_support::size_type sorted_multi_stack_support::top() const +{ + /* values are stored shifted by one (see push), so undo the shift */ return m_top - 1; +} +inline bool sorted_multi_stack_support::push(size_type x) +{ + /* Returns true when the value was not yet present (its bit gets set and it becomes the new top), false when it is already present (only a duplicate flag is recorded). */ x += 1; + size_type bn = block_nr(x); + if (0 == ((m_stack[bn] >> block_pos(x)) & 1)) + { + m_stack[bn] ^= (1ULL << block_pos(x)); + /* if the preceding word is empty, store the old top there tagged with bit 63 so that pop() can jump straight back to it */ if (bn > 0 and m_stack[bn - 1] == 0) + { + m_stack[bn - 1] = 0x8000000000000000ULL | m_top; + } + m_top = x; + ++m_cnt; + return true; + } + else + { + m_duplication_stack[m_cnt >> 6] ^= (1ULL << (m_cnt & 0x3F)); + ++m_cnt; + return false; + } +} +inline bool sorted_multi_stack_support::pop() +{ + /* Returns true when a distinct value was removed, and false when only a duplicate flag was cleared or the stack was empty. */ if (m_cnt) + { + --m_cnt; + if ((m_duplication_stack[m_cnt >> 6] >> (m_cnt & 0x3F)) & 1) + { + m_duplication_stack[m_cnt >> 6] ^= (1ULL << (m_cnt & 0x3F)); + return false; + } + else + { + size_type bn = block_nr(m_top); + uint64_t w = m_stack[bn]; + assert((w >> 63) == 0); + /* clear the bit of the current top */ w ^= (1ULL << block_pos(m_top)); + m_stack[bn] = w; + if (w > 0) + { + /* new top is the highest remaining bit in the same word */ m_top = bn * 63 + bits::hi(w); + } + else + { + assert(bn > 0); + w = m_stack[bn - 1]; + if ((w >> 63) == 0) + { + assert(w > 0); + m_top = (bn - 1) * 63 + bits::hi(w); + } + else + { + /* the previous word holds a tagged back-pointer written by push(): restore the saved top and clear the word */ m_stack[bn - 1] = 0; + m_top = w & 0x7FFFFFFFFFFFFFFFULL; + } + } + return true; + } + } + return false; +} +inline sorted_multi_stack_support::size_type +sorted_multi_stack_support::serialize(std::ostream & out, structure_tree_node * v, std::string name) const +{ + /* field order (m_n, m_top, m_cnt, m_stack, m_duplication_stack) must match load() */ structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += write_member(m_n, out); + written_bytes += write_member(m_top, out); + written_bytes += write_member(m_cnt, out); + written_bytes += m_stack.serialize(out); + written_bytes += m_duplication_stack.serialize(out); + structure_tree::add_size(child, written_bytes); + return written_bytes; +} +inline void sorted_multi_stack_support::load(std::istream & in) +{ + /* reads the fields in the order serialize() wrote them */ read_member(m_n, in); + read_member(m_top, in); + read_member(m_cnt, in); + m_stack.load(in); + 
m_duplication_stack.load(in); +} +template +void sorted_multi_stack_support::CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const +{ + ar(CEREAL_NVP(m_n)); + ar(CEREAL_NVP(m_cnt)); + ar(CEREAL_NVP(m_top)); + ar(CEREAL_NVP(m_stack)); + ar(CEREAL_NVP(m_duplication_stack)); +} +template +void sorted_multi_stack_support::CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) +{ + ar(CEREAL_NVP(m_n)); + ar(CEREAL_NVP(m_cnt)); + ar(CEREAL_NVP(m_top)); + ar(CEREAL_NVP(m_stack)); + ar(CEREAL_NVP(m_duplication_stack)); +} +} +#endif +namespace sdsl +{ +template +class cst_node_child_proxy_iterator +{ /* Forward iterator over the children of one node: it holds the current child and advances with t_cst::sibling(). */ +public: + using iterator_category = std::forward_iterator_tag; + using value_type = typename t_cst::node_type; + using difference_type = std::ptrdiff_t; + using pointer = value_type *; + using reference = value_type &; + using node_type = typename t_cst::node_type; + using const_reference = const node_type; + using iterator_type = cst_node_child_proxy_iterator; +private: + t_cst const * m_cst; + node_type m_cur_node; +public: + /* Default constructor: m_cur_node is value-initialised so that operator==, which compares only m_cur_node, never reads an indeterminate value and two default-constructed iterators compare equal. */ cst_node_child_proxy_iterator() : m_cst(nullptr), m_cur_node(){}; + /* Positions the iterator at child v of a node in cst. */ cst_node_child_proxy_iterator(t_cst const * cst, node_type v) : m_cst(cst), m_cur_node(v) + {} + cst_node_child_proxy_iterator(iterator_type const & it) : m_cst(it.m_cst), m_cur_node(it.m_cur_node) + {} +public: + /* Returns the current child by value. */ const_reference operator*() const + { + return m_cur_node; + } + /* Moves to the next sibling; m_cst must be non-null. */ iterator_type & operator++() + { + m_cur_node = m_cst->sibling(m_cur_node); + return *this; + } + iterator_type operator++(int) + { + iterator_type it = *this; + ++(*this); + return it; + } + /* Only the current node is compared, not the tree pointer. */ bool operator==(iterator_type const & it) const + { + return it.m_cur_node == m_cur_node; + } + bool operator!=(iterator_type const & it) const + { + return !(*this == it); + } +}; +template +class cst_node_child_proxy +{ +public: + using iterator_type = cst_node_child_proxy_iterator; + using node_type = typename t_cst::node_type; + using size_type = typename t_cst::size_type; +private: + node_type m_parent; + t_cst const * m_cst; +public: + 
cst_node_child_proxy() = delete; + /* Proxy for the children of node v in the tree cst; the tree is referenced, not owned. */ explicit cst_node_child_proxy(t_cst const * cst, node_type v) : m_parent(v), m_cst(cst){}; + cst_node_child_proxy(cst_node_child_proxy const & p) : m_parent(p.m_parent), m_cst(p.m_cst){}; +public: + /* Returns the (i + 1)-th child, i.e. the index is zero-based. */ node_type operator[](size_type i) const + { + return m_cst->select_child(m_parent, i + 1); + } + /* Number of children. Declared const like the other accessors, because it only reads through the const tree pointer and must be callable on a const proxy. */ size_type size() const + { + return m_cst->degree(m_parent); + } + /* Iterator at the first child. */ iterator_type begin() const + { + return iterator_type(m_cst, m_cst->select_child(m_parent, 1)); + } + /* End marker: the iterator holding root(), which is what sibling() yields after the last child. */ iterator_type end() const + { + return iterator_type(m_cst, m_cst->root()); + } +}; +template +void construct_supercartesian_tree_bp(t_rac const & vec, bit_vector & bp, bool const minimum = true) +{ + /* Writes a parentheses sequence of length 2 * vec.size() into bp using an explicit stack: every element contributes one 1 bit when pushed, and every pop advances the write position by one, leaving a 0 bit. minimum selects whether smaller (true) or larger (false) elements pop the stack. */ typedef typename t_rac::size_type size_type; + bp.resize(2 * vec.size()); + util::set_to_value(bp, 0); + std::stack vec_stack; + size_type k = 0; + for (size_type i = 0; i < vec.size(); ++i) + { + typename t_rac::value_type l = vec[i]; + if (minimum) + { + while (vec_stack.size() > 0 and l < vec_stack.top()) + { + vec_stack.pop(); + ++k; + } + } + else + { + while (vec_stack.size() > 0 and l > vec_stack.top()) + { + vec_stack.pop(); + ++k; + } + } + vec_stack.push(l); + bp[k++] = 1; + } + /* flush: the remaining stack entries yield the trailing 0 bits */ while (vec_stack.size() > 0) + { + vec_stack.pop(); + bp[k++] = 0; + } + assert(k == 2 * vec.size()); +} +template +bit_vector construct_supercartesian_tree_bp_succinct(t_rac const & vec, bool const minimum = true) +{ + /* Variant that returns the bit vector and keeps indices into vec on a sorted_stack_support instead of copying values onto a std::stack. */ typedef typename t_rac::size_type size_type; + bit_vector bp(2 * vec.size(), 0); + if (vec.size() > 0) + { + sorted_stack_support vec_stack(vec.size()); + size_type k = 0; + if (minimum) + { + bp[k++] = 1; + for (size_type i = 1; i < vec.size(); ++i) + { + if (vec[i] < vec[i - 1]) + { + ++k; + while (vec_stack.size() > 0 and vec[i] < vec[vec_stack.top()]) + { + vec_stack.pop(); + ++k; + } + } + else + { + vec_stack.push(i - 1); + } + bp[k++] = 1; + } + } + else + { + for (size_type i = 0; i < vec.size(); ++i) + { + while (vec_stack.size() > 0 and vec[i] > vec[vec_stack.top()]) + { + 
vec_stack.pop(); + ++k; + } + vec_stack.push(i); + bp[k++] = 1; + } + } + } + return bp; +} +template +bit_vector construct_supercartesian_tree_bp_succinct(int_vector_buffer & lcp_buf, bool const minimum = true) +{ + typedef bit_vector::size_type size_type; + bit_vector bp(2 * lcp_buf.size(), 0); + if (lcp_buf.size() > 0) + { + sorted_multi_stack_support vec_stack(lcp_buf.size()); + size_type k = 0; + if (minimum) + { + bp[k++] = 1; + size_type last = lcp_buf[0]; + for (size_type i = 1, x; i < lcp_buf.size(); ++i) + { + x = lcp_buf[i]; + if (x < last) + { + ++k; + while (!vec_stack.empty() and x < vec_stack.top()) + { + vec_stack.pop(); + ++k; + } + } + else + { + vec_stack.push(last); + } + bp[k++] = 1; + last = x; + } + } + else + { + for (size_type i = 0, x; i < lcp_buf.size(); ++i) + { + x = lcp_buf[i]; + while (!vec_stack.empty() and x > vec_stack.top()) + { + vec_stack.pop(); + ++k; + } + vec_stack.push(x); + bp[k++] = 1; + } + } + } + return bp; +} +template +bit_vector::size_type construct_supercartesian_tree_bp_succinct_and_first_child(int_vector_buffer & lcp_buf, + bit_vector & bp, + bit_vector & bp_fc, + bool const minimum = true) +{ + typedef bit_vector::size_type size_type; + size_type n = lcp_buf.size(); + bp.resize(2 * n); + bp_fc.resize(n); + if (n == 0) + return 0; + size_type fc_cnt = 0; + util::set_to_value(bp, 0); + util::set_to_value(bp_fc, 0); + sorted_multi_stack_support vec_stack(n); + size_type k = 0; + size_type k_fc = 0; + if (minimum) + { + for (size_type i = 0, x; i < n; ++i) + { + x = lcp_buf[i]; + while (!vec_stack.empty() and x < vec_stack.top()) + { + if (vec_stack.pop()) + { + bp_fc[k_fc] = 1; + ++fc_cnt; + } + ++k; + ++k_fc; + } + vec_stack.push(x); + bp[k++] = 1; + } + } + else + { + for (size_type i = 0, x; i < n; ++i) + { + x = lcp_buf[i]; + while (!vec_stack.empty() and x > vec_stack.top()) + { + if (vec_stack.pop()) + { + bp_fc[k_fc] = 1; + ++fc_cnt; + } + ++k; + ++k_fc; + } + vec_stack.push(x); + bp[k++] = 1; + } + } + while 
(!vec_stack.empty()) + { + if (vec_stack.pop()) + { + bp_fc[k_fc] = 1; + ++fc_cnt; + } + ++k; + ++k_fc; + } + return fc_cnt; +} +template +typename t_csa::size_type get_char_pos(typename t_csa::size_type idx, typename t_csa::size_type d, t_csa const & csa) +{ + if (d == 0) + return idx; + if (csa.sa_sample_dens + csa.isa_sample_dens > 2 * d + 2) + { + for (typename t_csa::size_type i = 0; i < d; ++i) + idx = csa.psi[idx]; + return idx; + } + return csa.isa[csa[idx] + d]; +} +template +struct has_id +{ + template + static constexpr auto check(T *) -> + typename std::is_same().id(std::declval())), + typename T::size_type>::type + { + return std::true_type(); + } + template + static constexpr std::false_type check(...) + { + return std::false_type(); + } + typedef decltype(check(nullptr)) type; + static constexpr bool value = type::value; +}; +} +#endif +namespace sdsl +{ +template , + class t_lcp = lcp_support_sada<>, + class t_bp_support = bp_support_sada<>, + class t_rank_10 = rank_support_v5<10, 2>, + class t_select_10 = select_support_mcl<10, 2>> +class cst_sada +{ + static_assert(std::is_same::type, csa_tag>::value, + "First template argument has to be a compressed suffix array."); +public: + typedef cst_dfs_const_forward_iterator const_iterator; + typedef cst_bottom_up_const_forward_iterator const_bottom_up_iterator; + typedef typename t_csa::size_type size_type; + typedef ptrdiff_t difference_type; + typedef t_csa csa_type; + typedef typename t_lcp::template type lcp_type; + typedef typename t_csa::char_type char_type; + typedef typename t_csa::string_type string_type; + typedef size_type node_type; + typedef t_bp_support bp_support_type; + typedef t_rank_10 rank_10_type; + typedef t_select_10 select_10_type; + typedef typename t_csa::alphabet_type::comp_char_type comp_char_type; + typedef typename t_csa::alphabet_type::sigma_type sigma_type; + typedef typename t_csa::alphabet_category alphabet_category; + typedef cst_tag index_category; +private: + t_csa 
m_csa; + lcp_type m_lcp; + bit_vector m_bp; + bp_support_type m_bp_support; + rank_10_type m_bp_rank10; + select_10_type m_bp_select10; + size_type inorder(node_type v) const + { + return m_bp_rank10(m_bp_support.find_close(v + 1) + 1); + } +public: + t_csa const & csa = m_csa; + lcp_type const & lcp = m_lcp; + bit_vector const & bp = m_bp; + bp_support_type const & bp_support = m_bp_support; + rank_10_type const & bp_rank_10 = m_bp_rank10; + select_10_type const & bp_select_10 = m_bp_select10; + cst_sada() = default; + cst_sada(cst_sada const & cst) : + m_csa(cst.m_csa), + m_bp(cst.m_bp), + m_bp_support(cst.m_bp_support), + m_bp_rank10(cst.m_bp_rank10), + m_bp_select10(cst.m_bp_select10) + { + copy_lcp(m_lcp, cst.m_lcp, *this); + m_bp_support.set_vector(&m_bp); + m_bp_rank10.set_vector(&m_bp); + m_bp_select10.set_vector(&m_bp); + } + cst_sada(cst_sada && cst) : + m_csa(std::move(cst.m_csa)), + m_bp(std::move(cst.m_bp)), + m_bp_support(std::move(cst.m_bp_support)), + m_bp_rank10(std::move(cst.m_bp_rank10)), + m_bp_select10(std::move(cst.m_bp_select10)) + { + move_lcp(m_lcp, cst.m_lcp, *this); + m_bp_support.set_vector(&m_bp); + m_bp_rank10.set_vector(&m_bp); + m_bp_select10.set_vector(&m_bp); + } + cst_sada(cache_config & config) + { + { + auto event = memory_monitor::event("bps-dfs"); + int_vector_buffer<> lcp(cache_file_name(conf::KEY_LCP, config)); + bool const o_par = true; + bool const c_par = !o_par; + m_bp.resize(4 * lcp.size()); + if (lcp.size() > 0) + { + sorted_stack_support stack(lcp.size() + 1); + stack.push(0); + size_type p = m_bp.size() - 1; + for (size_type i = lcp.size() - 1; i > 0; --i) + { + size_type co = 1; + size_type x = lcp[i] + 1; + while (stack.top() > x) + { + stack.pop(); + ++co; + } + if (stack.top() < x) + { + stack.push(x); + } + m_bp[p--] = o_par; + while (--co > 0) + m_bp[p--] = c_par; + } + m_bp[p--] = o_par; + while (stack.size() > 1) + { + stack.pop(); + m_bp[p--] = c_par; + } + size_type q = 0; + for (size_type i = 1; i < 
lcp.size(); ++i) + { + size_type co = 0; + do + { + ++co; + } + while (m_bp[++p] == c_par); + size_type cc = 1; + size_type x = lcp[i] + 1; + while (stack.top() > x) + { + stack.pop(); + ++cc; + } + if (stack.top() < x) + { + stack.push(x); + } + while (co-- > 0) + m_bp[q++] = o_par; + while (cc-- > 0) + m_bp[q++] = c_par; + } + m_bp[q++] = o_par; + while (!stack.empty()) + { + m_bp[q++] = c_par; + stack.pop(); + } + m_bp.resize(q); + } + } + { + auto event = memory_monitor::event("bpss-dfs"); + util::init_support(m_bp_support, &m_bp); + util::init_support(m_bp_rank10, &m_bp); + util::init_support(m_bp_select10, &m_bp); + } + { + auto event = memory_monitor::event("clcp"); + cache_config tmp_config(false, config.dir, config.id, config.file_map); + construct_lcp(m_lcp, *this, tmp_config); + config.file_map = tmp_config.file_map; + } + { + auto event = memory_monitor::event("load csa"); + load_from_cache(m_csa, std::string(conf::KEY_CSA) + "_" + util::class_to_hash(m_csa), config); + } + } + size_type size() const + { + return m_csa.size(); + } + static size_type max_size() + { + return t_csa::max_size(); + } + bool empty() const + { + return m_csa.empty(); + } + const_iterator begin() const + { + if (0 == m_bp.size()) + return end(); + return const_iterator(this, root(), false, true); + } + const_iterator begin(node_type const & v) const + { + if (0 == m_bp.size() and root() == v) + return end(); + return const_iterator(this, v, false, true); + } + const_iterator end() const + { + return const_iterator(this, root(), true, false); + } + const_iterator end(node_type const & v) const + { + if (root() == v) + return end(); + return ++const_iterator(this, v, true, true); + } + const_bottom_up_iterator begin_bottom_up() const + { + if (0 == m_bp.size()) + return end_bottom_up(); + return const_bottom_up_iterator(this, leftmost_leaf(root())); + } + const_bottom_up_iterator end_bottom_up() const + { + return const_bottom_up_iterator(this, root(), false); + } + cst_sada & 
operator=(cst_sada const & cst) + { + if (this != &cst) + { + cst_sada tmp(cst); + *this = std::move(tmp); + } + return *this; + } + cst_sada & operator=(cst_sada && cst) + { + if (this != &cst) + { + m_csa = std::move(cst.m_csa); + move_lcp(m_lcp, cst.m_lcp, *this); + m_bp = std::move(cst.m_bp); + m_bp_support = std::move(cst.m_bp_support); + m_bp_support.set_vector(&m_bp); + m_bp_rank10 = std::move(cst.m_bp_rank10); + m_bp_rank10.set_vector(&m_bp); + m_bp_select10 = std::move(cst.m_bp_select10); + m_bp_select10.set_vector(&m_bp); + } + return *this; + } + bool operator==(cst_sada const & other) const noexcept + { + return (m_csa == other.m_csa) && (m_lcp == other.m_lcp) && (m_bp == other.m_bp) + && (m_bp_support == other.m_bp_support) && (m_bp_rank10 == other.m_bp_rank10) + && (m_bp_select10 == other.m_bp_select10); + } + bool operator!=(cst_sada const & other) const noexcept + { + return !(*this == other); + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += m_csa.serialize(out, child, "csa"); + written_bytes += m_lcp.serialize(out, child, "lcp"); + written_bytes += m_bp.serialize(out, child, "bp"); + written_bytes += m_bp_support.serialize(out, child, "bp_support"); + written_bytes += m_bp_rank10.serialize(out, child, "bp_rank_10"); + written_bytes += m_bp_select10.serialize(out, child, "bp_select_10"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + m_csa.load(in); + load_lcp(m_lcp, in, *this); + m_bp.load(in); + m_bp_support.load(in, &m_bp); + m_bp_rank10.load(in, &m_bp); + m_bp_select10.load(in, &m_bp); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_csa)); + ar(CEREAL_NVP(m_lcp)); + ar(CEREAL_NVP(m_bp)); + ar(CEREAL_NVP(m_bp_support)); + 
ar(CEREAL_NVP(m_bp_rank10)); + ar(CEREAL_NVP(m_bp_select10)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_csa)); + ar(CEREAL_NVP(m_lcp)); + set_lcp_pointer(m_lcp, *this); + ar(CEREAL_NVP(m_bp)); + ar(CEREAL_NVP(m_bp_support)); + m_bp_support.set_vector(&m_bp); + ar(CEREAL_NVP(m_bp_rank10)); + m_bp_rank10.set_vector(&m_bp); + ar(CEREAL_NVP(m_bp_select10)); + m_bp_select10.set_vector(&m_bp); + } + node_type root() const + { + return 0; + } + bool is_leaf(node_type v) const + { + assert(m_bp[v] == 1); + return !m_bp[v + 1]; + } + node_type select_leaf(size_type i) const + { + assert(i > 0 and i <= m_csa.size()); + return m_bp_select10.select(i) - 1; + } + size_type depth(node_type v) const + { + if (v == root()) + return 0; + if (is_leaf(v)) + { + size_type i = m_bp_rank10(v); + return m_csa.size() - m_csa[i]; + } + assert(inorder(v) > 0); + return m_lcp[inorder(v)]; + } + size_type node_depth(node_type v) const + { + return (m_bp_support.rank(v) << 1) - v - 2; + } + size_type size(node_type v) const + { + size_type r = m_bp_support.find_close(v); + return m_bp_rank10(r + 1) - m_bp_rank10(v); + } + node_type leftmost_leaf(const node_type v) const + { + return m_bp_select10(m_bp_rank10(v) + 1) - 1; + } + node_type rightmost_leaf(const node_type v) const + { + size_type r = m_bp_support.find_close(v); + return m_bp_select10(m_bp_rank10(r + 1)) - 1; + } + size_type lb(const node_type v) const + { + return m_bp_rank10(v); + } + size_type rb(const node_type v) const + { + size_type r = m_bp_support.find_close(v); + return m_bp_rank10(r + 1) - 1; + } + node_type parent(node_type v) const + { + assert(m_bp[v] == 1); + if (v == root()) + return root(); + else + { + return m_bp_support.enclose(v); + } + } + cst_node_child_proxy children(node_type v) const + { + return cst_node_child_proxy(this, v); + } + node_type sibling(node_type v) const + { + if (v == root()) + return root(); + node_type sib = m_bp_support.find_close(v) + 1; + if 
(m_bp[sib]) + return sib; + else + return root(); + } + node_type child(node_type v, const char_type c, size_type & char_pos) const + { + if (is_leaf(v)) + return root(); + comp_char_type cc = m_csa.char2comp[c]; + if (cc == 0 and c != 0) + return root(); + size_type char_ex_max_pos = m_csa.C[cc + 1], char_inc_min_pos = m_csa.C[cc]; + size_type d = depth(v); + size_type res = v + 1; + while (true) + { + if (is_leaf(res)) + { + char_pos = get_char_pos(m_bp_rank10(res), d, m_csa); + } + else + { + char_pos = get_char_pos(inorder(res), d, m_csa); + } + if (char_pos >= char_ex_max_pos) + return root(); + if (char_pos >= char_inc_min_pos) + return res; + res = m_bp_support.find_close(res) + 1; + if (!m_bp[res]) + return root(); + } + } + node_type child(node_type v, const char_type c) const + { + size_type char_pos; + return child(v, c, char_pos); + } + node_type select_child(node_type v, size_type i) const + { + if (is_leaf(v)) + return root(); + size_type res = v + 1; + while (i > 1) + { + res = m_bp_support.find_close(res) + 1; + if (!m_bp[res]) + { + return root(); + } + --i; + } + return res; + } + char_type edge(node_type v, size_type d) const + { + assert(1 <= d); + assert(d <= depth(v)); + size_type i = 0; + if (is_leaf(v)) + { + i = m_bp_rank10(v); + } + else + { + i = inorder(v); + } + size_type order = get_char_pos(i, d - 1, m_csa); + size_type c_begin = 1, c_end = ((size_type)m_csa.sigma) + 1, mid; + while (c_begin < c_end) + { + mid = (c_begin + c_end) >> 1; + if (m_csa.C[mid] <= order) + { + c_begin = mid + 1; + } + else + { + c_end = mid; + } + } + return m_csa.comp2char[c_begin - 1]; + } + node_type lca(node_type v, node_type w) const + { + assert(m_bp[v] == 1 and m_bp[w] == 1); + if (v > w) + { + std::swap(v, w); + } + else if (v == w) + { + return v; + } + if (v == root()) + return root(); + return m_bp_support.double_enclose(v, w); + } + node_type sl(node_type v) const + { + if (v == root()) + return root(); + size_type left = m_bp_rank10(v); + if 
(is_leaf(v)) + { + return select_leaf(m_csa.psi[left] + 1); + } + size_type right = m_bp_rank10(m_bp_support.find_close(v)) - 1; + assert(left < right); + node_type left_leaf = select_leaf(m_csa.psi[left] + 1); + node_type right_leaf = select_leaf(m_csa.psi[right] + 1); + return lca(left_leaf, right_leaf); + } + node_type sl(node_type v, size_type i) const + { + if (v == root()) + return root(); + size_type left = m_bp_rank10(v); + if (is_leaf(v)) + { + return select_leaf(get_char_pos(left, i, m_csa) + 1); + } + size_type right = m_bp_rank10(m_bp_support.find_close(v)) - 1; + assert(left < right); + node_type left_leaf = select_leaf(get_char_pos(left, i, m_csa) + 1); + node_type right_leaf = select_leaf(get_char_pos(right, i, m_csa) + 1); + return lca(left_leaf, right_leaf); + } + node_type wl(node_type v, const char_type c) const + { + size_type left = m_bp_rank10(v); + size_type right = is_leaf(v) ? left : m_bp_rank10(m_bp_support.find_close(v)) - 1; + size_type c_left = m_csa.bwt.rank(left, c); + size_type c_right = m_csa.bwt.rank(right + 1, c); + if (c_left == c_right) + return root(); + if (c_left + 1 == c_right) + return select_leaf(m_csa.C[m_csa.char2comp[c]] + c_left + 1); + else + { + size_type left = m_csa.C[m_csa.char2comp[c]] + c_left; + size_type right = m_csa.C[m_csa.char2comp[c]] + c_right - 1; + assert(left < right); + node_type left_leaf = select_leaf(left + 1); + node_type right_leaf = select_leaf(right + 1); + return lca(left_leaf, right_leaf); + } + } + size_type sn(node_type v) const + { + assert(is_leaf(v)); + return m_csa[m_bp_rank10(v)]; + } + size_type id(node_type v) const + { + if (m_bp[v + 1]) + { + return size() + (m_bp_support.rank(v) - 1) - m_bp_rank10(v); + } + else + { + return m_bp_rank10(v); + } + } + size_type inv_id(size_type id) + { + if (id < size()) + { + return select_leaf(id + 1); + } + else + { + id = id + 1 - size(); + size_type lb = 0, rb = m_bp.size(); + while (rb - lb > 1) + { + size_type mid = lb + (rb - lb) / 2; + if 
(m_bp[mid] == 0 and m_bp[mid - 1] == 1) + { + ++mid; + } + size_type mid_id = m_bp_support.rank(mid - 1) + - m_bp_rank10(mid); + if (mid_id < id) + { + lb = mid; + } + else + { + rb = mid; + } + } + return lb; + } + } + size_type nodes() const + { + return m_bp.size() >> 1; + } + node_type node(size_type lb, size_type rb) const + { + return lca(select_leaf(lb + 1), select_leaf(rb + 1)); + } + size_type degree(node_type v) const + { + size_type res = 0; + v = v + 1; + while (m_bp[v]) + { + ++res; + v = m_bp_support.find_close(v) + 1; + } + return res; + } + size_type tlcp_idx(size_type i) const + { + size_type ii = 0; + if (i > 0) + { + size_type ipos = m_bp_select10(i) - 1; + size_type ip1pos = m_bp_select10(i + 1) - 1; + ii = m_bp_support.double_enclose(ipos, ip1pos); + } + ii = m_bp_support.find_close(ii); + return ii - m_bp_support.rank(ii) - m_bp_rank10(ii); + } +}; +} +#endif +#ifndef SDSL_DAC_VECTOR +#define SDSL_DAC_VECTOR +#include +#include +#include +#include +#include +#include +namespace sdsl +{ +template > +class dac_vector +{ +private: + static_assert(t_b > 0, "dac_vector: t_b has to be larger than 0"); + static_assert(t_b < 64, "dac_vector: t_b has to be smaller than 64"); +public: + typedef typename int_vector<>::value_type value_type; + typedef random_access_const_iterator const_iterator; + typedef const_iterator iterator; + typedef const value_type const_reference; + typedef const_reference reference; + typedef const_reference * pointer; + typedef const pointer const_pointer; + typedef int_vector<>::size_type size_type; + typedef ptrdiff_t difference_type; + typedef t_rank rank_support_type; + typedef iv_tag index_category; +private: + int_vector m_data; + bit_vector m_overflow; + rank_support_type m_overflow_rank; + int_vector<64> m_level_pointer_and_rank = int_vector<64>(4, 0); + /* Level counter, incremented by the converting constructors for every non-empty level. It is zero-initialised because the defaulted constructor and the early return for empty input would otherwise leave it indeterminate, and operator== reads it and serialize writes it. */ + uint8_t m_max_level = 0; +public: + dac_vector() = default; + dac_vector(dac_vector const & v) : + m_data(v.m_data), + m_overflow(v.m_overflow), + 
m_overflow_rank(v.m_overflow_rank), + m_level_pointer_and_rank(v.m_level_pointer_and_rank), + m_max_level(v.m_max_level) + { + m_overflow_rank.set_vector(&m_overflow); + } + dac_vector(dac_vector && v) + { + *this = std::move(v); + } + dac_vector & operator=(dac_vector const & v) + { + if (this != &v) + { + dac_vector tmp(v); + *this = std::move(tmp); + } + return *this; + } + dac_vector & operator=(dac_vector && v) + { + if (this != &v) + { + m_data = std::move(v.m_data); + m_overflow = std::move(v.m_overflow); + m_overflow_rank = std::move(v.m_overflow_rank); + m_overflow_rank.set_vector(&m_overflow); + m_level_pointer_and_rank = std::move(v.m_level_pointer_and_rank); + m_max_level = std::move(v.m_max_level); + } + return *this; + } + template + dac_vector(Container const & c); + template + dac_vector(int_vector_buffer & v_buf); + size_type size() const + { + return m_level_pointer_and_rank[2]; + } + static size_type max_size() + { + return int_vector<>::max_size() / 2; + } + bool empty() const + { + return 0 == m_level_pointer_and_rank[2]; + } + const const_iterator begin() const + { + return const_iterator(this, 0); + } + const const_iterator end() const + { + return const_iterator(this, size()); + } + value_type operator[](size_type i) const + { + uint8_t level = 1; + uint8_t offset = t_b; + size_type result = m_data[i]; + uint64_t const * p = m_level_pointer_and_rank.data(); + uint64_t ppi = (*p) + i; + while (level < m_max_level and m_overflow[ppi]) + { + p += 2; + ppi = *p + (m_overflow_rank(ppi) - *(p - 1)); + result |= (m_data[ppi] << (offset)); + ++level; + offset += t_b; + } + return result; + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const; + void load(std::istream & in) + { + m_data.load(in); + m_overflow.load(in); + m_overflow_rank.load(in, &m_overflow); + m_level_pointer_and_rank.load(in); + read_member(m_max_level, in); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const; 
+ template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar); + bool operator==(dac_vector const & v) const + { + return (m_max_level == v.m_max_level) && (m_data == v.m_data) && (m_overflow == v.m_overflow) + && (m_overflow_rank == v.m_overflow_rank) && (m_level_pointer_and_rank == v.m_level_pointer_and_rank); + } + bool operator!=(dac_vector const & v) const + { + return !(*this == v); + } +}; +template +template +dac_vector::dac_vector(Container const & c) +{ + size_type n = c.size(), val = 0; + if (n == 0) + return; + m_level_pointer_and_rank = int_vector<64>(128, 0); + m_level_pointer_and_rank[0] = n; + uint8_t level_x_2 = 0; + uint8_t max_level_x_2 = 4; + for (size_type i = 0; i < n; ++i) + { + val = c[i]; + val >>= t_b; + level_x_2 = 2; + while (val) + { + ++m_level_pointer_and_rank[level_x_2]; + val >>= t_b; + level_x_2 += 2; + max_level_x_2 = std::max(max_level_x_2, level_x_2); + } + } + m_level_pointer_and_rank.resize(max_level_x_2); + m_max_level = 0; + size_type sum_blocks = 0, last_block_size = 0; + for (size_type i = 0, t = 0; i < m_level_pointer_and_rank.size(); i += 2) + { + t = sum_blocks; + sum_blocks += m_level_pointer_and_rank[i]; + m_level_pointer_and_rank[i] = t; + if (sum_blocks > t) + { + ++m_max_level; + last_block_size = sum_blocks - t; + } + } + m_overflow = bit_vector(sum_blocks - last_block_size, 0); + m_data.resize(sum_blocks); + assert(last_block_size > 0); + int_vector<64> cnt = m_level_pointer_and_rank; + const uint64_t mask = bits::lo_set[t_b]; + for (size_type i = 0, j = 0; i < n; ++i) + { + val = c[i]; + j = cnt[0]++; + m_data[j] = val & mask; + val >>= t_b; + level_x_2 = 2; + while (val) + { + m_overflow[j] = 1; + j = cnt[level_x_2]++; + m_data[j] = val & mask; + val >>= t_b; + level_x_2 += 2; + } + } + util::init_support(m_overflow_rank, &m_overflow); + for (size_type i = 0; + 2 * i < m_level_pointer_and_rank.size() and m_level_pointer_and_rank[2 * i] < m_overflow.size(); + ++i) + { + m_level_pointer_and_rank[2 * i + 1] = 
m_overflow_rank(m_level_pointer_and_rank[2 * i]); + } +} +template +template +dac_vector::dac_vector(int_vector_buffer & v_buf) +{ + size_type n = v_buf.size(), val = 0; + if (n == 0) + return; + m_level_pointer_and_rank = int_vector<64>(128, 0); + m_level_pointer_and_rank[0] = n; + uint8_t level_x_2 = 0; + uint8_t max_level_x_2 = 4; + for (size_type i = 0; i < n; ++i) + { + val = v_buf[i]; + val >>= t_b; + level_x_2 = 2; + while (val) + { + ++m_level_pointer_and_rank[level_x_2]; + val >>= t_b; + level_x_2 += 2; + max_level_x_2 = std::max(max_level_x_2, level_x_2); + } + } + m_level_pointer_and_rank.resize(max_level_x_2); + m_max_level = 0; + size_type sum_blocks = 0, last_block_size = 0; + for (size_type i = 0, t = 0; i < m_level_pointer_and_rank.size(); i += 2) + { + t = sum_blocks; + sum_blocks += m_level_pointer_and_rank[i]; + m_level_pointer_and_rank[i] = t; + if (sum_blocks > t) + { + ++m_max_level; + last_block_size = sum_blocks - t; + } + } + m_overflow = bit_vector(sum_blocks - last_block_size, 0); + m_data.resize(sum_blocks); + assert(last_block_size > 0); + int_vector<64> cnt = m_level_pointer_and_rank; + const uint64_t mask = bits::lo_set[t_b]; + for (size_type i = 0, j = 0; i < n; ++i) + { + val = v_buf[i]; + j = cnt[0]++; + m_data[j] = val & mask; + val >>= t_b; + level_x_2 = 2; + while (val) + { + m_overflow[j] = 1; + j = cnt[level_x_2]++; + m_data[j] = val & mask; + val >>= t_b; + level_x_2 += 2; + } + } + util::init_support(m_overflow_rank, &m_overflow); + for (size_type i = 0; + 2 * i < m_level_pointer_and_rank.size() and m_level_pointer_and_rank[2 * i] < m_overflow.size(); + ++i) + { + m_level_pointer_and_rank[2 * i + 1] = m_overflow_rank(m_level_pointer_and_rank[2 * i]); + } +} +template +dac_vector<>::size_type +dac_vector::serialize(std::ostream & out, structure_tree_node * v, std::string name) const +{ + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += 
m_data.serialize(out, child, "data"); + written_bytes += m_overflow.serialize(out, child, "overflow"); + written_bytes += m_overflow_rank.serialize(out, child, "overflow_rank"); + written_bytes += m_level_pointer_and_rank.serialize(out, child, "level_pointer_and_rank"); + written_bytes += write_member(m_max_level, out, child, "max_level"); + structure_tree::add_size(child, written_bytes); + return written_bytes; +} +template +template +void dac_vector::CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const +{ + ar(CEREAL_NVP(m_max_level)); + ar(CEREAL_NVP(m_data)); + ar(CEREAL_NVP(m_overflow)); + ar(CEREAL_NVP(m_overflow_rank)); + ar(CEREAL_NVP(m_level_pointer_and_rank)); +} +template +template +void dac_vector::CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) +{ + ar(CEREAL_NVP(m_max_level)); + ar(CEREAL_NVP(m_data)); + ar(CEREAL_NVP(m_overflow)); + ar(CEREAL_NVP(m_overflow_rank)); + m_overflow_rank.set_vector(&m_overflow); + ar(CEREAL_NVP(m_level_pointer_and_rank)); +} +} +#endif +#ifndef INCLUDED_SDSL_LCP_DAC +#define INCLUDED_SDSL_LCP_DAC +#include +#ifndef INCLUDED_SDSL_LCP_VLC +#define INCLUDED_SDSL_LCP_VLC +#include +#include +#ifndef SDSL_VLC_VECTOR +#define SDSL_VLC_VECTOR +#include +#include +#include +#include +#include +#include +namespace sdsl +{ +template +struct vlc_vector_trait +{ + typedef int_vector<0> int_vector_type; +}; +template <> +struct vlc_vector_trait<32> +{ + typedef int_vector<32> int_vector_type; +}; +template , uint32_t t_dens = 128, uint8_t t_width = 0> +class vlc_vector +{ +private: + static_assert(t_dens > 1, "vlc_vector: Sampling density must be larger than 1"); +public: + typedef uint64_t value_type; + typedef random_access_const_iterator iterator; + typedef iterator const_iterator; + typedef const value_type reference; + typedef const value_type const_reference; + typedef value_type const * const_pointer; + typedef ptrdiff_t difference_type; + typedef int_vector<>::size_type size_type; + typedef t_coder coder; + typedef iv_tag index_category; 
+ typedef typename vlc_vector_trait::int_vector_type int_vector_type; + static const uint32_t sample_dens = t_dens; + bit_vector m_z; +private: + int_vector_type m_sample_pointer; + size_type m_size = 0; + uint32_t m_sample_dens = t_dens; + void clear() + { + m_z.resize(0); + m_z.shrink_to_fit(); + m_size = 0; + m_sample_pointer.resize(0); + m_sample_pointer.shrink_to_fit(); + } +public: + vlc_vector() = default; + vlc_vector(vlc_vector const &) = default; + vlc_vector(vlc_vector &&) = default; + vlc_vector & operator=(vlc_vector const &) = default; + vlc_vector & operator=(vlc_vector &&) = default; + template + vlc_vector(Container const & c); + template + vlc_vector(int_vector_buffer & v_buf); + size_type size() const + { + return m_size; + } + static size_type max_size() + { + return int_vector<>::max_size() / 2; + } + bool empty() const + { + return 0 == m_size; + } + const const_iterator begin() const + { + return const_iterator(this, 0); + } + const const_iterator end() const + { + return const_iterator(this, this->m_size); + } + /* Equality: the element count, the encoded bit stream m_z and the sample pointers must all match. The sizes are compared with ==; AND-ing them as truth values made two empty vectors compare unequal and made operator!= report an empty vector as different from itself. */ + bool operator==(vlc_vector const & v) const + { + return m_size == v.m_size && m_z == v.m_z && m_sample_pointer == v.m_sample_pointer; + } + bool operator!=(vlc_vector const & v) const + { + return !(*this == v); + } + value_type operator[](size_type i) const; + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const; + void load(std::istream & in); + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const; + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar); + value_type sample(const size_type i) const; + uint32_t get_sample_dens() const; + void set_sample_dens(const uint32_t sdens); +}; +template +inline uint32_t vlc_vector::get_sample_dens() const +{ + if (t_dens == 0) + return m_sample_dens; + else + return t_dens; +} +template +inline void vlc_vector::set_sample_dens(const uint32_t sdens) +{ + m_sample_dens = sdens; +} +template +inline typename 
vlc_vector::value_type +vlc_vector::operator[](const size_type i) const +{ + assert(i + 1 != 0); + assert(i < m_size); + size_type idx = i / get_sample_dens(); + return (t_coder::template decode(m_z.data(), m_sample_pointer[idx], i - t_dens * idx + 1)) - 1; +} +template +template +vlc_vector::vlc_vector(Container const & c) +{ + clear(); + if (c.empty()) + return; + size_type samples = 0, z_size = 0; + for (size_type i = 0; i < c.size(); ++i) + { + if (c[i] + 1 < 1) + { + throw std::logic_error("vlc_vector cannot decode values smaller than 1!"); + } + z_size += t_coder::encoding_length(c[i] + 1); + } + samples = (c.size() + get_sample_dens() - 1) / get_sample_dens(); + m_sample_pointer = int_vector<>(samples + 1, 0, bits::hi(z_size + 1) + 1); + m_z.bit_resize(z_size); + z_size = 0; + uint64_t * z_data = t_coder::raw_data(m_z); + uint8_t offset = 0; + size_type no_sample = 0; + for (size_type i = 0, sample_cnt = 0; i < c.size(); ++i, --no_sample) + { + if (!no_sample) + { + no_sample = get_sample_dens(); + m_sample_pointer[sample_cnt++] = z_size; + } + t_coder::encode(c[i] + 1, z_data, offset); + z_size += t_coder::encoding_length(c[i] + 1); + } + m_size = c.size(); +} +template +template +vlc_vector::vlc_vector(int_vector_buffer & v_buf) +{ + clear(); + size_type n = v_buf.size(); + if (n == 0) + return; + size_type samples = 0, z_size = 0; + for (size_type i = 0; i < n; ++i) + { + size_type x = v_buf[i] + 1; + if (x < 1) + { + throw std::logic_error("vlc_vector cannot decode values smaller than 1!"); + } + z_size += t_coder::encoding_length(x); + } + samples = (n + get_sample_dens() - 1) / get_sample_dens(); + m_sample_pointer = int_vector<>(samples + 1, 0, bits::hi(z_size + 1) + 1); + m_z.bit_resize(z_size); + z_size = 0; + uint64_t * z_data = t_coder::raw_data(m_z); + uint8_t offset = 0; + size_type no_sample = 0; + for (size_type i = 0, sample_cnt = 0; i < n; ++i, --no_sample) + { + if (!no_sample) + { + no_sample = get_sample_dens(); + 
m_sample_pointer[sample_cnt++] = z_size; + } + size_type x = v_buf[i] + 1; + t_coder::encode(x, z_data, offset); + z_size += t_coder::encoding_length(x); + } + m_size = n; +} +template +vlc_vector<>::size_type +vlc_vector::serialize(std::ostream & out, structure_tree_node * v, std::string name) const +{ + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += write_member(m_size, out, child, "m_size"); + written_bytes += m_z.serialize(out, child, "m_z"); + written_bytes += m_sample_pointer.serialize(out, child, "m_sample_pointer"); + structure_tree::add_size(child, written_bytes); + return written_bytes; +} +template +void vlc_vector::load(std::istream & in) +{ + read_member(m_size, in); + m_z.load(in); + m_sample_pointer.load(in); +} +template +template +void vlc_vector::CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const +{ + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_z)); + ar(CEREAL_NVP(m_sample_pointer)); +} +template +template +void vlc_vector::CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) +{ + ar(CEREAL_NVP(m_size)); + ar(CEREAL_NVP(m_z)); + ar(CEREAL_NVP(m_sample_pointer)); +} +} +#endif +namespace sdsl +{ +template > +class lcp_vlc +{ +public: + typedef typename t_vlc_vec::value_type value_type; + typedef random_access_const_iterator const_iterator; + typedef const_iterator iterator; + typedef const value_type const_reference; + typedef const_reference reference; + typedef const_reference * pointer; + typedef const pointer const_pointer; + typedef typename t_vlc_vec::size_type size_type; + typedef typename t_vlc_vec::difference_type difference_type; + typedef t_vlc_vec vlc_vec_type; + typedef lcp_plain_tag lcp_category; + typedef lcp_tag index_category; + enum + { + fast_access = 0, + text_order = 0, + sa_order = 1 + }; + template + using type = lcp_vlc; +private: + vlc_vec_type m_vec; +public: + lcp_vlc() = default; + lcp_vlc(lcp_vlc const &) = default; + lcp_vlc(lcp_vlc &&) = 
default; + lcp_vlc & operator=(lcp_vlc const &) = default; + lcp_vlc & operator=(lcp_vlc &&) = default; + lcp_vlc(cache_config & config, std::string other_key = "") + { + std::string lcp_key = conf::KEY_LCP; + if ("" != other_key) + { + lcp_key = other_key; + } + int_vector_buffer<> lcp_buf(cache_file_name(lcp_key, config)); + m_vec = vlc_vec_type(lcp_buf); + } + size_type size() const + { + return m_vec.size(); + } + static size_type max_size() + { + return vlc_vec_type::max_size(); + } + bool empty() const + { + return m_vec.empty(); + } + const_iterator begin() const + { + return const_iterator(this, 0); + } + const_iterator end() const + { + return const_iterator(this, size()); + } + inline value_type operator[](size_type i) const + { + return m_vec[i]; + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += m_vec.serialize(out, child, "vec"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + m_vec.load(in); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_vec)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_vec)); + } + bool operator==(lcp_vlc const & other) const noexcept + { + return (m_vec == other.m_vec); + } + bool operator!=(lcp_vlc const & other) const noexcept + { + return !(*this == other); + } +}; +} +#endif +namespace sdsl +{ +template > +using lcp_dac = lcp_vlc>; +} +#endif +namespace sdsl +{ +struct cache_config; +template +class lcp_fully +{ +public: + typedef typename t_cst::size_type size_type; + typedef size_type value_type; + typedef random_access_const_iterator const_iterator; + typedef const_iterator iterator; + typedef lcp_tag lcp_category; + enum + { + fast_access = 0, + text_order = 0, + 
sa_order = 0 + }; +private: + t_cst const * m_cst; +public: + lcp_fully() = default; + lcp_fully(t_cst const * cst) : m_cst(cst){}; + lcp_fully(lcp_fully const &) = default; + lcp_fully(lcp_fully &&) = default; + lcp_fully & operator=(lcp_fully const &) = default; + lcp_fully & operator=(lcp_fully &&) = default; + ~lcp_fully() = default; + size_type size() const + { + return m_cst->size(); + } + value_type operator[](size_type i) const + { + if (0 == i) + { + return 0; + } + else + { + using leaf_type = typename t_cst::leaf_type; + using sampled_node_type = typename t_cst::sampled_node_type; + leaf_type v_l = i - 1; + leaf_type v_r = i; + size_type i; + sampled_node_type u; + return m_cst->depth_lca(v_l, v_r, i, u); + } + } + const_iterator begin() const + { + return const_iterator(this, 0); + } + const_iterator end() const + { + return const_iterator(this, size()); + } +}; +template , + uint32_t t_delta = 0, + class t_s_support = bp_support_sada<>, + class t_b = sd_vector<>, + class t_depth = dac_vector<>, + bool t_sample_leaves = false> +class cst_fully +{ +public: + typedef cst_dfs_const_forward_iterator const_iterator; + typedef typename t_csa::size_type size_type; + typedef t_csa csa_type; + typedef lcp_fully lcp_type; + typedef typename t_csa::char_type char_type; + typedef std::pair node_type; + typedef size_type leaf_type; + typedef size_type sampled_node_type; + typedef t_s_support s_support_type; + typedef t_b b_type; + typedef typename t_b::select_0_type b_select_0_type; + typedef typename t_b::select_1_type b_select_1_type; + typedef t_depth depth_type; + typedef typename t_csa::alphabet_category alphabet_category; + typedef cst_tag index_category; +private: + size_type m_delta; + size_type m_nodes; + csa_type m_csa; + bit_vector m_s; + s_support_type m_s_support; + b_type m_b; + b_select_0_type m_b_select0; + b_select_1_type m_b_select1; + depth_type m_depth; + lcp_type m_lcp = lcp_type(this); +public: + size_type const & delta = m_delta; + csa_type 
const & csa = m_csa; + bit_vector const & s = m_s; + s_support_type const & s_support = m_s_support; + b_type const & b = m_b; + b_select_0_type const & b_select_0 = m_b_select0; + b_select_1_type const & b_select_1 = m_b_select1; + depth_type const & depth_sampling = m_depth; + lcp_type const & lcp = m_lcp; + cst_fully() = default; + cst_fully(cst_fully const & cst) : + m_delta(cst.m_delta), + m_nodes(cst.m_nodes), + m_csa(cst.m_csa), + m_s(cst.m_s), + m_s_support(cst.m_s_support), + m_b(cst.m_b), + m_b_select0(cst.m_b_select0), + m_b_select1(cst.m_b_select1), + m_depth(cst.m_depth) + { + m_s_support.set_vector(&m_s); + m_b_select0.set_vector(&m_b); + m_b_select1.set_vector(&m_b); + } + cst_fully(cst_fully && cst) + { + *this = std::move(cst); + } + cst_fully(cache_config & config); + size_type size() const + { + return m_csa.size(); + } + static size_type max_size() + { + return t_csa::max_size(); + } + bool empty() const + { + return m_csa.empty(); + } + const_iterator begin() const + { + if (m_b.size() == 0) + { + return end(); + } + return const_iterator(this, root(), false, true); + } + const_iterator end() const + { + return const_iterator(this, root(), true, false); + } + cst_fully & operator=(cst_fully const & cst) + { + if (this != &cst) + { + cst_fully tmp(cst); + *this = std::move(tmp); + } + return *this; + } + cst_fully & operator=(cst_fully && cst) + { + if (this != &cst) + { + m_delta = cst.m_delta; + m_nodes = cst.m_nodes; + m_csa = std::move(cst.m_csa); + m_s = std::move(cst.m_s); + m_s_support = std::move(cst.m_s_support); + m_s_support.set_vector(&m_s); + m_b = std::move(cst.m_b); + m_b_select0 = std::move(cst.m_b_select0); + m_b_select0.set_vector(&m_b); + m_b_select1 = std::move(cst.m_b_select1); + m_b_select1.set_vector(&m_b); + m_depth = std::move(cst.m_depth); + } + return *this; + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const + { + structure_tree_node * child = 
structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += write_member(m_delta, out, child, "m_delta"); + written_bytes += write_member(m_nodes, out, child, "m_nodes"); + written_bytes += m_csa.serialize(out, child, "csa"); + written_bytes += m_s.serialize(out, child, "s"); + written_bytes += m_s_support.serialize(out, child, "s_support"); + written_bytes += m_b.serialize(out, child, "b"); + written_bytes += m_b_select0.serialize(out, child, "b_select0"); + written_bytes += m_b_select1.serialize(out, child, "b_select1"); + written_bytes += m_depth.serialize(out, child, "depth"); + structure_tree::add_size(child, written_bytes); + return written_bytes; + } + void load(std::istream & in) + { + read_member(m_delta, in); + read_member(m_nodes, in); + m_csa.load(in); + m_s.load(in); + m_s_support.load(in, &m_s); + m_b.load(in); + m_b_select0.load(in, &m_b); + m_b_select1.load(in, &m_b); + m_depth.load(in); + } + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const + { + ar(CEREAL_NVP(m_delta)); + ar(CEREAL_NVP(m_nodes)); + ar(CEREAL_NVP(m_csa)); + ar(CEREAL_NVP(m_s)); + ar(CEREAL_NVP(m_s_support)); + ar(CEREAL_NVP(m_b)); + ar(CEREAL_NVP(m_b_select0)); + ar(CEREAL_NVP(m_b_select1)); + ar(CEREAL_NVP(m_depth)); + } + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) + { + ar(CEREAL_NVP(m_delta)); + ar(CEREAL_NVP(m_nodes)); + ar(CEREAL_NVP(m_csa)); + ar(CEREAL_NVP(m_s)); + ar(CEREAL_NVP(m_s_support)); + m_s_support.set_vector(&m_s); + ar(CEREAL_NVP(m_b)); + ar(CEREAL_NVP(m_b_select0)); + m_b_select0.set_vector(&m_b); + ar(CEREAL_NVP(m_b_select1)); + m_b_select1.set_vector(&m_b); + ar(CEREAL_NVP(m_depth)); + } + bool operator==(cst_fully const & other) const noexcept + { + return (m_delta == other.m_delta) && (m_nodes == other.m_nodes) && (m_csa == other.m_csa) && (m_s == other.m_s) + && (m_s_support == other.m_s_support) && (m_b == other.m_b) && (m_b_select0 == other.m_b_select0) + && (m_b_select1 == 
other.m_b_select1) && (m_depth == other.m_depth); + } + bool operator!=(cst_fully const & other) const noexcept + { + return !(*this == other); + } + node_type root() const + { + return node_type(0, m_csa.size() - 1); + } + sampled_node_type sampled_root() const + { + return 0; + } + bool is_leaf(node_type v) const + { + return v.first == v.second; + } + node_type select_leaf(size_type i) const + { + assert(i > 0 and i <= m_csa.size()); + return node_type(i - 1, i - 1); + } + node_type node(size_type lb, size_type rb) const + { + return node_type(lb, rb); + } + leaf_type lb(node_type v) const + { + return v.first; + } + leaf_type rb(node_type v) const + { + return v.second; + } + size_type size(node_type const & v) const + { + return v.second - v.first + 1; + } + node_type leftmost_leaf(const node_type v) const + { + return node_type(v.first, v.first); + } + node_type rightmost_leaf(const node_type v) const + { + return node_type(v.second, v.second); + } + bool ancestor(node_type v, node_type w) const + { + return v.first <= w.first && v.second >= w.second; + } + sampled_node_type pred(leaf_type v) const + { + return m_b_select0.select(v + 1) - v - 1; + } + sampled_node_type lsa_leaf(leaf_type l) const + { + sampled_node_type p = pred(l); + if (m_s[p]) + { + return p; + } + else + { + return m_s_support.enclose(m_s_support.find_open(p)); + } + } + node_type sampled_node(sampled_node_type u) const + { + assert(m_s[u] == 1); + size_type u_end = m_s_support.find_close(u); + size_type b_left = m_b_select1.select(u + 1); + size_type b_right = m_b_select1.select(u_end + 1); + return node_type(b_left - u, b_right - u_end - 1); + } + sampled_node_type sampled_lca(sampled_node_type u, sampled_node_type q) const + { + assert(m_s[u] == 1 and m_s[q] == 1); + if (u > q) + { + std::swap(u, q); + } + else if (u == q) + { + return u; + } + if (u == sampled_root()) + { + return sampled_root(); + } + if (m_s_support.find_close(u) > q) + { + return u; + } + return 
m_s_support.double_enclose(u, q); + } + size_type depth(sampled_node_type u) const + { + assert(m_s[u] == 1); + size_type idx = m_s_support.rank(u) - 1; + return m_depth[idx] * (m_delta / 2); + } + size_type depth(node_type v) const + { + if (v == root()) + return 0; + else if (is_leaf(v)) + { + return m_csa.size() - m_csa[v.first]; + } + size_type i; + sampled_node_type u; + return depth_lca(v.first, v.second, i, u); + } + node_type lca(node_type v, node_type w) const + { + leaf_type l = std::min(v.first, w.first); + leaf_type r = std::max(v.second, w.second); + if (l == r) + { + return node_type(l, r); + } + else + { + return lca(l, r); + } + } + node_type lca(leaf_type l, leaf_type r) const + { + assert(l < r); + size_type i; + sampled_node_type u; + std::vector c(delta, 0); + depth_lca(l, r, i, u, c); + node_type v = sampled_node(u); + leaf_type lb = v.first; + leaf_type rb = v.second; + for (size_type k = 0; k < i; k++) + { + backward_search(m_csa, lb, rb, c[i - k - 1], lb, rb); + } + return node_type(lb, rb); + } + size_type depth_lca(leaf_type l, + leaf_type r, + size_type & res_i, + sampled_node_type & res_u, + std::vector & res_label) const + { + assert(l < r); + size_type max_d = 0; + size_type max_d_i = 0; + sampled_node_type max_d_node = 0; + for (size_type i = 0; i < m_delta; i++) + { + sampled_node_type node = sampled_lca(lsa_leaf(l), lsa_leaf(r)); + size_type d = i + depth(node); + if (d > max_d) + { + max_d = d; + max_d_i = i; + max_d_node = node; + } + char_type c = m_csa.F[l]; + char_type comp = csa.char2comp[c]; + res_label[i] = c; + if (l < m_csa.C[comp] || r >= m_csa.C[comp + 1]) + { + break; + } + l = m_csa.psi[l]; + r = m_csa.psi[r]; + } + res_i = max_d_i; + res_u = max_d_node; + return max_d; + } + size_type depth_lca(leaf_type l, leaf_type r, size_type & res_i, sampled_node_type & res_u) const + { + assert(l < r); + size_type max_d = 0; + size_type max_d_i = 0; + sampled_node_type max_d_node = 0; + for (size_type i = 0; i < m_delta; i++) + 
{ + sampled_node_type node = sampled_lca(lsa_leaf(l), lsa_leaf(r)); + size_type d = i + depth(node); + if (d > max_d) + { + max_d = d; + max_d_i = i; + max_d_node = node; + } + char_type c = m_csa.F[l]; + char_type comp = csa.char2comp[c]; + if (l < m_csa.C[comp] || r >= m_csa.C[comp + 1]) + { + break; + } + l = m_csa.psi[l]; + r = m_csa.psi[r]; + } + res_i = max_d_i; + res_u = max_d_node; + return max_d; + } + node_type sl(node_type v) const + { + if (v == root()) + { + return root(); + } + else if (is_leaf(v)) + { + size_t leaf = m_csa.psi[v.first]; + return node_type(leaf, leaf); + } + return lca(m_csa.psi[v.first], m_csa.psi[v.second]); + } + node_type wl(node_type v, const char_type c) const + { + size_type l, r; + std::tie(l, r) = v; + backward_search(m_csa, l, r, c, l, r); + return node_type(l, r); + } + size_type sn(node_type v) const + { + assert(is_leaf(v)); + return m_csa[v.first]; + } + node_type parent(node_type v) const + { + const leaf_type l = v.first; + const leaf_type r = v.second; + node_type left_parent = root(); + node_type right_parent = root(); + if (l > 0) + { + left_parent = lca(l - 1, r); + } + if (r < m_csa.size() - 1) + { + right_parent = lca(l, r + 1); + } + return ancestor(right_parent, left_parent) ? 
left_parent : right_parent; + } + node_type child(node_type v, char_type c) const + { + if (is_leaf(v)) + { + return root(); + } + size_type d = depth(v); + return child(v, c, d); + } + node_type child(node_type v, char_type c, size_type d) const + { + leaf_type lower; + leaf_type upper; + { + leaf_type begin = v.first; + leaf_type end = v.second + 1; + while (begin < end) + { + leaf_type sample_pos = (begin + end) / 2; + size_type char_pos = get_char_pos(sample_pos, d, m_csa); + char_type sample = m_csa.F[char_pos]; + if (sample < c) + { + begin = sample_pos + 1; + } + else + { + end = sample_pos; + } + } + lower = begin; + } + { + leaf_type begin = v.first; + leaf_type end = v.second + 1; + while (begin < end) + { + leaf_type sample_pos = (begin + end) / 2; + size_type char_pos = get_char_pos(sample_pos, d, m_csa); + char_type sample = m_csa.F[char_pos]; + if (sample <= c) + { + begin = sample_pos + 1; + } + else + { + end = sample_pos; + } + } + upper = begin; + } + if (lower == upper) + { + return root(); + } + return node_type(lower, upper - 1); + } + node_type select_child(node_type v, size_type i) const + { + if (is_leaf(v)) + { + return root(); + } + size_type d = depth(v); + size_type char_pos = get_char_pos(v.first, d, m_csa); + char_type c = m_csa.F[char_pos]; + node_type res = child(v, c, d); + while (i > 1) + { + if (res.second >= v.second) + { + return root(); + } + char_pos = get_char_pos(res.second + 1, d, m_csa); + c = m_csa.F[char_pos]; + res = child(v, c, d); + i--; + } + return res; + } + size_type degree(node_type const & v) const + { + if (is_leaf(v)) + { + return 0; + } + else + { + size_type res = 1; + size_type d = depth(v); + size_type char_pos = get_char_pos(v.first, d, m_csa); + char_type c = m_csa.F[char_pos]; + node_type v_i = child(v, c, d); + while (v_i.second < v.second) + { + ++res; + char_pos = get_char_pos(v_i.second + 1, d, m_csa); + c = m_csa.F[char_pos]; + v_i = child(v, c, d); + } + return res; + } + } + cst_node_child_proxy 
children(node_type const & v) const + { + return cst_node_child_proxy(this, v); + } + node_type sibling(node_type v) const + { + node_type p = parent(v); + if (v.second >= p.second) + { + return root(); + } + size_type d = depth(p); + size_type char_pos = get_char_pos(v.second + 1, d, m_csa); + char_type c = m_csa.F[char_pos]; + return child(p, c, d); + } + char_type edge(node_type v, size_type d) const + { + assert(d >= 1 and d <= depth(v)); + size_type char_pos = get_char_pos(v.first, d - 1, m_csa); + return m_csa.F[char_pos]; + } + size_type node_depth(node_type v) const + { + size_type d = 0; + while (v != root()) + { + ++d; + v = parent(v); + } + return d; + } + size_type nodes() const + { + return m_nodes; + } + size_type sampled_nodes() const + { + return m_s.size() / 2; + } +}; +template +cst_fully::cst_fully(cache_config & config) +{ + cst_sada> cst(config); + m_nodes = cst.nodes(); + if (t_delta > 0) + { + m_delta = t_delta; + } + else + { + const size_type n = cst.size(); + m_delta = (bits::hi(n - 1) + 1) * (bits::hi(bits::hi(n - 1)) + 1); + if (m_delta < 2) + { + m_delta = 2; + } + } + size_type delta_half = m_delta / 2; + bit_vector is_sampled(cst.nodes(), false); + is_sampled[cst.id(cst.root())] = true; + size_type sample_count = 1; + if (t_sample_leaves) + { + auto event = memory_monitor::event("scan-leaves"); + size_type leaf_idx = 0; + for (size_type i = 0; i < cst.size(); i++) + { + const size_type d = i + 1; + if (d + delta_half <= cst.size() and d % delta_half == 0) + { + auto const node = cst.select_leaf(leaf_idx + 1); + const size_type id = cst.id(node); + if (!is_sampled[id]) + { + is_sampled[id] = true; + sample_count++; + } + } + leaf_idx = cst.csa.lf[leaf_idx]; + } + } + { + auto event = memory_monitor::event("scan-nodes"); + for (auto it = cst.begin(); it != cst.end(); ++it) + { + if (it.visit() == 1 and cst.is_leaf(*it) == false) + { + auto const node = *it; + const size_type d = cst.depth(node); + if (d % delta_half == 0) + { + auto v = 
cst.sl(node, delta_half); + const size_type id = cst.id(v); + if (!is_sampled[id]) + { + is_sampled[id] = true; + sample_count++; + } + } + } + } + } + m_s.resize(2 * sample_count); + util::set_to_value(m_s, 0); + bit_vector tmp_b(2 * sample_count + cst.size() + 2 * (cst.size() == 1), 0); + int_vector<64> tmp_depth; + tmp_depth.resize(sample_count); + { + auto event = memory_monitor::event("node-sampling"); + size_type s_idx = 0; + size_type b_idx = 0; + size_type sample_idx = 0; + for (auto it = cst.begin(); it != cst.end(); ++it) + { + auto node = *it; + if (it.visit() == 1 && is_sampled[cst.id(node)]) + { + m_s[s_idx++] = 1; + tmp_b[b_idx++] = 1; + tmp_depth[sample_idx++] = cst.depth(node) / delta_half; + } + if (cst.is_leaf(node)) + { + b_idx++; + } + if ((cst.is_leaf(node) || it.visit() == 2) && is_sampled[cst.id(node)]) + { + s_idx++; + tmp_b[b_idx++] = 1; + } + } + } + { + auto event = memory_monitor::event("ss-depth"); + m_csa = std::move(cst.csa); + util::init_support(m_s_support, &m_s); + m_b = b_type(tmp_b); + util::init_support(m_b_select0, &m_b); + util::init_support(m_b_select1, &m_b); + m_depth = depth_type(tmp_depth); + } +} +} +#endif +#ifndef INCLUDED_SDSL_CST_SCT3 +#define INCLUDED_SDSL_CST_SCT3 +#include +#include +#include +#include +#include +namespace sdsl +{ +template ::size_type> +struct bp_interval; +template , + class t_lcp = lcp_dac<>, + class t_bp_support = bp_support_sada<>, + class t_bv = bit_vector, + class t_rank = typename std:: + conditional::value, rank_support_v5<>, typename t_bv::rank_1_type>::type, + class t_sel = typename std::conditional< + std::is_same::value + and std::is_same::value, + select_support_scan<>, + typename t_bv::select_1_type>::type> +class cst_sct3 +{ + static_assert(std::is_same::type, csa_tag>::value, + "First template argument has to be a compressed suffix array."); +public: + typedef cst_dfs_const_forward_iterator const_iterator; + typedef cst_bottom_up_const_forward_iterator const_bottom_up_iterator; + 
typedef typename t_csa::size_type size_type; + typedef ptrdiff_t difference_type; + typedef t_csa csa_type; + typedef typename t_lcp::template type lcp_type; + typedef t_bp_support bp_support_type; + typedef typename t_csa::char_type char_type; + typedef typename t_csa::string_type string_type; + typedef bp_interval node_type; + typedef t_bv bv_type; + typedef t_rank rank_type; + typedef t_sel sel_type; + typedef typename t_csa::alphabet_type::comp_char_type comp_char_type; + typedef typename t_csa::alphabet_type::sigma_type sigma_type; + typedef typename t_csa::alphabet_category alphabet_category; + typedef cst_tag index_category; +private: + csa_type m_csa; + lcp_type m_lcp; + bit_vector m_bp; + bp_support_type m_bp_support; + bv_type m_first_child; + rank_type m_first_child_rank; + sel_type m_first_child_select; + size_type m_nodes; + inline size_type first_l_index(node_type const & node, size_type & kpos, size_type & ckpos) const + { + if (node.cipos > node.jp1pos) + { + ckpos = node.jp1pos - 1; + } + else + { + ckpos = node.cipos - 1; + } + assert(m_bp[ckpos] == 0); + kpos = m_bp_support.find_open(ckpos); + return m_bp_support.rank(kpos) - 1; + } + size_type select_l_index(node_type const & v, size_type i, size_type & kpos, size_type & ckpos) const + { + assert(i > 0); + if (v.cipos > v.jp1pos) + { + ckpos = v.jp1pos - 1; + } + else + { + ckpos = v.cipos - 1; + } + assert(m_bp[ckpos] == 0); + if (1 == i) + { + kpos = m_bp_support.find_open(ckpos); + return m_bp_support.rank(kpos) - 1; + } + else + { + size_type r = ckpos - m_bp_support.rank(ckpos); + if (r + 1 >= i) + { + if (i < degree(v)) + { + ckpos -= (i - 1); + assert(m_bp[ckpos] == 0); + kpos = m_bp_support.find_open(ckpos); + return m_bp_support.rank(kpos) - 1; + } + } + kpos = v.jp1pos; + if (kpos < m_bp.size()) + ckpos = m_bp_support.find_close(kpos); + else + ckpos = m_bp.size(); + return v.j + 1; + } + } + inline size_type closing_pos_of_first_l_index(node_type const & node) const + { + if 
(node.cipos > node.jp1pos) + { + return node.jp1pos - 1; + } + else + { + return node.cipos - 1; + } + } + inline size_type nsv(SDSL_UNUSED size_type i, size_type ipos) const + { + size_type cipos = m_bp_support.find_close(ipos); + size_type result = m_bp_support.rank(cipos); + return result; + } + inline size_type + psv(SDSL_UNUSED size_type i, size_type ipos, size_type cipos, size_type & psvpos, size_type & psvcpos) const + { + if ((cipos + (size_type)m_csa.sigma) >= m_bp.size()) + { + psvpos = 0; + psvcpos = m_bp.size() - 1; + return 0; + } + if (m_bp[cipos + 1]) + { + psvpos = m_bp_support.enclose(ipos); + psvcpos = m_bp_support.find_close(psvpos); + return m_bp_support.rank(psvpos) - 1; + } + size_type r0 = cipos - m_bp_support.rank(cipos); + size_type next_first_child = 0; + uint64_t const * p = m_first_child.data() + (r0 >> 6); + uint64_t w = (*p) >> (r0 & 0x3F); + if (w) + { + next_first_child = cipos + bits::lo(w); + if (cipos == next_first_child and m_bp[next_first_child + 1]) + { + psvpos = m_bp_support.enclose(ipos); + psvcpos = m_bp_support.find_close(psvpos); + return m_bp_support.rank(psvpos) - 1; + } + } + else + { + size_type delta = 63 - (r0 & 0x3F); + ++p; + int steps = 4; + while (!(w = *p) and steps-- > 0) + { + ++p; + delta += 64; + } + if (w != 0) + { + delta += bits::lo(w) + 1; + } + else + { + auto pos = m_first_child_select(m_first_child_rank(r0 + 1) + 1); + delta = pos - r0; + } + next_first_child = cipos + delta; + } + if (!m_bp[next_first_child + 1]) + { + psvcpos = next_first_child + 1; + psvpos = m_bp_support.find_open(psvcpos); + return m_bp_support.rank(psvpos) - 1; + } + else + { + psvpos = m_bp_support.enclose(m_bp_support.find_open(next_first_child)); + psvcpos = m_bp_support.find_close(psvpos); + return m_bp_support.rank(psvpos) - 1; + } + } + inline size_type rmq(size_type l, size_type r) const + { + size_type i = m_bp_support.select(l + 1); + size_type j = m_bp_support.select(r + 1); + size_type fc_i = 
m_bp_support.find_close(i); + if (j < fc_i) + { + return l; + } + else + { + size_type ec = m_bp_support.rr_enclose(i, j); + if (ec == m_bp_support.size()) + { + return r; + } + else + { + return m_bp_support.rank(ec) - 1; + } + } + } +public: + csa_type const & csa = m_csa; + lcp_type const & lcp = m_lcp; + bit_vector const & bp = m_bp; + bp_support_type const & bp_support = m_bp_support; + bv_type const & first_child_bv = m_first_child; + rank_type const & first_child_rank = m_first_child_rank; + sel_type const & first_child_select = m_first_child_select; + cst_sct3() = default; + cst_sct3(cache_config & cache, bool build_only_bps = false); + cst_sct3(cst_sct3 const & cst) : + m_csa(cst.m_csa), + m_bp(cst.m_bp), + m_bp_support(cst.m_bp_support), + m_first_child(cst.m_first_child), + m_first_child_rank(cst.m_first_child_rank), + m_first_child_select(cst.m_first_child_select), + m_nodes(cst.m_nodes) + { + copy_lcp(m_lcp, cst.m_lcp, *this); + m_bp_support.set_vector(&m_bp); + m_first_child_rank.set_vector(&m_first_child); + m_first_child_select.set_vector(&m_first_child); + } + cst_sct3(cst_sct3 && cst) : + m_csa(std::move(cst.m_csa)), + m_bp(std::move(cst.m_bp)), + m_bp_support(std::move(cst.m_bp_support)), + m_first_child(std::move(cst.m_first_child)), + m_first_child_rank(std::move(cst.m_first_child_rank)), + m_first_child_select(std::move(cst.m_first_child_select)), + m_nodes(cst.m_nodes) + { + move_lcp(m_lcp, cst.m_lcp, *this); + m_bp_support.set_vector(&m_bp); + m_first_child_rank.set_vector(&m_first_child); + m_first_child_select.set_vector(&m_first_child); + } + size_type size() const + { + return m_bp.size() >> 1; + } + static size_type max_size() + { + return t_csa::max_size(); + } + bool empty() const + { + return m_csa.empty(); + } + const_iterator begin() const + { + if (0 == m_bp.size()) + return end(); + return const_iterator(this, root(), false, true); + }; + const_iterator begin(node_type const & v) const + { + if (0 == m_bp.size() and root() == v) 
+ return end(); + return const_iterator(this, v, false, true); + } + const_iterator end() const + { + return const_iterator(this, root(), true, false); + } + const_iterator end(node_type const & v) const + { + if (root() == v) + return end(); + return ++const_iterator(this, v, true, true); + } + const_bottom_up_iterator begin_bottom_up() const + { + if (0 == m_bp.size()) + return end_bottom_up(); + return const_bottom_up_iterator(this, leftmost_leaf(root())); + } + const_bottom_up_iterator end_bottom_up() const + { + return const_bottom_up_iterator(this, root(), false); + } + cst_sct3 & operator=(cst_sct3 const & cst) + { + if (this != &cst) + { + cst_sct3 tmp(cst); + *this = std::move(tmp); + } + return *this; + } + cst_sct3 & operator=(cst_sct3 && cst) + { + if (this != &cst) + { + m_csa = std::move(cst.m_csa); + move_lcp(m_lcp, cst.m_lcp, *this); + m_bp = std::move(cst.m_bp); + m_bp_support = std::move(cst.m_bp_support); + m_bp_support.set_vector(&m_bp); + m_first_child = std::move(cst.m_first_child); + m_first_child_rank = std::move(cst.m_first_child_rank); + m_first_child_rank.set_vector(&m_first_child); + m_first_child_select = std::move(cst.m_first_child_select); + m_first_child_select.set_vector(&m_first_child); + m_nodes = std::move(cst.m_nodes); + } + return *this; + } + size_type serialize(std::ostream & out, structure_tree_node * v = nullptr, std::string name = "") const; + void load(std::istream & in); + template + void CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const; + template + void CEREAL_LOAD_FUNCTION_NAME(archive_t & ar); + bool operator==(cst_sct3 const & other) const noexcept + { + return (m_csa == other.m_csa) && (m_lcp == other.m_lcp) && (m_bp == other.m_bp) + && (m_bp_support == other.m_bp_support) && (m_first_child == other.m_first_child) + && (m_first_child_rank == other.m_first_child_rank) + && (m_first_child_select == other.m_first_child_select) ; + } + bool operator!=(cst_sct3 const & other) const noexcept + { + return !(*this == 
other); + } + node_type root() const + { + return node_type(0, size() - 1, 0, m_bp.size() - 1, m_bp.size()); + } + bool is_leaf(node_type const & v) const + { + return v.i == v.j; + } + node_type select_leaf(size_type i) const + { + assert(i > 0 and i <= size()); + size_type ipos = m_bp_support.select(i); + size_type jp1pos; + if (i == size()) + jp1pos = m_bp.size(); + else if (m_bp[ipos + 1]) + jp1pos = ipos + 1; + else + jp1pos = m_bp_support.select(i + 1); + return node_type(i - 1, i - 1, ipos, m_bp_support.find_close(ipos), jp1pos); + } + size_type size(node_type const & v) const + { + return v.j - v.i + 1; + } + node_type leftmost_leaf(node_type const & v) const + { + return select_leaf(v.i + 1); + } + node_type rightmost_leaf(node_type const & v) const + { + return select_leaf(v.j + 1); + } + size_type lb(node_type const & v) const + { + return v.i; + } + size_type rb(node_type const & v) const + { + return v.j; + } + node_type parent(node_type const & v) const + { + if (v.cipos > v.jp1pos) + { + size_type psv_pos, psv_cpos, psv_v, nsv_v, nsv_p1pos; + psv_v = psv(v.j + 1, v.jp1pos, m_bp_support.find_close(v.jp1pos), psv_pos, psv_cpos); + nsv_v = nsv(v.j + 1, v.jp1pos) - 1; + if (nsv_v == size() - 1) + { + nsv_p1pos = m_bp.size(); + } + else + { + nsv_p1pos = m_bp_support.select(nsv_v + 2); + } + return node_type(psv_v, nsv_v, psv_pos, psv_cpos, nsv_p1pos); + } + else + { + size_type psv_pos, psv_cpos, psv_v; + psv_v = psv(v.i, v.ipos, v.cipos, psv_pos, psv_cpos); + return node_type(psv_v, v.j, psv_pos, psv_cpos, v.jp1pos); + } + } + cst_node_child_proxy children(node_type const & v) const + { + return cst_node_child_proxy(this, v); + } + node_type sibling(node_type const & v) const + { + if (v.cipos < v.jp1pos) + { + return root(); + } + size_type cjp1posm1 = m_bp_support.find_close(v.jp1pos) - 1; + bool last_child = m_bp[cjp1posm1]; + if (!last_child) + { + size_type first_child_idx = cjp1posm1 - m_bp_support.rank(cjp1posm1); + last_child = 
m_first_child[first_child_idx]; + } + if (last_child) + { + size_type nsv_v = nsv(v.j + 1, v.jp1pos) - 1, nsv_p1pos; + if (nsv_v == size() - 1) + { + nsv_p1pos = m_bp.size(); + } + else + { + nsv_p1pos = m_bp_support.select(nsv_v + 2); + } + return node_type(v.j + 1, nsv_v, v.jp1pos, m_bp_support.find_close(v.jp1pos), nsv_p1pos); + } + else + { + size_type new_j = m_bp_support.rank(m_bp_support.find_open(cjp1posm1)) - 2; + return node_type(v.j + 1, + new_j, + v.jp1pos, + m_bp_support.find_close(v.jp1pos), + m_bp_support.select(new_j + 2)); + } + } + node_type select_child(node_type const & v, size_type i) const + { + assert(i > 0); + if (is_leaf(v)) + return root(); + if (1 == i) + { + size_type k = 0, kpos = 0, k_find_close = 0; + k = select_l_index(v, 1, kpos, k_find_close); + return node_type(v.i, k - 1, v.ipos, v.cipos, kpos); + } + else + { + size_type k1, kpos1, k_find_close1; + k1 = select_l_index(v, i - 1, kpos1, k_find_close1); + if (k1 == v.j + 1) + return root(); + size_type k2, kpos2, k_find_close2; + k2 = select_l_index(v, i, kpos2, k_find_close2); + return node_type(k1, k2 - 1, kpos1, k_find_close1, kpos2); + } + } + size_type degree(node_type const & v) const + { + if (is_leaf(v)) + return 0; + size_type r = closing_pos_of_first_l_index(v); + size_type r0 = r - m_bp_support.rank(r); + uint64_t const * p = m_first_child.data() + (r0 >> 6); + uint8_t offset = r0 & 0x3F; + uint64_t w = (*p) & bits::lo_set[offset]; + if (w) + { + return offset - bits::hi(w) + 1; + } + else if (m_first_child.data() == p) + { + return offset + 2; + } + else + { + size_type res = offset + 2; + int steps = 4; + while (p > m_first_child.data() and steps-- > 0) + { + w = *(--p); + if (0 == w) + res += 64; + else + { + return res + (63 - bits::hi(w)); + } + } + auto goal_rank = m_first_child_rank(r0); + if (goal_rank == 0) + { + return r0 + 2; + } + else + { + return r0 - m_first_child_select(goal_rank) + 1; + } + } + } + node_type child(node_type const & v, const char_type c, 
size_type & char_pos) const + { + if (is_leaf(v)) + return root(); + comp_char_type cc = m_csa.char2comp[c]; + if (cc == 0 and c != 0) + return root(); + size_type char_ex_max_pos = m_csa.C[((size_type)1) + cc], char_inc_min_pos = m_csa.C[cc]; + size_type d = depth(v); + char_pos = get_char_pos(v.i, d, m_csa); + if (char_pos >= char_ex_max_pos) + { + return root(); + } + else if (char_pos >= char_inc_min_pos) + { + return select_child(v, 1); + } + size_type child_cnt = degree(v); + char_pos = get_char_pos(v.j, d, m_csa); + if (char_pos < char_inc_min_pos) + { + return root(); + } + else if (char_pos < char_ex_max_pos) + { + return select_child(v, child_cnt); + } + size_type l_bound = 2, r_bound = child_cnt, mid, kpos, ckpos, l_index; + while (l_bound < r_bound) + { + mid = (l_bound + r_bound) >> 1; + l_index = select_l_index(v, mid - 1, kpos, ckpos); + char_pos = get_char_pos(l_index, d, m_csa); + if (char_inc_min_pos > char_pos) + { + l_bound = mid + 1; + } + else if (char_ex_max_pos <= char_pos) + { + r_bound = mid; + } + else + { + size_type lp1_index = m_bp_support.rank(m_bp_support.find_open(ckpos - 1)) - 1; + size_type jp1pos = m_bp.size(); + if (lp1_index - 1 < size() - 1) + { + jp1pos = m_bp_support.select(lp1_index + 1); + } + return node_type(l_index, lp1_index - 1, kpos, ckpos, jp1pos); + } + } + return root(); + } + node_type child(node_type const & v, const char_type c) const + { + size_type char_pos; + return child(v, c, char_pos); + } + char_type edge(node_type const & v, size_type d) const + { + assert(1 <= d); + assert(d <= depth(v)); + size_type order = get_char_pos(v.i, d - 1, m_csa); + size_type c_begin = 1, c_end = ((size_type)m_csa.sigma) + 1, mid; + while (c_begin < c_end) + { + mid = (c_begin + c_end) >> 1; + if (m_csa.C[mid] <= order) + { + c_begin = mid + 1; + } + else + { + c_end = mid; + } + } + return m_csa.comp2char[c_begin - 1]; + } + node_type lca(node_type v, node_type w) const + { + if (v.i > w.i or (v.i == w.i and v.j < w.j)) + { 
+ std::swap(v, w); + } + if (v.j >= w.j) + { + return v; + } + else + { + size_type min_index = rmq(v.i + 1, w.j); + size_type min_index_pos = m_bp_support.select(min_index + 1); + size_type min_index_cpos = m_bp_support.find_close(min_index_pos); + if (min_index_cpos >= (m_bp.size() - m_csa.sigma)) + { + return root(); + } + size_type new_j = nsv(min_index, min_index_pos) - 1; + size_type new_ipos, new_icpos; + size_type new_i = psv(min_index, min_index_pos, min_index_cpos, new_ipos, new_icpos); + size_type jp1pos = m_bp.size(); + if (new_j < size() - 1) + { + jp1pos = m_bp_support.select(new_j + 2); + } + return node_type(new_i, new_j, new_ipos, new_icpos, jp1pos); + } + } + size_type depth(node_type const & v) const + { + if (v == root()) + { + return 0; + } + else if (v.i == v.j) + { + return size() - m_csa[v.i]; + } + else + { + size_type kpos, ckpos; + size_type l = select_l_index(v, 1, kpos, ckpos); + return m_lcp[l]; + } + } + size_type node_depth(node_type v) const + { + size_type d = 0; + while (v != root()) + { + ++d; + v = parent(v); + } + return d; + } + node_type sl(node_type const & v) const + { + if (v == root()) + return root(); + size_type i = m_csa.psi[v.i]; + if (is_leaf(v)) + { + if (v.i == 0 and v.j == 0) + return root(); + else + return select_leaf(i + 1); + } + size_type j = m_csa.psi[v.j]; + assert(i < j); + size_type min_index = rmq(i + 1, j); + size_type min_index_pos = m_bp_support.select(min_index + 1); + size_type min_index_cpos = m_bp_support.find_close(min_index_pos); + if (min_index_cpos >= (m_bp.size() - m_csa.sigma)) + { + return root(); + } + size_type new_j = nsv(min_index, min_index_pos) - 1; + size_type new_ipos, new_icpos; + size_type new_i = psv(min_index, min_index_pos, min_index_cpos, new_ipos, new_icpos); + size_type jp1pos = m_bp.size(); + if (new_j < size() - 1) + { + jp1pos = m_bp_support.select(new_j + 2); + } + return node_type(new_i, new_j, new_ipos, new_icpos, jp1pos); + } + node_type wl(node_type const & v, const 
char_type c) const + { + size_type c_left = m_csa.bwt.rank(v.i, c); + size_type c_right = m_csa.bwt.rank(v.j + 1, c); + if (c_left == c_right) + return root(); + if (c_left + 1 == c_right) + return select_leaf(m_csa.C[m_csa.char2comp[c]] + c_left + 1); + else + { + size_type left = m_csa.C[m_csa.char2comp[c]] + c_left; + size_type right = m_csa.C[m_csa.char2comp[c]] + c_right - 1; + assert(left < right); + size_type ipos = m_bp_support.select(left + 1); + size_type jp1pos = m_bp.size(); + if (right < size() - 1) + { + jp1pos = m_bp_support.select(right + 2); + } + return node_type(left, right, ipos, m_bp_support.find_close(ipos), jp1pos); + } + } + size_type sn(node_type const & v) const + { + assert(is_leaf(v)); + return m_csa[v.i]; + } + size_type id(node_type const & v) const + { + if (is_leaf(v)) + { + return v.i; + } + size_type ckpos; + if (v.cipos > v.jp1pos) + { + ckpos = v.jp1pos - 1; + } + else + { + ckpos = v.cipos - 1; + } + assert(m_bp[ckpos] == 0); + size_type r0ckpos = ckpos - m_bp_support.rank(ckpos); + return size() + m_first_child_rank(r0ckpos); + } + node_type inv_id(size_type id) + { + if (id < size()) + { + return select_leaf(id + 1); + } + else + { + size_type r0ckpos = 0; + { + id = id - size() + 1; + size_type lb = 0, rb = m_bp.size(); + while (rb - lb > 1) + { + size_type mid = lb + (rb - lb) / 2; + size_type arg = m_first_child_rank(mid); + if (arg < id) + { + lb = mid; + } + else + { + rb = mid; + } + } + r0ckpos = lb; + } + size_type ckpos = 0; + { + size_type lb = 0, rb = m_bp.size(); + while (rb - lb > 1) + { + size_type mid = lb + (rb - lb) / 2; + size_type arg = mid - m_bp_support.rank(mid - 1); + if (arg < r0ckpos + 1) + { + lb = mid; + } + else + { + rb = mid; + } + } + ckpos = lb; + } + if (ckpos == m_bp.size() - 1) + { + return root(); + } + if (m_bp[ckpos + 1]) + { + size_type jp1pos = ckpos + 1; + size_type j = m_bp_support.rank(jp1pos - 1) - 1; + size_type kpos = m_bp_support.find_open(ckpos); + size_type ipos = 
m_bp_support.enclose(kpos); + size_type cipos = m_bp_support.find_close(ipos); + size_type i = m_bp_support.rank(ipos - 1); + return node_type(i, j, ipos, cipos, jp1pos); + } + else + { + size_type cipos = ckpos + 1; + size_type ipos = m_bp_support.find_open(cipos); + size_type i = m_bp_support.rank(ipos - 1); + size_type j = nsv(i, ipos) - 1; + size_type jp1pos = m_bp.size(); + if (j != size() - 1) + { + jp1pos = m_bp_support.select(j + 2); + } + return node_type(i, j, ipos, cipos, jp1pos); + } + } + } + size_type nodes() const + { + return m_nodes; + } + node_type node(size_type lb, size_type rb) const + { + size_type ipos = m_bp_support.select(lb + 1); + size_type jp1pos; + if (rb == size() - 1) + { + jp1pos = m_bp.size(); + } + else + { + jp1pos = m_bp_support.select(rb + 2); + } + return node_type(lb, rb, ipos, m_bp_support.find_close(ipos), jp1pos); + } + size_type tlcp_idx(size_type i) const + { + size_type ipos = m_bp_support.select(i + 1); + size_type cipos = m_bp_support.find_close(ipos); + return m_first_child_rank.rank(((ipos + cipos - 1) >> 1) - i); + } +}; +template +cst_sct3::cst_sct3(cache_config & config, bool build_only_bps) +{ + { + auto event = memory_monitor::event("bps-sct"); + int_vector_buffer<> lcp_buf(cache_file_name(conf::KEY_LCP, config)); + m_nodes = construct_supercartesian_tree_bp_succinct_and_first_child(lcp_buf, m_bp, m_first_child); + m_nodes += m_bp.size() / 2; + if (m_bp.size() == 2) + { + m_nodes = 1; + } + } + { + auto event = memory_monitor::event("bpss-sct"); + util::init_support(m_bp_support, &m_bp); + util::init_support(m_first_child_rank, &m_first_child); + util::init_support(m_first_child_select, &m_first_child); + } + if (!build_only_bps) + { + auto event = memory_monitor::event("clcp"); + cache_config tmp_config(false, config.dir, config.id, config.file_map); + construct_lcp(m_lcp, *this, tmp_config); + config.file_map = tmp_config.file_map; + } + if (!build_only_bps) + { + auto event = memory_monitor::event("load 
csa"); + load_from_cache(m_csa, std::string(conf::KEY_CSA) + "_" + util::class_to_hash(m_csa), config); + } +} +template +auto cst_sct3::serialize(std::ostream & out, + structure_tree_node * v, + std::string name) const -> size_type +{ + structure_tree_node * child = structure_tree::add_child(v, name, util::class_name(*this)); + size_type written_bytes = 0; + written_bytes += m_csa.serialize(out, child, "csa"); + written_bytes += m_lcp.serialize(out, child, "lcp"); + written_bytes += m_bp.serialize(out, child, "bp"); + written_bytes += m_bp_support.serialize(out, child, "bp_support"); + written_bytes += m_first_child.serialize(out, child, "mark_child"); + written_bytes += m_first_child_rank.serialize(out, child, "mark_child_rank"); + written_bytes += m_first_child_select.serialize(out, child, "mark_child_select"); + written_bytes += write_member(m_nodes, out, child, "node_cnt"); + structure_tree::add_size(child, written_bytes); + return written_bytes; +} +template +void cst_sct3::load(std::istream & in) +{ + m_csa.load(in); + load_lcp(m_lcp, in, *this); + m_bp.load(in); + m_bp_support.load(in, &m_bp); + m_first_child.load(in); + m_first_child_rank.load(in, &m_first_child); + m_first_child_select.load(in, &m_first_child); + read_member(m_nodes, in); +} +template +template +void cst_sct3::CEREAL_SAVE_FUNCTION_NAME(archive_t & ar) const +{ + ar(CEREAL_NVP(m_csa)); + ar(CEREAL_NVP(m_lcp)); + ar(CEREAL_NVP(m_bp)); + ar(CEREAL_NVP(m_bp_support)); + ar(CEREAL_NVP(m_first_child)); + ar(CEREAL_NVP(m_first_child_rank)); + ar(CEREAL_NVP(m_first_child_select)); + ar(CEREAL_NVP(m_nodes)); +} +template +template +void cst_sct3::CEREAL_LOAD_FUNCTION_NAME(archive_t & ar) +{ + ar(CEREAL_NVP(m_csa)); + ar(CEREAL_NVP(m_lcp)); + set_lcp_pointer(m_lcp, *this); + ar(CEREAL_NVP(m_bp)); + ar(CEREAL_NVP(m_bp_support)); + m_bp_support.set_vector(&m_bp); + ar(CEREAL_NVP(m_first_child)); + ar(CEREAL_NVP(m_first_child_rank)); + m_first_child_rank.set_vector(&m_first_child); + 
ar(CEREAL_NVP(m_first_child_select)); + m_first_child_select.set_vector(&m_first_child); + ar(CEREAL_NVP(m_nodes)); +} +template +struct bp_interval +{ + t_int i; + t_int j; + t_int ipos; + t_int cipos; + t_int jp1pos; + bp_interval(t_int i = 0, t_int j = 0, t_int ipos = 0, t_int cipos = 0, t_int jp1pos = 0) : + i(i), + j(j), + ipos(ipos), + cipos(cipos), + jp1pos(jp1pos){}; + bp_interval(bp_interval const & iv) = default; + bp_interval(bp_interval && iv) = default; + bool operator<(bp_interval const & interval) const + { + if (i != interval.i) + return i < interval.i; + return j < interval.j; + } + bool operator==(bp_interval const & interval) const + { + return i == interval.i and j == interval.j; + } + bool operator!=(bp_interval const & interval) const + { + return !(*this == interval); + } + bp_interval & operator=(bp_interval const & interval) = default; + bp_interval & operator=(bp_interval && interval) = default; +}; +template +inline std::ostream & operator<<(std::ostream & os, bp_interval const & interval) +{ + os << "-[" << interval.i << "," << interval.j << "](" << interval.ipos << "," << interval.cipos << "," + << interval.jp1pos << ")"; + return os; +} +} +#endif +#endif +// clang-format on + diff --git a/include/seqan3/core/detail/customisation_point.hpp b/include/seqan3/core/detail/customisation_point.hpp index f447346b89..e1bb9710bd 100644 --- a/include/seqan3/core/detail/customisation_point.hpp +++ b/include/seqan3/core/detail/customisation_point.hpp @@ -9,6 +9,8 @@ #pragma once +#include + #include namespace seqan3::detail diff --git a/include/seqan3/core/platform.hpp b/include/seqan3/core/platform.hpp index 0940e61de7..e2e929ec59 100644 --- a/include/seqan3/core/platform.hpp +++ b/include/seqan3/core/platform.hpp @@ -105,14 +105,6 @@ # error SeqAn3 include directory not set correctly. Forgot to add -I ${INSTALLDIR}/include to your CXXFLAGS? 
#endif -// SDSL [required] -#if __has_include() -# include -static_assert(sdsl::sdsl_version_major == 3, "Only version 3 of the SDSL is supported by SeqAn3."); -#else -# error The sdsl library was not included correctly. Forgot to add -I ${INSTALLDIR}/include to your CXXFLAGS? -#endif - // Cereal [optional] /*!\def SEQAN3_WITH_CEREAL * \brief Whether CEREAL support is available or not. diff --git a/include/seqan3/io/detail/ignore_output_iterator.hpp b/include/seqan3/io/detail/ignore_output_iterator.hpp index ae7d696130..7afec6b80c 100644 --- a/include/seqan3/io/detail/ignore_output_iterator.hpp +++ b/include/seqan3/io/detail/ignore_output_iterator.hpp @@ -9,6 +9,8 @@ #pragma once +#include + #include namespace seqan3::detail diff --git a/include/seqan3/io/detail/in_file_iterator.hpp b/include/seqan3/io/detail/in_file_iterator.hpp index bbc77543cd..d8631e555a 100644 --- a/include/seqan3/io/detail/in_file_iterator.hpp +++ b/include/seqan3/io/detail/in_file_iterator.hpp @@ -10,6 +10,7 @@ #pragma once #include +#include #include #include diff --git a/include/seqan3/search/dream_index/interleaved_bloom_filter.hpp b/include/seqan3/search/dream_index/interleaved_bloom_filter.hpp index 2039e88ea7..3e17b37bb2 100644 --- a/include/seqan3/search/dream_index/interleaved_bloom_filter.hpp +++ b/include/seqan3/search/dream_index/interleaved_bloom_filter.hpp @@ -12,8 +12,7 @@ #include #include -#include - +#include #include #include diff --git a/include/seqan3/search/fm_index/bi_fm_index_cursor.hpp b/include/seqan3/search/fm_index/bi_fm_index_cursor.hpp index 1215dbf526..ed132fa079 100644 --- a/include/seqan3/search/fm_index/bi_fm_index_cursor.hpp +++ b/include/seqan3/search/fm_index/bi_fm_index_cursor.hpp @@ -12,11 +12,10 @@ #include #include -#include - #include #include #include +#include #include #include #include diff --git a/include/seqan3/search/fm_index/concept.hpp b/include/seqan3/search/fm_index/concept.hpp index 6e61097115..96c65e36c7 100644 --- 
a/include/seqan3/search/fm_index/concept.hpp +++ b/include/seqan3/search/fm_index/concept.hpp @@ -12,8 +12,7 @@ #include #include -#include - +#include #include namespace seqan3::detail diff --git a/include/seqan3/search/fm_index/fm_index.hpp b/include/seqan3/search/fm_index/fm_index.hpp index cd8cfc93f0..c391f92b45 100644 --- a/include/seqan3/search/fm_index/fm_index.hpp +++ b/include/seqan3/search/fm_index/fm_index.hpp @@ -13,9 +13,8 @@ #include #include -#include - #include +#include #include #include #include diff --git a/include/seqan3/search/fm_index/fm_index_cursor.hpp b/include/seqan3/search/fm_index/fm_index_cursor.hpp index c83058e3dc..3fc7288fd1 100644 --- a/include/seqan3/search/fm_index/fm_index_cursor.hpp +++ b/include/seqan3/search/fm_index/fm_index_cursor.hpp @@ -13,10 +13,9 @@ #include #include -#include - #include #include +#include #include #include #include diff --git a/include/seqan3/utility/container/aligned_allocator.hpp b/include/seqan3/utility/container/aligned_allocator.hpp index 8a5ffce4d6..2665be4c24 100644 --- a/include/seqan3/utility/container/aligned_allocator.hpp +++ b/include/seqan3/utility/container/aligned_allocator.hpp @@ -9,6 +9,7 @@ #pragma once +#include #include #include #include diff --git a/include/seqan3/utility/math.hpp b/include/seqan3/utility/math.hpp index 74b91022ec..e0393002dc 100644 --- a/include/seqan3/utility/math.hpp +++ b/include/seqan3/utility/math.hpp @@ -14,6 +14,7 @@ #include #include #include +#include #include diff --git a/test/documentation/seqan3_doxygen_cfg.in b/test/documentation/seqan3_doxygen_cfg.in index 57c48fee25..169532e59b 100644 --- a/test/documentation/seqan3_doxygen_cfg.in +++ b/test/documentation/seqan3_doxygen_cfg.in @@ -161,7 +161,7 @@ RECURSIVE = YES EXCLUDE = EXCLUDE_SYMLINKS = NO EXCLUDE_PATTERNS = */doc/*.cpp \ - */include/seqan3/contrib/std/* \ + */include/seqan3/contrib/* \ */include/seqan3/vendor/* \ ${SEQAN3_DOXYGEN_EXCLUDE_PATTERNS} EXCLUDE_SYMBOLS = seqan3::contrib \ diff --git 
a/test/performance/range/container_push_back_benchmark.cpp b/test/performance/range/container_push_back_benchmark.cpp index 5edf1ab431..870721af72 100644 --- a/test/performance/range/container_push_back_benchmark.cpp +++ b/test/performance/range/container_push_back_benchmark.cpp @@ -8,11 +8,10 @@ #include #include -#include - #include #include #include +#include #include #include diff --git a/test/performance/range/container_seq_read_benchmark.cpp b/test/performance/range/container_seq_read_benchmark.cpp index 2277a6efd9..4c679d5593 100644 --- a/test/performance/range/container_seq_read_benchmark.cpp +++ b/test/performance/range/container_seq_read_benchmark.cpp @@ -8,11 +8,10 @@ #include #include -#include - #include #include #include +#include #include #include #include diff --git a/test/performance/range/container_seq_write_benchmark.cpp b/test/performance/range/container_seq_write_benchmark.cpp index 27f45e1269..fa42c92b93 100644 --- a/test/performance/range/container_seq_write_benchmark.cpp +++ b/test/performance/range/container_seq_write_benchmark.cpp @@ -8,11 +8,10 @@ #include #include -#include - #include #include #include +#include #include #include #include diff --git a/test/scripts/amalgamate-sdsl.sh b/test/scripts/amalgamate-sdsl.sh new file mode 100755 index 0000000000..245dd44ee3 --- /dev/null +++ b/test/scripts/amalgamate-sdsl.sh @@ -0,0 +1,53 @@ +#!/usr/bin/bash + +# SPDX-FileCopyrightText: 2006-2025 Knut Reinert & Freie Universität Berlin +# SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik +# SPDX-License-Identifier: BSD-3-Clause + +set -Eeuo pipefail + +SDSL_REPO_PATH="${PWD}/sdsl-lite/" +CXX=${CXX:-c++} +CPP_AMALGAMATE=cpp-amalgamate + +if [[ ! -d "${SDSL_REPO_PATH}" || ! -f "${SDSL_REPO_PATH}"/include/sdsl/version.hpp ]]; then + mkdir -p "${SDSL_REPO_PATH}" + wget -q -O- https://github.com/xxsds/sdsl-lite/archive/refs/heads/master.tar.gz | tar -xz -C "${SDSL_REPO_PATH}" --strip-components=1 +fi + +if ! 
command -v "${CPP_AMALGAMATE}" &> /dev/null; then + if [[ ! -f cpp-amalgamate ]]; then + wget -q https://github.com/Felerius/cpp-amalgamate/releases/download/1.0.1/cpp-amalgamate-x86_64-unknown-linux-gnu -O cpp-amalgamate + fi + + chmod +x cpp-amalgamate + CPP_AMALGAMATE=${PWD}/cpp-amalgamate +fi + +cat < includes.cpp +#include +#include +#include +#include +EOL + +"${CPP_AMALGAMATE}" --quiet --dir "${SDSL_REPO_PATH}/include" --cyclic-include warn --output amalgamated.hpp includes.cpp +grep -v "#include decycled.hpp +# This command strips all comments. +CONTENT=$(${CXX} -fpreprocessed -dD -E -P -w decycled.hpp) + +cat < sdsl-lite.hpp +// SPDX-FileCopyrightText: 2016 SDSL Project Authors +// SPDX-License-Identifier: BSD-3-Clause + +// This file was generated by https://github.com/seqan/seqan3/blob/main/test/scripts/amalgamate-sdsl.sh + +#pragma once + +// clang-format off +${CONTENT} +// clang-format on + +EOL + +echo "${PWD}/sdsl-lite.hpp" diff --git a/test/snippet/core/detail/template_inspection_usage.cpp b/test/snippet/core/detail/template_inspection_usage.cpp index 1d489537c2..91cc6293d5 100644 --- a/test/snippet/core/detail/template_inspection_usage.cpp +++ b/test/snippet/core/detail/template_inspection_usage.cpp @@ -2,6 +2,8 @@ // SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik // SPDX-License-Identifier: CC0-1.0 +#include + #include #include diff --git a/test/snippet/utility/tuple_utility.cpp b/test/snippet/utility/tuple_utility.cpp index 47379491e6..450cf77f00 100644 --- a/test/snippet/utility/tuple_utility.cpp +++ b/test/snippet/utility/tuple_utility.cpp @@ -2,6 +2,8 @@ // SPDX-FileCopyrightText: 2016-2025 Knut Reinert & MPI für molekulare Genetik // SPDX-License-Identifier: CC0-1.0 +#include + #include int main() diff --git a/test/unit/alphabet/container/container_concept_test.cpp b/test/unit/alphabet/container/container_concept_test.cpp index dad5171044..f1f3ce508d 100644 --- 
a/test/unit/alphabet/container/container_concept_test.cpp +++ b/test/unit/alphabet/container/container_concept_test.cpp @@ -11,13 +11,12 @@ #include #include -#include - #include #include #include #include #include +#include #include TEST(range_concept, forward_range)