Skip to content

Commit

Permalink
General cleanup (#64)
Browse files (browse the repository at this point in the history)
Create pisa library
Declare dependency for gumbo
Fix #60
Fix #22
  • Loading branch information
amallia authored Jan 16, 2019
1 parent e3a3121 commit 3672c4c
Show file tree
Hide file tree
Showing 114 changed files with 525 additions and 657 deletions.
33 changes: 26 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)

configure_file(
${DS2I_SOURCE_DIR}/include/ds2i_config.hpp.in
${DS2I_SOURCE_DIR}/include/ds2i_config.hpp
${DS2I_SOURCE_DIR}/include/pisa/ds2i_config.hpp.in
${DS2I_SOURCE_DIR}/include/pisa/ds2i_config.hpp
ESCAPE_QUOTES)

if(NOT CMAKE_BUILD_TYPE)
Expand All @@ -31,6 +31,7 @@ set_target_properties(gumbo::gumbo PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
${CMAKE_CURRENT_SOURCE_DIR}/external/gumbo-parser/src)
set_property(TARGET gumbo::gumbo APPEND PROPERTY IMPORTED_LOCATION
${CMAKE_BINARY_DIR}/gumbo-parser/lib/libgumbo.a)
add_dependencies( gumbo::gumbo gumbo-external )

# Add code coverage
list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/external/CMake-codecov/cmake")
Expand Down Expand Up @@ -63,11 +64,29 @@ set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
link_libraries(Threads::Threads)

# add the root directory to include path to make includes absolute
include_directories(${DS2I_SOURCE_DIR}/external
${STXXL_INCLUDE_DIRS}
${DS2I_SOURCE_DIR}/include
)

include_directories(include)
add_library(pisa INTERFACE)
target_include_directories(pisa INTERFACE
$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include/pisa>
)
target_link_libraries(pisa INTERFACE
Threads::Threads
Boost::boost
QMX
mio
ParallelSTL
GSL
FastPFor
streamvbyte
MaskedVByte
simdcomp
gumbo::gumbo
Boost::filesystem
Porter2
warcpp
)
target_include_directories(pisa INTERFACE external)

add_subdirectory(src)

Expand Down
40 changes: 8 additions & 32 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,43 +1,19 @@
add_executable(index_perftest index_perftest.cpp)
target_link_libraries(index_perftest
Boost::boost
mio
FastPFor
streamvbyte
MaskedVByte
QMX
simdcomp
ParallelSTL
GSL
)
pisa
)

add_executable(perftest_interpolative perftest_interpolative.cpp)
target_link_libraries(perftest_interpolative
Boost::boost
mio
FastPFor
QMX
simdcomp
GSL
)
pisa
)

add_executable(selective_queries selective_queries.cpp)
target_link_libraries(selective_queries
Boost::boost
mio
FastPFor
streamvbyte
MaskedVByte
QMX
simdcomp
ParallelSTL
GSL
)
pisa
)

add_executable(scan_perftest scan_perftest.cpp)
target_link_libraries(scan_perftest
Boost::boost
mio
ParallelSTL
GSL
)
pisa
)
12 changes: 6 additions & 6 deletions benchmarks/index_perftest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
#include "index_types.hpp"
#include "util/util.hpp"

using ds2i::logger;
using ds2i::get_time_usecs;
using ds2i::do_not_optimize_away;
using pisa::logger;
using pisa::get_time_usecs;
using pisa::do_not_optimize_away;

template <typename IndexType, bool with_freqs>
void perftest(IndexType const& index, std::string const& type)
Expand Down Expand Up @@ -65,7 +65,7 @@ void perftest(IndexType const& index, std::string const& type)
if (size < min_length) continue;

skip_values.emplace_back(i, std::vector<uint64_t>());
for (size_t i = 0; i < std::min(ds2i::ceil_div(size, skip),
for (size_t i = 0; i < std::min(pisa::ceil_div(size, skip),
max_calls_per_list); ++i) {
reader.move(i * skip);
skip_values.back().second.push_back(reader.docid());
Expand Down Expand Up @@ -106,7 +106,7 @@ void perftest(const char* index_filename, std::string const& type)
logger() << "Loading index from " << index_filename << std::endl;
IndexType index;
mio::mmap_source m(index_filename);
ds2i::mapper::map(index, m, ds2i::mapper::map_flags::warmup);
pisa::mapper::map(index, m, pisa::mapper::map_flags::warmup);

perftest<IndexType, false>(index, type);
perftest<IndexType, true>(index, type);
Expand All @@ -115,7 +115,7 @@ void perftest(const char* index_filename, std::string const& type)

int main(int argc, const char** argv) {

using namespace ds2i;
using namespace pisa;

if (argc != 3) {
std::cerr << "Usage: " << argv[0]
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/perftest_interpolative.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

int main()
{
using namespace ds2i;
using namespace pisa;
static const size_t size = interpolative_block::block_size;
static const size_t runs = 1 << 20;

Expand Down
18 changes: 9 additions & 9 deletions benchmarks/scan_perftest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,18 @@
#include "sequence/uniform_partitioned_sequence.hpp"
#include "util/util.hpp"

using ds2i::logger;
using ds2i::get_time_usecs;
using ds2i::do_not_optimize_away;
using pisa::logger;
using pisa::get_time_usecs;
using pisa::do_not_optimize_away;

template <typename BaseSequence>
void perftest(const char* index_filename)
{
typedef ds2i::sequence_collection<BaseSequence> collection_type;
typedef pisa::sequence_collection<BaseSequence> collection_type;
logger() << "Loading collection from " << index_filename << std::endl;
collection_type coll;
mio::mmap_source m(index_filename);
ds2i::mapper::map(coll, m, ds2i::mapper::map_flags::warmup);
pisa::mapper::map(coll, m, pisa::mapper::map_flags::warmup);

if (true) {
logger() << "Scanning all the posting lists" << std::endl;
Expand Down Expand Up @@ -127,10 +127,10 @@ void perftest(const char* index_filename)
}
int main(int argc, const char** argv) {

using ds2i::compact_elias_fano;
using ds2i::indexed_sequence;
using ds2i::partitioned_sequence;
using ds2i::uniform_partitioned_sequence;
using pisa::compact_elias_fano;
using pisa::indexed_sequence;
using pisa::partitioned_sequence;
using pisa::uniform_partitioned_sequence;

if (argc != 3) {
std::cerr << "Usage: " << argv[0]
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/selective_queries.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ template <typename IndexType>
void selective_queries(const char* index_filename,
std::string const& type)
{
using namespace ds2i;
using namespace pisa;


IndexType index;
Expand Down Expand Up @@ -59,7 +59,7 @@ void selective_queries(const char* index_filename,


int main(int, const char** argv) {
using namespace ds2i;
using namespace pisa;

std::string type = argv[1];
const char* index_filename = argv[2];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#include <sys/mman.h>
#endif

namespace ds2i {
namespace pisa {

template <typename Source = mio::mmap_source>
class base_binary_collection {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

#include "binary_collection.hpp"

namespace ds2i {
namespace pisa {

class binary_freq_collection {
public:
Expand Down
4 changes: 2 additions & 2 deletions include/bit_vector.hpp → include/pisa/bit_vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

#include "succinct/mappable_vector.hpp"

namespace ds2i {
namespace pisa {

namespace detail {
inline size_t words_for(uint64_t n) { return ceil_div(n, 64); }
Expand Down Expand Up @@ -476,4 +476,4 @@ class bit_vector {
mapper::mappable_vector<uint64_t> m_bits;
};

} // namespace ds2i
} // namespace pisa
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

#include "codec/compact_elias_fano.hpp"

namespace ds2i {
namespace pisa {

class bitvector_collection {
public:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include "codec/compact_elias_fano.hpp"
#include "block_posting_list.hpp"

namespace ds2i {
namespace pisa {

template <typename BlockCodec, bool Profile=false>
class block_freq_index {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include "util/util.hpp"
#include "util/block_profiler.hpp"

namespace ds2i {
namespace pisa {

template <typename BlockCodec, bool Profile=false>
struct block_posting_list {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#define PREDICT_FALSE(x) x
#endif

namespace ds2i {
namespace pisa {

class NotEnoughStorage : public std::runtime_error {
public:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include "global_parameters.hpp"
#include "util/util.hpp"

namespace ds2i {
namespace pisa {

struct all_ones_sequence {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#include "interpolative_coding.hpp"
#include "util/util.hpp"

namespace ds2i {
namespace pisa {

// workaround: VariableByte::decodeArray needs the buffer size, while we
// only know the number of values. It also pads to 32 bits. We need to
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#include "global_parameters.hpp"
#include "util/util.hpp"

namespace ds2i {
namespace pisa {

struct compact_elias_fano {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#include "global_parameters.hpp"
#include "util/util.hpp"

namespace ds2i {
namespace pisa {

struct compact_ranked_bitvector {

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#pragma once

namespace ds2i {
namespace pisa {

// note: n can be 0
void write_gamma(bit_vector_builder& bvb, uint64_t n)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

#include "util/broadword.hpp"

namespace ds2i {
namespace pisa {

class bit_writer {
public:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include "MaskedVByte/include/varintencode.h"
#include "MaskedVByte/include/varintdecode.h"

namespace ds2i {
namespace pisa {
struct maskedvbyte_block {
static const uint64_t block_size = 128;
static void encode(uint32_t const *in,
Expand Down Expand Up @@ -33,4 +33,4 @@ struct maskedvbyte_block {
return in + read;
}
};
} // namespace ds2i
} // namespace pisa
4 changes: 2 additions & 2 deletions include/codec/qmx.hpp → include/pisa/codec/qmx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

#include "QMX/qmx.hpp"

namespace ds2i {
namespace pisa {
struct qmx_block {
static const uint64_t block_size = 128;
static const uint64_t overflow = 512;
Expand Down Expand Up @@ -46,4 +46,4 @@ struct qmx_block {
return in + enc_len;
}
};
} // namespace ds2i
} // namespace pisa
4 changes: 2 additions & 2 deletions include/codec/simdbp.hpp → include/pisa/codec/simdbp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ extern "C" {
#include "simdcomp/include/simdbitpacking.h"
}

namespace ds2i {
namespace pisa {
struct simdbp_block {
static const uint64_t block_size = 128;
static void encode(uint32_t const *in,
Expand Down Expand Up @@ -37,4 +37,4 @@ struct simdbp_block {
return in + b * sizeof(__m128i);
}
};
} // namespace ds2i
} // namespace pisa
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#pragma once
#include "FastPFor/headers/simple16.h"

namespace ds2i {
namespace pisa {

struct simple16_block {
static const uint64_t block_size = 128;
Expand Down Expand Up @@ -37,4 +37,4 @@ struct simple16_block {
return ret;
}
};
} // namespace ds2i
} // namespace pisa
Loading

0 comments on commit 3672c4c

Please sign in to comment.