Skip to content

Commit

Permalink
Add Column Imprints (#45)
Browse files Browse the repository at this point in the history
We integrated Column Imprints into Bliss.

Column Imprints exposes two tuning knobs (block size and number of bins) and supports several data types. The exact data type must be passed explicitly as a string (for example, "unsigned long").
  • Loading branch information
altramarine authored Feb 21, 2025
1 parent 4281f2c commit 0336039
Show file tree
Hide file tree
Showing 9 changed files with 122 additions and 1 deletion.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ add_library(bliss OBJECT
${CMAKE_SOURCE_DIR}/src/bliss/bench_lipp.h
${CMAKE_SOURCE_DIR}/src/bliss/bench_alex.h
${CMAKE_SOURCE_DIR}/src/bliss/bench_btree.h
${CMAKE_SOURCE_DIR}/src/bliss/bench_imprints.h
${CMAKE_SOURCE_DIR}/src/bliss/bench_columnsketches.h
${CMAKE_SOURCE_DIR}/src/bliss/bench_skiplist.h
${CMAKE_SOURCE_DIR}/src/bliss/bench_pgm.h
Expand All @@ -75,6 +76,7 @@ target_link_libraries(bliss PUBLIC
alex
lipp
tlx
imprints
skiplist
pgm
art
Expand Down
12 changes: 12 additions & 0 deletions external/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,18 @@ endif()
add_library(tlx INTERFACE)
target_include_directories(tlx INTERFACE ${tlx_SOURCE_DIR}/)

# Column Imprints: fetched from GitHub and exposed as the header-only
# INTERFACE target `imprints`.
# NOTE(review): GIT_TAG looks like a branch name rather than a fixed commit, so
# the fetched revision may change between configures -- consider pinning a hash.
FetchContent_Declare(
imprints
GIT_REPOSITORY https://github.com/altramarine/imprints.git
GIT_TAG cmake-version
)
# Populate manually (download only) instead of FetchContent_MakeAvailable, so
# the only `imprints` target in this build is the INTERFACE library below.
FetchContent_GetProperties(imprints)
if (NOT imprints_POPULATED)
FetchContent_Populate(imprints)
endif()
# Consumers linking `imprints` get <source>/include on their include path.
add_library(imprints INTERFACE)
target_include_directories(imprints INTERFACE ${imprints_SOURCE_DIR}/include)

FetchContent_Declare(
columnsketches
GIT_REPOSITORY https://github.com/altramarine/ColumnSketches-cpp.git
Expand Down
65 changes: 65 additions & 0 deletions src/bliss/bench_imprints.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#ifndef BLISS_BENCH_IMPRINTS
#define BLISS_BENCH_IMPRINTS

#include <cstdint>
#include <memory>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include "bliss/bliss_index.h"
#include "column_imprints.h"

namespace bliss {

/// Adapter that exposes a Column Imprints index through the BlissIndex
/// interface.
///
/// Only the value column is indexed: bulkload() discards the keys and hands
/// the values to Imprints, and queries are value-range scans. Point lookups
/// by key and single-record inserts are not supported and throw.
///
/// The underlying Imprints object is held by std::unique_ptr, which makes this
/// class non-copyable (a copy would otherwise have double-deleted it).
///
/// @tparam KEY_TYPE    key type required by the BlissIndex interface; keys are
///                     not stored by this index.
/// @tparam VALUE_TYPE  element type of the indexed column.
template <typename KEY_TYPE, typename VALUE_TYPE>
class BlissImprintsIndex : public BlissIndex<KEY_TYPE, VALUE_TYPE> {
   public:
    /// @param blocksize  block-size tuning knob, forwarded to Imprints.
    /// @param maxbins    bin-count tuning knob, forwarded to Imprints.
    /// @param type_name  textual name of the column's data type, forwarded to
    ///                   Imprints (e.g. "unsigned long").
    BlissImprintsIndex(int blocksize = 64, int maxbins = 64,
                       std::string type_name = std::string("unsigned long"))
        // imprints_ is declared first, so it is built from `type_name` before
        // the parameter is moved into type_name_ below.
        : imprints_(std::make_unique<Imprints<VALUE_TYPE>>(blocksize, maxbins,
                                                           type_name)),
          blocksize_(blocksize),
          maxbins_(maxbins),
          type_name_(std::move(type_name)) {}

    /// Builds the imprints over the `.second` component of every pair, in the
    /// order given; the `.first` components (keys) are ignored.
    void bulkload(
        std::vector<std::pair<KEY_TYPE, VALUE_TYPE>> values) override {
        std::vector<VALUE_TYPE> column;
        column.reserve(values.size());
        for (const auto &entry : values) {
            column.push_back(entry.second);
        }
        imprints_->bulkload(column);
    }

    /// Runs a range scan over [start, end] as interpreted by
    /// Imprints::range_scan and discards the result.
    ///
    /// @return true if range_scan returned a non-null result, false otherwise.
    bool get(VALUE_TYPE start, VALUE_TYPE end) {
        unsigned int *res = imprints_->range_scan(start, end);
        if (res == nullptr) {
            return false;
        }
        // NOTE(review): range_scan's allocator is not visible from this
        // header. The result is presumably an array, and the unit test
        // releases it with free(); confirm against column_imprints.h and use
        // the matching deallocation (free / delete[]) here.
        delete res;
        return true;
    }

    /// Returns the raw result of Imprints::range_scan(start, end).
    ///
    /// The pointer may be null. Ownership passes to the caller, who must
    /// release it with the deallocation function matching range_scan's
    /// allocator.
    /// NOTE(review): the result appears to be a bitmask with one bit per
    /// bulk-loaded row packed into 32-bit words -- confirm in column_imprints.h.
    uint32_t *get_bitmask(VALUE_TYPE start, VALUE_TYPE end) {
        return imprints_->range_scan(start, end);
    }

    /// Point lookup by key is not supported.
    /// @throws std::runtime_error always. (Previously the exception object was
    ///         constructed but never thrown, so the call silently returned
    ///         false.)
    bool get(KEY_TYPE /*key*/) override {
        throw std::runtime_error(
            "Column Imprints does not support get(key, value).");
    }

    /// Single-record insert is not supported.
    /// @throws std::runtime_error always. (Previously the exception object was
    ///         constructed but never thrown, so the call was a silent no-op.)
    void put(KEY_TYPE /*key*/, VALUE_TYPE /*value*/) override {
        throw std::runtime_error(
            "Column Imprints does not support put(key, value).");
    }

    /// Nothing to flush or finalize for this index.
    void end_routine() override {}

   private:
    std::unique_ptr<Imprints<VALUE_TYPE>> imprints_;  // owned index
    int blocksize_, maxbins_;                         // knobs kept for reference
    std::string type_name_;                           // data-type name given at construction
};

} // namespace bliss

#endif // !BLISS_BENCH_IMPRINTS
1 change: 0 additions & 1 deletion src/bliss/util/execute.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,5 +46,4 @@ void execute_non_empty_reads(bliss::BlissIndex<key_type, value_type> &tree,
} // namespace executor
} // namespace utils
} // namespace bliss

#endif
3 changes: 3 additions & 0 deletions src/bliss_bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "bliss/util/execute.h"
#include "bliss/util/reader.h"
#include "bliss/util/timer.h"
#include "bliss/bench_imprints.h"
#include "include/pgm/pgm_index_dynamic.hpp"
#include "skip_list.h"

Expand Down Expand Up @@ -175,6 +176,8 @@ int main(int argc, char *argv[]) {
index.reset(new bliss::BlissLippIndex<key_type, value_type>());
} else if (config.index == "btree") {
index.reset(new bliss::BlissBTreeIndex<key_type, value_type>());
} else if (config.index == "imprints") {
index.reset(new bliss::BlissImprintsIndex<key_type, value_type>(/* block_size */64, /* max_bins */64));
} else if (config.index == "columnskteches") {
#ifdef COMPILE_COLUMNSKETCHES
index.reset(new bliss::BlissColumnSketchesIndex<key_type, value_type>());
Expand Down
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ target_include_directories(bliss_test_infra PUBLIC
add_subdirectory(test_alex)
add_subdirectory(test_lipp)
add_subdirectory(test_btree)
add_subdirectory(test_imprints)
add_subdirectory(test_skiplist)
add_subdirectory(test_art)
add_subdirectory(test_leveldb)
Expand Down
2 changes: 2 additions & 0 deletions tests/bliss_index_tests.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
#include "bliss/bench_btree.h"
#include "bliss/bench_leveldb.h"
#include "bliss/bench_lipp.h"
#include "bliss/bench_imprints.h"
#include "bliss/bench_skiplist.h"
#include "bliss/bench_pgm.h"
#include "bliss/bench_skiplist.h"
#include "bliss/bliss_index.h"
Expand Down
9 changes: 9 additions & 0 deletions tests/test_imprints/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Builds one GoogleTest executable for this directory and registers its tests
# with CTest.

# Name the executable after the directory containing this file.
get_filename_component(EXEC ${CMAKE_CURRENT_SOURCE_DIR} NAME)
# Every source matching *_tests.cpp under this directory is part of the test
# binary.
file(GLOB_RECURSE CPP_TESTS "*_tests.cpp")
add_executable(${EXEC} ${CPP_TESTS})
# gtest_main supplies main(), so the test sources only contain TEST/TEST_F
# bodies.
target_link_libraries(${EXEC} PRIVATE
bliss
bliss_test_infra
GTest::gtest_main)
include(GoogleTest)
# Register the test cases of the executable with CTest.
gtest_discover_tests(${EXEC})
28 changes: 28 additions & 0 deletions tests/test_imprints/imprint_tests.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#include "bliss_index_tests.h"

class ImprintsTest : public BlissIndexTest {};

TEST_F(ImprintsTest, TestImprint_Random) {
index.reset(new bliss::BlissImprintsIndex<size_t, key_type>(64, 64, std::string("unsigned long")));
std::vector<key_type> data;
GenerateData(data, num_keys, false);
std::vector < std::pair<size_t, key_type> > bulkload_data;
for (size_t i = 0; i < data.size(); i ++) {
bulkload_data.push_back(std::make_pair(i, data[i]));
}
index->bulkload(bulkload_data);
auto minimum = 0;
for (size_t key = 0; key < num_keys; key += std::max(1,num_keys / 20)) {
uint32_t *result = nullptr;
bliss::BlissImprintsIndex<size_t, key_type> * index_copy = (bliss::BlissImprintsIndex<size_t, key_type> *)index.get();
result = (index_copy)->get_bitmask(minimum, data[key]);
for(size_t i = 0; i < num_keys; i ++) {
if((result[i / 32] >> (i & 31)) & 1u) {
EXPECT_TRUE(data[i] <= data[key] && data[i] > minimum);
} else {
EXPECT_FALSE(data[i] <= data[key] && data[i] > minimum);
}
}
free(result);
}
}

0 comments on commit 0336039

Please sign in to comment.