From e1d2135f580ea78bc0163a108c3d5b59ab2bc0fb Mon Sep 17 00:00:00 2001 From: Yanpeng Wei Date: Thu, 17 Oct 2024 18:31:19 +0800 Subject: [PATCH 01/24] Column Imprints integration --- 1.txt | 0 CMakeLists.txt | 2 + external/CMakeLists.txt | 16 ++- src/bliss/bench_imprints.h | 143 ++++++++++++++++++++++++++ src/bliss/bliss_index.h | 3 + src/bliss_bench.cpp | 3 + tests/CMakeLists.txt | 3 +- tests/bliss_index_tests.h | 1 + tests/test_imprints/CMakeLists.txt | 9 ++ tests/test_imprints/imprint_tests.cpp | 34 ++++++ 10 files changed, 212 insertions(+), 2 deletions(-) create mode 100644 1.txt create mode 100644 src/bliss/bench_imprints.h create mode 100644 tests/test_imprints/CMakeLists.txt create mode 100644 tests/test_imprints/imprint_tests.cpp diff --git a/1.txt b/1.txt new file mode 100644 index 0000000..e69de29 diff --git a/CMakeLists.txt b/CMakeLists.txt index 66226eb..8698192 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,6 +51,7 @@ add_library(bliss OBJECT ${CMAKE_SOURCE_DIR}/src/bliss/bench_lipp.h ${CMAKE_SOURCE_DIR}/src/bliss/bench_alex.h ${CMAKE_SOURCE_DIR}/src/bliss/bench_btree.h + ${CMAKE_SOURCE_DIR}/src/bliss/bench_imprints.h ) target_compile_features(bliss PUBLIC @@ -63,6 +64,7 @@ target_link_libraries(bliss PUBLIC alex lipp tlx + imprints ) target_include_directories(bliss PUBLIC diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index 3531b2f..6db1e75 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -70,4 +70,18 @@ if (NOT tlx_POPULATED) endif() add_library(tlx INTERFACE) -target_include_directories(tlx INTERFACE ${tlx_SOURCE_DIR}/) \ No newline at end of file +target_include_directories(tlx INTERFACE ${tlx_SOURCE_DIR}/) + + +FetchContent_Declare( + imprints + GIT_REPOSITORY git@github.com:altramarine/imprints.git + GIT_TAG cmake-version +) +FetchContent_GetProperties(imprints) +if (NOT imprints_POPULATED) + FetchContent_Populate(imprints) +endif() + +add_library(imprints INTERFACE) +target_include_directories(imprints INTERFACE ${imprints_SOURCE_DIR}/include) \ No newline at end of file diff --git a/src/bliss/bench_imprints.h b/src/bliss/bench_imprints.h new file mode 100644 index 0000000..cc7604b --- /dev/null +++ b/src/bliss/bench_imprints.h @@ -0,0 +1,143 @@ +#ifndef BLISS_BENCH_IMPRINTS +#define BLISS_BENCH_IMPRINTS + +#include + +#include "bliss/bliss_index.h" +#include +#include + +namespace bliss { + +template +class BlissImprintsIndex : public BlissIndex { + public: + BlissImprintsIndex(int blocksize = 64, int maxbins = 64) : blocksize_(blocksize), maxbins_(maxbins) { + column = (Column *) malloc(sizeof(Column)); + strcpy(column->type_name, boost::typeindex::type_id().pretty_name().c_str()); + // std::cout << boost::typeindex::type_id().pretty_name() << std::endl; + // printf("typename is: %s\n", column->type_name); + if (strcmp(column->type_name, "tinyint") == 0 || strcmp(column->type_name, "boolean") == 0) { + column->coltype = TYPE_bte; + column->min.bval = 127; + column->max.bval = -127; + } else if (strcmp(column->type_name, "char") == 0 || strcmp(column->type_name,"smallint")== 0 || strcmp(column->type_name, "short")== 0) { + column->coltype = TYPE_sht; + column->min.sval = 32767; + column->max.sval = -32767; + } else if (strcmp(column->type_name, "decimal") == 0 || strcmp(column->type_name, "int") == 0 || strcmp(column->type_name, "date") == 0) { + column->coltype = TYPE_int; + column->min.ival = INT_MAX; + column->max.ival = INT_MIN; + } else if (strcmp(column->type_name, "long") == 0 || strcmp(column->type_name, "long int") == 0) { + column->coltype = TYPE_lng; + column->min.lval = LONG_MAX; + column->max.lval = LONG_MIN; + } else if (strcmp(column->type_name, "float") == 0 || strcmp(column->type_name, "real") == 0) { + column->coltype= TYPE_flt; + column->min.fval = FLT_MAX; + column->max.fval = FLT_MIN; + } else if (strcmp(column->type_name, "double") == 0 ) { + column->coltype = TYPE_dbl; + column->min.dval = DBL_MAX; + column->max.dval = -DBL_MAX; + } else if (strcmp(column->type_name, "oid") == 0 || strcmp(column->type_name, "unsigned long") == 0) { + column->coltype = TYPE_oid; + column->min.ulval = ULONG_MAX; + column->max.ulval = 0; + } else { + printf("error: type [%s] not supported\n", column->type_name); + std::runtime_error("[column imprints]: type not supported"); + } + std::cout << "column initiated" << std::endl; + }; + + + void bulkload( + std::vector> values) override { + // expects the pairs to be pre-sorted before performing bulk load + // this->_index.bulk_load(values.begin(), values.end()); + // binning() + column->col = (char *)new VALUE_TYPE[values.size()]; + for(size_t i = 0; i < values.size(); i++) { + ((VALUE_TYPE *)column->col)[i] = values[i].second; + // std::cout << i << " " << values[i].second << std::endl; + } + const int stride[14]= { 0,0,0,1,2,0,4,8,0,0,4,8,8,0}; + int vpp = PAGESIZE/stride[column->coltype]; + if (vpp == 0) { + printf("rows per pages is 0\n"); + std::runtime_error("rows per pages is 0"); + // return -1; + } + int pages = column->colcount/vpp + 1; + if (pages > MAX_IMPS) { + printf("there are too many pages %ld\n", pages); + std::runtime_error("column imprints: too many pages"); + // return -1; + } + column->typesize = stride[column->coltype]; + column->colcount = values.size(); + index = create_imprints(column, blocksize_, maxbins_, 1); + } + + unsigned int * get(VALUE_TYPE low, VALUE_TYPE high) { + ValRecord low_, high_; + switch (column->coltype) { + case TYPE_bte: + low_.bval = low; + high_.bval = high; + break; + case TYPE_sht: + low_.sval = low; + high_.sval = high; + break; + case TYPE_int: + low_.ival = low; + high_.ival = high; + // setqueryrange(ival); + break; + case TYPE_lng: + low_.lval = low; + high_.lval = high; + break; + case TYPE_oid: + low_.ulval = low; + high_.ulval = high; + // setqueryrange(ulval); + break; + case TYPE_flt: + low_.fval = low; + high_.fval = high; + // setqueryrange(fval); + break; + case TYPE_dbl: + low_.dval = low; + high_.dval = high; + // setqueryrange(dval); + } + uint32_t *result_data = new uint32_t[(column->colcount + 31) / 32]; + memset(result_data, 0, sizeof(uint32_t) * ((column->colcount + 31) / 32)); + auto dummy = usec(); + imprints_simd_scan(column, index, low_, high_, &dummy, result_data); + dummy = usec(); + return result_data; + } + + bool get(KEY_TYPE key) override { return false; } + + void put(KEY_TYPE key, VALUE_TYPE value) { + std::runtime_error("Column Imprints does not support put(key, value)."); + return; + } + + void end_routine() override {} + private: + Column *column; + Imprints_index *index; + int blocksize_, maxbins_; +}; + +} // namespace bliss + +#endif // !BLISS_BENCH_BTREE diff --git a/src/bliss/bliss_index.h b/src/bliss/bliss_index.h index e1dfe32..7c5c04b 100644 --- a/src/bliss/bliss_index.h +++ b/src/bliss/bliss_index.h @@ -11,6 +11,9 @@ class BlissIndex { public: virtual void bulkload(std::vector> values); virtual bool get(KEY_TYPE key) = 0; + virtual uint32_t * get(VALUE_TYPE start, VALUE_TYPE end) { + std::runtime_error("get(VALUE_TYPE start, VALUE_TYPE end) is not yet implemented"); + }; virtual void put(KEY_TYPE key, VALUE_TYPE value) = 0; virtual void end_routine() = 0; }; diff --git a/src/bliss_bench.cpp b/src/bliss_bench.cpp index 97b8e32..5c4a8d1 100644 --- a/src/bliss_bench.cpp +++ b/src/bliss_bench.cpp @@ -15,6 +15,7 @@ #include "bliss/util/execute.h" #include "bliss/util/reader.h" #include "bliss/util/timer.h" +#include "bliss/bench_imprints.h" using namespace bliss::utils; @@ -168,6 +169,8 @@ int main(int argc, char *argv[]) { index.reset(new bliss::BlissLippIndex()); } else if (config.index == "btree") { index.reset(new bliss::BlissBTreeIndex()); + } else if (config.index == "imprints") { + index.reset(new bliss::BlissImprintsIndex(64, 64)); } else { spdlog::error(config.index + " not implemented yet", 1); } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e916e4f..856a46b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -16,4 +16,5 @@ target_include_directories(bliss_test_infra PUBLIC add_subdirectory(test_alex) add_subdirectory(test_lipp) -add_subdirectory(test_btree) \ No newline at end of file +add_subdirectory(test_btree) +add_subdirectory(test_imprints) \ No newline at end of file diff --git a/tests/bliss_index_tests.h b/tests/bliss_index_tests.h index edb14fa..668338f 100644 --- a/tests/bliss_index_tests.h +++ b/tests/bliss_index_tests.h @@ -12,6 +12,7 @@ #include "bliss/bench_alex.h" #include "bliss/bench_btree.h" #include "bliss/bench_lipp.h" +#include "bliss/bench_imprints.h" #include "bliss/bliss_index.h" #include "bliss/util/args.h" #include "bliss/util/config.h" diff --git a/tests/test_imprints/CMakeLists.txt b/tests/test_imprints/CMakeLists.txt new file mode 100644 index 0000000..b44a9ac --- /dev/null +++ b/tests/test_imprints/CMakeLists.txt @@ -0,0 +1,9 @@ +get_filename_component(EXEC ${CMAKE_CURRENT_SOURCE_DIR} NAME) +file(GLOB_RECURSE CPP_TESTS "*_tests.cpp") +add_executable(${EXEC} ${CPP_TESTS}) +target_link_libraries(${EXEC} PRIVATE +bliss +bliss_test_infra +GTest::gtest_main) +include(GoogleTest) +gtest_discover_tests(${EXEC}) \ No newline at end of file diff --git a/tests/test_imprints/imprint_tests.cpp b/tests/test_imprints/imprint_tests.cpp new file mode 100644 index 0000000..8662019 --- /dev/null +++ b/tests/test_imprints/imprint_tests.cpp @@ -0,0 +1,34 @@ +#include "bliss_index_tests.h" + +class ImprintsTest : public BlissIndexTest {}; + +TEST_F(ImprintsTest, TestImprint_Random) { + index.reset(new bliss::BlissImprintsIndex()); + std::vector data; + GenerateData(data, num_keys, false); + std::vector < std::pair > bulkload_data; + for (size_t i = 0; i < data.size(); i ++) { + bulkload_data.push_back(std::make_pair(i, data[i])); + } + index->bulkload(bulkload_data); + auto minimum = 0; + // auto insert_start = data.begin(); + // auto insert_end = data.end(); + // executor::execute_inserts(*index, insert_start, insert_end); + + for (size_t key = 0; key < num_keys; key += std::max(1,num_keys / 20)) { + uint32_t *result = nullptr; + result = index->get(minimum, data[key]); + for(size_t i = 0; i < num_keys; i ++) { + // if((data[i] <= data[key] && data[i] > minimum) != ((result[i / 32] >> (i & 31)) & 1u)) { + // std::cout << minimum << " < " << data[i] << " <= " << data[key] << " " << ((result[i / 32] >> (i & 31)) & 1u) << " :: idx = " << i << std::endl; + // } + if((result[i / 32] >> (i & 31)) & 1u) { + EXPECT_TRUE(data[i] <= data[key] && data[i] > minimum); + } else { + EXPECT_FALSE(data[i] <= data[key] && data[i] > minimum); + } + } + free(result); + } +} \ No newline at end of file From 29bd6ee3cf5e1f792ab6baf69509674384ab74ca Mon Sep 17 00:00:00 2001 From: Yanpeng Wei Date: Fri, 25 Oct 2024 18:54:33 +0800 Subject: [PATCH 02/24] fix gitfetch, pack column imprints --- external/CMakeLists.txt | 2 +- src/bliss/bench_imprints.h | 117 +++++-------------------------------- 2 files changed, 17 insertions(+), 102 deletions(-) diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index 6db1e75..8cb8ff5 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -75,7 +75,7 @@ target_include_directories(tlx INTERFACE ${tlx_SOURCE_DIR}/) FetchContent_Declare( imprints - GIT_REPOSITORY git@github.com:altramarine/imprints.git + GIT_REPOSITORY https://github.com:altramarine/imprints.git GIT_TAG cmake-version ) FetchContent_GetProperties(imprints) diff --git a/src/bliss/bench_imprints.h b/src/bliss/bench_imprints.h index cc7604b..dcd0587 100644 --- a/src/bliss/bench_imprints.h +++ b/src/bliss/bench_imprints.h @@ -13,118 +13,35 @@ template class BlissImprintsIndex : public BlissIndex { public: BlissImprintsIndex(int blocksize = 64, int maxbins = 64) : blocksize_(blocksize), maxbins_(maxbins) { - column = (Column *) malloc(sizeof(Column)); - strcpy(column->type_name, boost::typeindex::type_id().pretty_name().c_str()); - // std::cout << boost::typeindex::type_id().pretty_name() << std::endl; - // printf("typename is: %s\n", column->type_name); - if (strcmp(column->type_name, "tinyint") == 0 || strcmp(column->type_name, "boolean") == 0) { - column->coltype = TYPE_bte; - column->min.bval = 127; - column->max.bval = -127; - } else if (strcmp(column->type_name, "char") == 0 || strcmp(column->type_name,"smallint")== 0 || strcmp(column->type_name, "short")== 0) { - column->coltype = TYPE_sht; - column->min.sval = 32767; - column->max.sval = -32767; - } else if (strcmp(column->type_name, "decimal") == 0 || strcmp(column->type_name, "int") == 0 || strcmp(column->type_name, "date") == 0) { - column->coltype = TYPE_int; - column->min.ival = INT_MAX; - column->max.ival = INT_MIN; - } else if (strcmp(column->type_name, "long") == 0 || strcmp(column->type_name, "long int") == 0) { - column->coltype = TYPE_lng; - column->min.lval = LONG_MAX; - column->max.lval = LONG_MIN; - } else if (strcmp(column->type_name, "float") == 0 || strcmp(column->type_name, "real") == 0) { - column->coltype= TYPE_flt; - column->min.fval = FLT_MAX; - column->max.fval = FLT_MIN; - } else if (strcmp(column->type_name, "double") == 0 ) { - column->coltype = TYPE_dbl; - column->min.dval = DBL_MAX; - column->max.dval = -DBL_MAX; - } else if (strcmp(column->type_name, "oid") == 0 || strcmp(column->type_name, "unsigned long") == 0) { - column->coltype = TYPE_oid; - column->min.ulval = ULONG_MAX; - column->max.ulval = 0; - } else { - printf("error: type [%s] not supported\n", column->type_name); - std::runtime_error("[column imprints]: type not supported"); - } - std::cout << "column initiated" << std::endl; + imprints_ = new Imprints(blocksize, maxbins); + // std::cout << "column initiated" << std::endl; }; + ~BlissImprintsIndex() { + delete imprints_; + } void bulkload( std::vector> values) override { // expects the pairs to be pre-sorted before performing bulk load // this->_index.bulk_load(values.begin(), values.end()); // binning() - column->col = (char *)new VALUE_TYPE[values.size()]; - for(size_t i = 0; i < values.size(); i++) { - ((VALUE_TYPE *)column->col)[i] = values[i].second; - // std::cout << i << " " << values[i].second << std::endl; - } - const int stride[14]= { 0,0,0,1,2,0,4,8,0,0,4,8,8,0}; - int vpp = PAGESIZE/stride[column->coltype]; - if (vpp == 0) { - printf("rows per pages is 0\n"); - std::runtime_error("rows per pages is 0"); - // return -1; - } - int pages = column->colcount/vpp + 1; - if (pages > MAX_IMPS) { - printf("there are too many pages %ld\n", pages); - std::runtime_error("column imprints: too many pages"); - // return -1; + std::vector vals; + for(auto x: values) { + vals.push_back(x.second); } - column->typesize = stride[column->coltype]; - column->colcount = values.size(); - index = create_imprints(column, blocksize_, maxbins_, 1); + imprints_->bulkload(vals); } + unsigned int * get(VALUE_TYPE low, VALUE_TYPE high) { - ValRecord low_, high_; - switch (column->coltype) { - case TYPE_bte: - low_.bval = low; - high_.bval = high; - break; - case TYPE_sht: - low_.sval = low; - high_.sval = high; - break; - case TYPE_int: - low_.ival = low; - high_.ival = high; - // setqueryrange(ival); - break; - case TYPE_lng: - low_.lval = low; - high_.lval = high; - break; - case TYPE_oid: - low_.ulval = low; - high_.ulval = high; - // setqueryrange(ulval); - break; - case TYPE_flt: - low_.fval = low; - high_.fval = high; - // setqueryrange(fval); - break; - case TYPE_dbl: - low_.dval = low; - high_.dval = high; - // setqueryrange(dval); - } - uint32_t *result_data = new uint32_t[(column->colcount + 31) / 32]; - memset(result_data, 0, sizeof(uint32_t) * ((column->colcount + 31) / 32)); - auto dummy = usec(); - imprints_simd_scan(column, index, low_, high_, &dummy, result_data); - dummy = usec(); - return result_data; + return imprints_->range_scan(low, high); } - bool get(KEY_TYPE key) override { return false; } + bool get(KEY_TYPE key) override { + std::runtime_error("Column Imprints does not support get(key, value)."); + return false; + } void put(KEY_TYPE key, VALUE_TYPE value) { std::runtime_error("Column Imprints does not support put(key, value)."); @@ -133,9 +50,7 @@ class BlissImprintsIndex : public BlissIndex { void end_routine() override {} private: - Column *column; - Imprints_index *index; - int blocksize_, maxbins_; + Imprints *imprints_; }; } // namespace bliss From 0213125430a3829044f230df4663d6125e643a21 Mon Sep 17 00:00:00 2001 From: Yanpeng Wei Date: Fri, 25 Oct 2024 18:57:15 +0800 Subject: [PATCH 03/24] fix git fetch url --- external/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index 8cb8ff5..38b5a13 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -75,7 +75,7 @@ target_include_directories(tlx INTERFACE ${tlx_SOURCE_DIR}/) FetchContent_Declare( imprints - GIT_REPOSITORY https://github.com:altramarine/imprints.git + GIT_REPOSITORY https://github.com/altramarine/imprints.git GIT_TAG cmake-version ) FetchContent_GetProperties(imprints) From 4eacf30f171f52b65d69efb50a5468d7a097929e Mon Sep 17 00:00:00 2001 From: Yanpeng Wei Date: Fri, 25 Oct 2024 19:08:13 +0800 Subject: [PATCH 04/24] fixing Cmake --- external/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index 38b5a13..a74d8e4 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -72,7 +72,7 @@ endif() add_library(tlx INTERFACE) target_include_directories(tlx INTERFACE ${tlx_SOURCE_DIR}/) - +find_package(Boost REQUIRED) FetchContent_Declare( imprints GIT_REPOSITORY https://github.com/altramarine/imprints.git From 88c56167ea33575990779ec5a7a564ce23334aae Mon Sep 17 00:00:00 2001 From: Yanpeng Wei Date: Mon, 4 Nov 2024 15:33:57 +0800 Subject: [PATCH 05/24] remove Boost library, replaced by an type_name input in constructing funtion --- external/CMakeLists.txt | 2 +- src/bliss/bench_imprints.h | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index a74d8e4..242a112 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -72,7 +72,7 @@ endif() add_library(tlx INTERFACE) target_include_directories(tlx INTERFACE ${tlx_SOURCE_DIR}/) -find_package(Boost REQUIRED) +# find_package(Boost REQUIRED) FetchContent_Declare( imprints GIT_REPOSITORY https://github.com/altramarine/imprints.git diff --git a/src/bliss/bench_imprints.h b/src/bliss/bench_imprints.h index dcd0587..ae1fdc3 100644 --- a/src/bliss/bench_imprints.h +++ b/src/bliss/bench_imprints.h @@ -6,14 +6,15 @@ #include "bliss/bliss_index.h" #include #include +#include namespace bliss { template class BlissImprintsIndex : public BlissIndex { public: - BlissImprintsIndex(int blocksize = 64, int maxbins = 64) : blocksize_(blocksize), maxbins_(maxbins) { - imprints_ = new Imprints(blocksize, maxbins); + BlissImprintsIndex(int blocksize = 64, int maxbins = 64, std::string type_name = std::string("unsigned int")) : blocksize_(blocksize), maxbins_(maxbins) { + imprints_ = new Imprints(blocksize, maxbins, type_name); // std::cout << "column initiated" << std::endl; }; @@ -51,6 +52,8 @@ class BlissImprintsIndex : public BlissIndex { void end_routine() override {} private: Imprints *imprints_; + int blocksize_, maxbins_; + std::string type_name; }; } // namespace bliss From 8f47eaa925d0328a7f12fd3046f94c4760ef92ad Mon Sep 17 00:00:00 2001 From: Yanpeng Wei Date: Mon, 4 Nov 2024 15:36:51 +0800 Subject: [PATCH 06/24] remove Boost library, replaced by a type_name input in construct function --- src/bliss/bench_imprints.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/bliss/bench_imprints.h b/src/bliss/bench_imprints.h index ae1fdc3..07ebae9 100644 --- a/src/bliss/bench_imprints.h +++ b/src/bliss/bench_imprints.h @@ -5,7 +5,6 @@ #include "bliss/bliss_index.h" #include -#include #include namespace bliss { From 8a3cfd7f34214aea6e382a6c90cd2dfea16a6993 Mon Sep 17 00:00:00 2001 From: Yanpeng Wei Date: Mon, 4 Nov 2024 15:43:53 +0800 Subject: [PATCH 07/24] default arg for imprint changes --- src/bliss/bench_imprints.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bliss/bench_imprints.h b/src/bliss/bench_imprints.h index 07ebae9..248134c 100644 --- a/src/bliss/bench_imprints.h +++ b/src/bliss/bench_imprints.h @@ -12,7 +12,7 @@ namespace bliss { template class BlissImprintsIndex : public BlissIndex { public: - BlissImprintsIndex(int blocksize = 64, int maxbins = 64, std::string type_name = std::string("unsigned int")) : blocksize_(blocksize), maxbins_(maxbins) { + BlissImprintsIndex(int blocksize = 64, int maxbins = 64, std::string type_name = std::string("unsigned long")) : blocksize_(blocksize), maxbins_(maxbins) { imprints_ = new Imprints(blocksize, maxbins, type_name); // std::cout << "column initiated" << std::endl; }; From 758592961c4b28a7850322f34e374864add1e2bb Mon Sep 17 00:00:00 2001 From: Yanpeng Wei Date: Mon, 4 Nov 2024 16:00:07 +0800 Subject: [PATCH 08/24] change tests --- tests/test_imprints/imprint_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_imprints/imprint_tests.cpp b/tests/test_imprints/imprint_tests.cpp index 8662019..60bd21e 100644 --- a/tests/test_imprints/imprint_tests.cpp +++ b/tests/test_imprints/imprint_tests.cpp @@ -3,7 +3,7 @@ class ImprintsTest : public BlissIndexTest {}; TEST_F(ImprintsTest, TestImprint_Random) { - index.reset(new bliss::BlissImprintsIndex()); + index.reset(new bliss::BlissImprintsIndex(64, 64, std::string("unsigned long"))); std::vector data; GenerateData(data, num_keys, false); std::vector < std::pair > bulkload_data; From ac02be5fe49dc0672478d6a6e2d0e25db4b94259 Mon Sep 17 00:00:00 2001 From: Yanpeng Wei Date: Mon, 4 Nov 2024 16:10:28 +0800 Subject: [PATCH 09/24] add build test for Imprints --- tests/test_imprints/imprint_tests.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/test_imprints/imprint_tests.cpp b/tests/test_imprints/imprint_tests.cpp index 60bd21e..e5509e9 100644 --- a/tests/test_imprints/imprint_tests.cpp +++ b/tests/test_imprints/imprint_tests.cpp @@ -2,6 +2,20 @@ class ImprintsTest : public BlissIndexTest {}; +TEST_F(ImprintsTest, TestImprint_Build) { + index.reset(new bliss::BlissImprintsIndex(64, 64, std::string("unsigned long"))); + std::vector data; + GenerateData(data, num_keys, false); + std::vector < std::pair > bulkload_data; + for (size_t i = 0; i < data.size(); i ++) { + bulkload_data.push_back(std::make_pair(i, data[i])); + } + index->bulkload(bulkload_data); + // auto insert_start = data.begin(); + // auto insert_end = data.end(); + // executor::execute_inserts(*index, insert_start, insert_end); +} + TEST_F(ImprintsTest, TestImprint_Random) { index.reset(new bliss::BlissImprintsIndex(64, 64, std::string("unsigned long"))); std::vector data; From c6503d57f5b232009ea8768b3429d1341af86d14 Mon Sep 17 00:00:00 2001 From: Yanpeng Wei Date: Mon, 4 Nov 2024 16:17:48 +0800 Subject: [PATCH 10/24] remove 1.txt --- 1.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 1.txt diff --git a/1.txt b/1.txt deleted file mode 100644 index e69de29..0000000 From ad8b239ff03dc62d415a9961d530ca623848003c Mon Sep 17 00:00:00 2001 From: Yanpeng Wei Date: Thu, 17 Oct 2024 18:31:19 +0800 Subject: [PATCH 11/24] Column Imprints integration --- 1.txt | 0 CMakeLists.txt | 2 + external/CMakeLists.txt | 16 ++- src/bliss/bench_imprints.h | 143 ++++++++++++++++++++++++++ src/bliss/bliss_index.h | 3 + src/bliss_bench.cpp | 3 + tests/CMakeLists.txt | 3 +- tests/bliss_index_tests.h | 1 + tests/test_imprints/CMakeLists.txt | 9 ++ tests/test_imprints/imprint_tests.cpp | 34 ++++++ 10 files changed, 212 insertions(+), 2 deletions(-) create mode 100644 1.txt create mode 100644 src/bliss/bench_imprints.h create mode 100644 tests/test_imprints/CMakeLists.txt create mode 100644 tests/test_imprints/imprint_tests.cpp diff --git a/1.txt b/1.txt new file mode 100644 index 0000000..e69de29 diff --git a/CMakeLists.txt b/CMakeLists.txt index 66226eb..8698192 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -51,6 +51,7 @@ add_library(bliss OBJECT ${CMAKE_SOURCE_DIR}/src/bliss/bench_lipp.h ${CMAKE_SOURCE_DIR}/src/bliss/bench_alex.h ${CMAKE_SOURCE_DIR}/src/bliss/bench_btree.h + ${CMAKE_SOURCE_DIR}/src/bliss/bench_imprints.h ) target_compile_features(bliss PUBLIC @@ -63,6 +64,7 @@ target_link_libraries(bliss PUBLIC alex lipp tlx + imprints ) target_include_directories(bliss PUBLIC diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index 3531b2f..6db1e75 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -70,4 +70,18 @@ if (NOT tlx_POPULATED) endif() add_library(tlx INTERFACE) -target_include_directories(tlx INTERFACE ${tlx_SOURCE_DIR}/) \ No newline at end of file +target_include_directories(tlx INTERFACE ${tlx_SOURCE_DIR}/) + + +FetchContent_Declare( + imprints + GIT_REPOSITORY git@github.com:altramarine/imprints.git + GIT_TAG cmake-version +) +FetchContent_GetProperties(imprints) +if (NOT imprints_POPULATED) + FetchContent_Populate(imprints) +endif() + +add_library(imprints INTERFACE) +target_include_directories(imprints INTERFACE ${imprints_SOURCE_DIR}/include) \ No newline at end of file diff --git a/src/bliss/bench_imprints.h b/src/bliss/bench_imprints.h new file mode 100644 index 0000000..cc7604b --- /dev/null +++ b/src/bliss/bench_imprints.h @@ -0,0 +1,143 @@ +#ifndef BLISS_BENCH_IMPRINTS +#define BLISS_BENCH_IMPRINTS + +#include + +#include "bliss/bliss_index.h" +#include +#include + +namespace bliss { + +template +class BlissImprintsIndex : public BlissIndex { + public: + BlissImprintsIndex(int blocksize = 64, int maxbins = 64) : blocksize_(blocksize), maxbins_(maxbins) { + column = (Column *) malloc(sizeof(Column)); + strcpy(column->type_name, boost::typeindex::type_id().pretty_name().c_str()); + // std::cout << boost::typeindex::type_id().pretty_name() << std::endl; + // printf("typename is: %s\n", column->type_name); + if (strcmp(column->type_name, "tinyint") == 0 || strcmp(column->type_name, "boolean") == 0) { + column->coltype = TYPE_bte; + column->min.bval = 127; + column->max.bval = -127; + } else if (strcmp(column->type_name, "char") == 0 || strcmp(column->type_name,"smallint")== 0 || strcmp(column->type_name, "short")== 0) { + column->coltype = TYPE_sht; + column->min.sval = 32767; + column->max.sval = -32767; + } else if (strcmp(column->type_name, "decimal") == 0 || strcmp(column->type_name, "int") == 0 || strcmp(column->type_name, "date") == 0) { + column->coltype = TYPE_int; + column->min.ival = INT_MAX; + column->max.ival = INT_MIN; + } else if (strcmp(column->type_name, "long") == 0 || strcmp(column->type_name, "long int") == 0) { + column->coltype = TYPE_lng; + column->min.lval = LONG_MAX; + column->max.lval = LONG_MIN; + } else if (strcmp(column->type_name, "float") == 0 || strcmp(column->type_name, "real") == 0) { + column->coltype= TYPE_flt; + column->min.fval = FLT_MAX; + column->max.fval = FLT_MIN; + } else if (strcmp(column->type_name, "double") == 0 ) { + column->coltype = TYPE_dbl; + column->min.dval = DBL_MAX; + column->max.dval = -DBL_MAX; + } else if (strcmp(column->type_name, "oid") == 0 || strcmp(column->type_name, "unsigned long") == 0) { + column->coltype = TYPE_oid; + column->min.ulval = ULONG_MAX; + column->max.ulval = 0; + } else { + printf("error: type [%s] not supported\n", column->type_name); + std::runtime_error("[column imprints]: type not supported"); + } + std::cout << "column initiated" << std::endl; + }; + + + void bulkload( + std::vector> values) override { + // expects the pairs to be pre-sorted before performing bulk load + // this->_index.bulk_load(values.begin(), values.end()); + // binning() + column->col = (char *)new VALUE_TYPE[values.size()]; + for(size_t i = 0; i < values.size(); i++) { + ((VALUE_TYPE *)column->col)[i] = values[i].second; + // std::cout << i << " " << values[i].second << std::endl; + } + const int stride[14]= { 0,0,0,1,2,0,4,8,0,0,4,8,8,0}; + int vpp = PAGESIZE/stride[column->coltype]; + if (vpp == 0) { + printf("rows per pages is 0\n"); + std::runtime_error("rows per pages is 0"); + // return -1; + } + int pages = column->colcount/vpp + 1; + if (pages > MAX_IMPS) { + printf("there are too many pages %ld\n", pages); + std::runtime_error("column imprints: too many pages"); + // return -1; + } + column->typesize = stride[column->coltype]; + column->colcount = values.size(); + index = create_imprints(column, blocksize_, maxbins_, 1); + } + + unsigned int * get(VALUE_TYPE low, VALUE_TYPE high) { + ValRecord low_, high_; + switch (column->coltype) { + case TYPE_bte: + low_.bval = low; + high_.bval = high; + break; + case TYPE_sht: + low_.sval = low; + high_.sval = high; + break; + case TYPE_int: + low_.ival = low; + high_.ival = high; + // setqueryrange(ival); + break; + case TYPE_lng: + low_.lval = low; + high_.lval = high; + break; + case TYPE_oid: + low_.ulval = low; + high_.ulval = high; + // setqueryrange(ulval); + break; + case TYPE_flt: + low_.fval = low; + high_.fval = high; + // setqueryrange(fval); + break; + case TYPE_dbl: + low_.dval = low; + high_.dval = high; + // setqueryrange(dval); + } + uint32_t *result_data = new uint32_t[(column->colcount + 31) / 32]; + memset(result_data, 0, sizeof(uint32_t) * ((column->colcount + 31) / 32)); + auto dummy = usec(); + imprints_simd_scan(column, index, low_, high_, &dummy, result_data); + dummy = usec(); + return result_data; + } + + bool get(KEY_TYPE key) override { return false; } + + void put(KEY_TYPE key, VALUE_TYPE value) { + std::runtime_error("Column Imprints does not support put(key, value)."); + return; + } + + void end_routine() override {} + private: + Column *column; + Imprints_index *index; + int blocksize_, maxbins_; +}; + +} // namespace bliss + +#endif // !BLISS_BENCH_BTREE diff --git a/src/bliss/bliss_index.h b/src/bliss/bliss_index.h index e1dfe32..7c5c04b 100644 --- a/src/bliss/bliss_index.h +++ b/src/bliss/bliss_index.h @@ -11,6 +11,9 @@ class BlissIndex { public: virtual void bulkload(std::vector> values); virtual bool get(KEY_TYPE key) = 0; + virtual uint32_t * get(VALUE_TYPE start, VALUE_TYPE end) { + std::runtime_error("get(VALUE_TYPE start, VALUE_TYPE end) is not yet implemented"); + }; virtual void put(KEY_TYPE key, VALUE_TYPE value) = 0; virtual void end_routine() = 0; }; diff --git a/src/bliss_bench.cpp b/src/bliss_bench.cpp index 97b8e32..5c4a8d1 100644 --- a/src/bliss_bench.cpp +++ b/src/bliss_bench.cpp @@ -15,6 +15,7 @@ #include "bliss/util/execute.h" #include "bliss/util/reader.h" #include "bliss/util/timer.h" +#include "bliss/bench_imprints.h" using namespace bliss::utils; @@ -168,6 +169,8 @@ int main(int argc, char *argv[]) { index.reset(new bliss::BlissLippIndex()); } else if (config.index == "btree") { index.reset(new bliss::BlissBTreeIndex()); + } else if (config.index == "imprints") { + index.reset(new bliss::BlissImprintsIndex(64, 64)); } else { spdlog::error(config.index + " not implemented yet", 1); } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e916e4f..856a46b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -16,4 +16,5 @@ target_include_directories(bliss_test_infra PUBLIC add_subdirectory(test_alex) add_subdirectory(test_lipp) -add_subdirectory(test_btree) \ No newline at end of file +add_subdirectory(test_btree) +add_subdirectory(test_imprints) \ No newline at end of file diff --git a/tests/bliss_index_tests.h b/tests/bliss_index_tests.h index edb14fa..668338f 100644 --- a/tests/bliss_index_tests.h +++ b/tests/bliss_index_tests.h @@ -12,6 +12,7 @@ #include "bliss/bench_alex.h" #include "bliss/bench_btree.h" #include "bliss/bench_lipp.h" +#include "bliss/bench_imprints.h" #include "bliss/bliss_index.h" #include "bliss/util/args.h" #include "bliss/util/config.h" diff --git a/tests/test_imprints/CMakeLists.txt b/tests/test_imprints/CMakeLists.txt new file mode 100644 index 0000000..b44a9ac --- /dev/null +++ b/tests/test_imprints/CMakeLists.txt @@ -0,0 +1,9 @@ +get_filename_component(EXEC ${CMAKE_CURRENT_SOURCE_DIR} NAME) +file(GLOB_RECURSE CPP_TESTS "*_tests.cpp") +add_executable(${EXEC} ${CPP_TESTS}) +target_link_libraries(${EXEC} PRIVATE +bliss +bliss_test_infra +GTest::gtest_main) +include(GoogleTest) +gtest_discover_tests(${EXEC}) \ No newline at end of file diff --git a/tests/test_imprints/imprint_tests.cpp b/tests/test_imprints/imprint_tests.cpp new file mode 100644 index 0000000..8662019 --- /dev/null +++ b/tests/test_imprints/imprint_tests.cpp @@ -0,0 +1,34 @@ +#include "bliss_index_tests.h" + +class ImprintsTest : public BlissIndexTest {}; + +TEST_F(ImprintsTest, TestImprint_Random) { + index.reset(new bliss::BlissImprintsIndex()); + std::vector data; + GenerateData(data, num_keys, false); + std::vector < std::pair > bulkload_data; + for (size_t i = 0; i < data.size(); i ++) { + bulkload_data.push_back(std::make_pair(i, data[i])); + } + index->bulkload(bulkload_data); + auto minimum = 0; + // auto insert_start = data.begin(); + // auto insert_end = data.end(); + // executor::execute_inserts(*index, insert_start, insert_end); + + for (size_t key = 0; key < num_keys; key += std::max(1,num_keys / 20)) { + uint32_t *result = nullptr; + result = index->get(minimum, data[key]); + for(size_t i = 0; i < num_keys; i ++) { + // if((data[i] <= data[key] && data[i] > minimum) != ((result[i / 32] >> (i & 31)) & 1u)) { + // std::cout << minimum << " < " << data[i] << " <= " << data[key] << " " << ((result[i / 32] >> (i & 31)) & 1u) << " :: idx = " << i << std::endl; + // } + if((result[i / 32] >> (i & 31)) & 1u) { + EXPECT_TRUE(data[i] <= data[key] && data[i] > minimum); + } else { + EXPECT_FALSE(data[i] <= data[key] && data[i] > minimum); + } + } + free(result); + } +} \ No newline at end of file From 1bb32a77345941bdbe9a87e6d806510e17eb132e Mon Sep 17 00:00:00 2001 From: Yanpeng Wei Date: Fri, 25 Oct 2024 18:54:33 +0800 Subject: [PATCH 12/24] fix gitfetch, pack column imprints --- external/CMakeLists.txt | 2 +- src/bliss/bench_imprints.h | 117 +++++-------------------------------- 2 files changed, 17 insertions(+), 102 deletions(-) diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index 6db1e75..8cb8ff5 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -75,7 +75,7 @@ target_include_directories(tlx INTERFACE ${tlx_SOURCE_DIR}/) FetchContent_Declare( imprints - GIT_REPOSITORY git@github.com:altramarine/imprints.git + GIT_REPOSITORY https://github.com:altramarine/imprints.git GIT_TAG cmake-version ) FetchContent_GetProperties(imprints) diff --git a/src/bliss/bench_imprints.h b/src/bliss/bench_imprints.h index cc7604b..dcd0587 100644 --- a/src/bliss/bench_imprints.h +++ b/src/bliss/bench_imprints.h @@ -13,118 +13,35 @@ template class BlissImprintsIndex : public BlissIndex { public: BlissImprintsIndex(int blocksize = 64, int maxbins = 64) : blocksize_(blocksize), maxbins_(maxbins) { - column = (Column *) malloc(sizeof(Column)); - strcpy(column->type_name, boost::typeindex::type_id().pretty_name().c_str()); - // std::cout << boost::typeindex::type_id().pretty_name() << std::endl; - // printf("typename is: %s\n", column->type_name); - if (strcmp(column->type_name, "tinyint") == 0 || strcmp(column->type_name, "boolean") == 0) { - column->coltype = TYPE_bte; - column->min.bval = 127; - column->max.bval = -127; - } else if (strcmp(column->type_name, "char") == 0 || strcmp(column->type_name,"smallint")== 0 || strcmp(column->type_name, "short")== 0) { - column->coltype = TYPE_sht; - column->min.sval = 32767; - column->max.sval = -32767; - } else if (strcmp(column->type_name, "decimal") == 0 || strcmp(column->type_name, "int") == 0 || strcmp(column->type_name, "date") == 0) { - column->coltype = TYPE_int; - column->min.ival = INT_MAX; - column->max.ival = INT_MIN; - } else if (strcmp(column->type_name, "long") == 0 || strcmp(column->type_name, "long int") == 0) { - column->coltype = TYPE_lng; - column->min.lval = LONG_MAX; - column->max.lval = LONG_MIN; - } else if (strcmp(column->type_name, "float") == 0 || strcmp(column->type_name, "real") == 0) { - column->coltype= TYPE_flt; - column->min.fval = FLT_MAX; - column->max.fval = FLT_MIN; - } else if (strcmp(column->type_name, "double") == 0 ) { - column->coltype = TYPE_dbl; - column->min.dval = DBL_MAX; - column->max.dval = -DBL_MAX; - } else if (strcmp(column->type_name, "oid") == 0 || strcmp(column->type_name, "unsigned long") == 0) { - column->coltype = TYPE_oid; - column->min.ulval = ULONG_MAX; - column->max.ulval = 0; - } else { - printf("error: type [%s] not supported\n", column->type_name); - std::runtime_error("[column imprints]: type not supported"); - } - std::cout << "column initiated" << std::endl; + imprints_ = new Imprints(blocksize, maxbins); + // std::cout << "column initiated" << std::endl; }; + ~BlissImprintsIndex() { + delete imprints_; + } void bulkload( std::vector> values) override { // expects the pairs to be pre-sorted before performing bulk load // this->_index.bulk_load(values.begin(), values.end()); // binning() - column->col = (char *)new VALUE_TYPE[values.size()]; - for(size_t i = 0; i < values.size(); i++) { - ((VALUE_TYPE *)column->col)[i] = values[i].second; - // std::cout << i << " " << values[i].second << std::endl; - } - const int stride[14]= { 0,0,0,1,2,0,4,8,0,0,4,8,8,0}; - int vpp = PAGESIZE/stride[column->coltype]; - if (vpp == 0) { - printf("rows per pages is 0\n"); - std::runtime_error("rows per pages is 0"); - // return -1; - } - int pages = column->colcount/vpp + 1; - if (pages > MAX_IMPS) { - printf("there are too many pages %ld\n", pages); - std::runtime_error("column imprints: too many pages"); - // return -1; + std::vector vals; + for(auto x: values) { + vals.push_back(x.second); } - column->typesize = stride[column->coltype]; - column->colcount = values.size(); - index = create_imprints(column, blocksize_, maxbins_, 1); + imprints_->bulkload(vals); } + unsigned int * get(VALUE_TYPE low, VALUE_TYPE high) { - ValRecord low_, high_; - switch (column->coltype) { - case TYPE_bte: - low_.bval = low; - high_.bval = high; - break; - case TYPE_sht: - low_.sval = low; - high_.sval = high; - break; - case TYPE_int: - low_.ival = low; - high_.ival = high; - // setqueryrange(ival); - break; - case TYPE_lng: - low_.lval = low; - high_.lval = high; - break; - case TYPE_oid: - low_.ulval = low; - high_.ulval = high; - // setqueryrange(ulval); - break; - case TYPE_flt: - low_.fval = low; - high_.fval = high; - // setqueryrange(fval); - break; - case TYPE_dbl: - low_.dval = low; - high_.dval = high; - // setqueryrange(dval); - } - uint32_t *result_data = new uint32_t[(column->colcount + 31) / 32]; - memset(result_data, 0, sizeof(uint32_t) * ((column->colcount + 31) / 32)); - auto dummy = usec(); - imprints_simd_scan(column, index, low_, high_, &dummy, result_data); - dummy = usec(); - return result_data; + return imprints_->range_scan(low, high); } - bool get(KEY_TYPE key) override { return false; } + bool get(KEY_TYPE key) override { + std::runtime_error("Column Imprints does not support get(key, value)."); + return false; + } void put(KEY_TYPE key, VALUE_TYPE value) { std::runtime_error("Column Imprints does not support put(key, value)."); @@ -133,9 +50,7 @@ class BlissImprintsIndex : public BlissIndex { void end_routine() override {} private: - Column *column; - Imprints_index *index; - int blocksize_, maxbins_; + Imprints *imprints_; }; } // namespace bliss From 2186e2f25930674bd8d025609a072bf89c068caf Mon Sep 17 00:00:00 2001 From: Yanpeng Wei Date: Fri, 25 Oct 2024 18:57:15 +0800 Subject: [PATCH 13/24] fix git fetch url --- external/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index 8cb8ff5..38b5a13 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -75,7 +75,7 @@ target_include_directories(tlx INTERFACE ${tlx_SOURCE_DIR}/) FetchContent_Declare( imprints - GIT_REPOSITORY https://github.com:altramarine/imprints.git + GIT_REPOSITORY https://github.com/altramarine/imprints.git GIT_TAG cmake-version ) FetchContent_GetProperties(imprints) From b2344cf3f3b940cfcee8c278bccb650f0b172326 Mon Sep 17 00:00:00 2001 From: Yanpeng Wei Date: Fri, 25 Oct 2024 19:08:13 +0800 Subject: [PATCH 14/24] fixing Cmake --- external/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index 38b5a13..a74d8e4 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -72,7 +72,7 @@ endif() add_library(tlx INTERFACE) target_include_directories(tlx INTERFACE ${tlx_SOURCE_DIR}/) - +find_package(Boost REQUIRED) FetchContent_Declare( imprints GIT_REPOSITORY https://github.com/altramarine/imprints.git From 18ef4e578b7c75ed25f5359b5fcd5f7af63f06ec Mon Sep 17 00:00:00 2001 From: Yanpeng Wei Date: Mon, 4 Nov 2024 15:33:57 +0800 Subject: [PATCH 15/24] remove Boost library, replaced by an type_name input in constructing funtion --- external/CMakeLists.txt | 2 +- src/bliss/bench_imprints.h | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index a74d8e4..242a112 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -72,7 +72,7 @@ endif() add_library(tlx INTERFACE) target_include_directories(tlx INTERFACE ${tlx_SOURCE_DIR}/) -find_package(Boost REQUIRED) +# find_package(Boost REQUIRED) FetchContent_Declare( imprints GIT_REPOSITORY https://github.com/altramarine/imprints.git diff --git a/src/bliss/bench_imprints.h b/src/bliss/bench_imprints.h index dcd0587..ae1fdc3 100644 --- a/src/bliss/bench_imprints.h +++ b/src/bliss/bench_imprints.h @@ -6,14 +6,15 @@ #include "bliss/bliss_index.h" #include #include +#include namespace bliss { template class BlissImprintsIndex : public BlissIndex { public: - BlissImprintsIndex(int blocksize = 64, int maxbins = 64) : blocksize_(blocksize), maxbins_(maxbins) { - imprints_ = new Imprints(blocksize, maxbins); + BlissImprintsIndex(int blocksize = 64, int maxbins = 64, std::string type_name = std::string("unsigned int")) : blocksize_(blocksize), maxbins_(maxbins) { + imprints_ = new Imprints(blocksize, maxbins, type_name); // std::cout << "column initiated" << std::endl; }; @@ -51,6 +52,8 @@ class BlissImprintsIndex : public BlissIndex { void end_routine() override {} private: Imprints *imprints_; + int blocksize_, maxbins_; + std::string type_name; }; } // namespace bliss From 3c524af6ece8e7164ee8f771926bad006896afb1 Mon Sep 17 00:00:00 2001 From: Yanpeng Wei Date: Mon, 4 Nov 2024 15:36:51 +0800 Subject: [PATCH 16/24] remove Boost library, replaced by a type_name input in construct function --- src/bliss/bench_imprints.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/bliss/bench_imprints.h b/src/bliss/bench_imprints.h index ae1fdc3..07ebae9 100644 --- a/src/bliss/bench_imprints.h +++ b/src/bliss/bench_imprints.h @@ -5,7 +5,6 @@ #include "bliss/bliss_index.h" #include -#include #include namespace bliss { From 3e95cec6213e72724024775a39dc1d1bbf5d6a85 Mon Sep 17 00:00:00 2001 From: Yanpeng Wei Date: Mon, 4 Nov 2024 15:43:53 +0800 Subject: [PATCH 17/24] default arg for imprint changes --- src/bliss/bench_imprints.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bliss/bench_imprints.h b/src/bliss/bench_imprints.h index 07ebae9..248134c 100644 --- a/src/bliss/bench_imprints.h +++ b/src/bliss/bench_imprints.h @@ -12,7 +12,7 @@ namespace bliss { template class BlissImprintsIndex : public BlissIndex { public: - BlissImprintsIndex(int blocksize = 64, int maxbins = 64, std::string type_name = std::string("unsigned int")) : blocksize_(blocksize), maxbins_(maxbins) { + BlissImprintsIndex(int blocksize = 64, int maxbins = 64, std::string type_name = std::string("unsigned long")) : blocksize_(blocksize), maxbins_(maxbins) { imprints_ = new Imprints(blocksize, maxbins, type_name); // std::cout << "column initiated" << std::endl; }; From 63a95662bf271eeb42592df19ca6836e860f7881 Mon Sep 17 00:00:00 2001 From: Yanpeng Wei Date: Mon, 4 Nov 2024 16:00:07 +0800 Subject: [PATCH 18/24] change tests --- tests/test_imprints/imprint_tests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_imprints/imprint_tests.cpp b/tests/test_imprints/imprint_tests.cpp index 8662019..60bd21e 100644 --- a/tests/test_imprints/imprint_tests.cpp +++ b/tests/test_imprints/imprint_tests.cpp @@ -3,7 +3,7 @@ class ImprintsTest : public BlissIndexTest {}; TEST_F(ImprintsTest, TestImprint_Random) { - index.reset(new bliss::BlissImprintsIndex()); + index.reset(new bliss::BlissImprintsIndex(64, 64, std::string("unsigned long"))); std::vector data; GenerateData(data, num_keys, false); std::vector < std::pair > bulkload_data; From 5de3e3b397ccb26048c177b109d625b117f21398 Mon Sep 17 00:00:00 2001 From: Yanpeng Wei Date: Mon, 4 Nov 2024 16:10:28 +0800 Subject: [PATCH 19/24] add build test for Imprints --- tests/test_imprints/imprint_tests.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/test_imprints/imprint_tests.cpp b/tests/test_imprints/imprint_tests.cpp index 60bd21e..e5509e9 100644 --- a/tests/test_imprints/imprint_tests.cpp +++ b/tests/test_imprints/imprint_tests.cpp @@ -2,6 +2,20 @@ class ImprintsTest : public BlissIndexTest {}; +TEST_F(ImprintsTest, TestImprint_Build) { + index.reset(new bliss::BlissImprintsIndex(64, 64, std::string("unsigned long"))); + std::vector data; + GenerateData(data, num_keys, false); + std::vector < std::pair > bulkload_data; + for (size_t i = 0; i < data.size(); i ++) { + bulkload_data.push_back(std::make_pair(i, data[i])); + } + index->bulkload(bulkload_data); + // auto insert_start = data.begin(); + // auto insert_end = data.end(); + // executor::execute_inserts(*index, insert_start, insert_end); +} + TEST_F(ImprintsTest, TestImprint_Random) { index.reset(new bliss::BlissImprintsIndex(64, 64, std::string("unsigned long"))); std::vector data; From 1ba6bd62e20ffb77dcf68adbd581fcbe43619dbc Mon Sep 17 00:00:00 2001 From: Yanpeng Wei Date: Mon, 4 Nov 2024 16:17:48 +0800 Subject: [PATCH 20/24] remove 1.txt --- 1.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 1.txt diff --git a/1.txt b/1.txt deleted file mode 100644 index e69de29..0000000 From 5cebac2ce2ddce1ca776f8780a810b374978b8d7 Mon Sep 17 00:00:00 2001 From: Yanpeng Wei Date: Thu, 5 Dec 2024 04:20:43 +0800 Subject: [PATCH 21/24] slightly changed include --- src/bliss/bench_imprints.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bliss/bench_imprints.h b/src/bliss/bench_imprints.h index 248134c..aca4686 100644 --- a/src/bliss/bench_imprints.h +++ b/src/bliss/bench_imprints.h @@ -4,7 +4,7 @@ #include #include "bliss/bliss_index.h" -#include +#include "column_imprints.h" #include namespace bliss { From bb5ef9fc0e641cf219270434de714c0724a27c6d Mon Sep 17 00:00:00 2001 From: Yanpeng Wei Date: Mon, 3 Feb 2025 16:48:23 +0800 Subject: [PATCH 22/24] remove unused code & add comments --- src/bliss/bench_imprints.h | 4 ---- src/bliss/util/execute.h | 1 - src/bliss_bench.cpp | 2 +- tests/test_imprints/imprint_tests.cpp | 7 ------- 4 files changed, 1 insertion(+), 13 deletions(-) diff --git a/src/bliss/bench_imprints.h b/src/bliss/bench_imprints.h index aca4686..f134a34 100644 --- a/src/bliss/bench_imprints.h +++ b/src/bliss/bench_imprints.h @@ -14,7 +14,6 @@ class BlissImprintsIndex : public BlissIndex { public: BlissImprintsIndex(int blocksize = 64, int maxbins = 64, std::string type_name = std::string("unsigned long")) : blocksize_(blocksize), maxbins_(maxbins) { imprints_ = new Imprints(blocksize, maxbins, type_name); - // std::cout << "column initiated" << std::endl; }; ~BlissImprintsIndex() { @@ -23,9 +22,6 @@ class BlissImprintsIndex : public BlissIndex { void bulkload( std::vector> values) override { - // expects the pairs to be pre-sorted before performing bulk load - // this->_index.bulk_load(values.begin(), values.end()); - // binning() std::vector vals; for(auto x: values) { vals.push_back(x.second); diff --git a/src/bliss/util/execute.h b/src/bliss/util/execute.h index dd50f70..5ec907a 100644 --- a/src/bliss/util/execute.h +++ b/src/bliss/util/execute.h @@ -45,5 +45,4 @@ void execute_non_empty_reads(bliss::BlissIndex &tree, } // namespace executor } // namespace utils } // namespace bliss - #endif diff --git a/src/bliss_bench.cpp b/src/bliss_bench.cpp index 5c4a8d1..ea11725 100644 --- a/src/bliss_bench.cpp +++ b/src/bliss_bench.cpp @@ -170,7 +170,7 @@ int main(int argc, char *argv[]) { } else if (config.index == "btree") { index.reset(new bliss::BlissBTreeIndex()); } else if (config.index == "imprints") { - index.reset(new bliss::BlissImprintsIndex(64, 64)); + index.reset(new bliss::BlissImprintsIndex(/* block_size */64, /* max_bins */64)); } else { spdlog::error(config.index + " not implemented yet", 1); } diff --git a/tests/test_imprints/imprint_tests.cpp b/tests/test_imprints/imprint_tests.cpp index e5509e9..328bd5c 100644 --- a/tests/test_imprints/imprint_tests.cpp +++ b/tests/test_imprints/imprint_tests.cpp @@ -26,17 +26,10 @@ TEST_F(ImprintsTest, TestImprint_Random) { } index->bulkload(bulkload_data); auto minimum = 0; - // auto insert_start = data.begin(); - // auto insert_end = data.end(); - // executor::execute_inserts(*index, insert_start, insert_end); - for (size_t key = 0; key < num_keys; key += std::max(1,num_keys / 20)) { uint32_t *result = nullptr; result = index->get(minimum, data[key]); for(size_t i = 0; i < num_keys; i ++) { - // if((data[i] <= data[key] && data[i] > minimum) != ((result[i / 32] >> (i & 31)) & 1u)) { - // std::cout << minimum << " < " << data[i] << " <= " << data[key] << " " << ((result[i / 32] >> (i & 31)) & 1u) << " :: idx = " << i << std::endl; - // } if((result[i / 32] >> (i & 31)) & 1u) { EXPECT_TRUE(data[i] <= data[key] && data[i] > minimum); } else { From 39e8fca40fd38bca9e1075ec0d696732ecba8f3f Mon Sep 17 00:00:00 2001 From: Yanpeng Wei Date: Thu, 6 Feb 2025 16:23:47 +0800 Subject: [PATCH 23/24] remove unused tests --- tests/test_imprints/imprint_tests.cpp | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/tests/test_imprints/imprint_tests.cpp b/tests/test_imprints/imprint_tests.cpp index 328bd5c..dd7a69d 100644 --- a/tests/test_imprints/imprint_tests.cpp +++ b/tests/test_imprints/imprint_tests.cpp @@ -2,20 +2,6 @@ class ImprintsTest : public BlissIndexTest {}; -TEST_F(ImprintsTest, TestImprint_Build) { - index.reset(new bliss::BlissImprintsIndex(64, 64, std::string("unsigned long"))); - std::vector data; - GenerateData(data, num_keys, false); - std::vector < std::pair > bulkload_data; - for (size_t i = 0; i < data.size(); i ++) { - bulkload_data.push_back(std::make_pair(i, data[i])); - } - index->bulkload(bulkload_data); - // auto insert_start = data.begin(); - // auto insert_end = data.end(); - // executor::execute_inserts(*index, insert_start, insert_end); -} - TEST_F(ImprintsTest, TestImprint_Random) { index.reset(new bliss::BlissImprintsIndex(64, 64, std::string("unsigned long"))); std::vector data; From 28f74f36f118f93399763722ea162f69166d432a Mon Sep 17 00:00:00 2001 From: Yanpeng Wei Date: Thu, 6 Feb 2025 16:31:31 +0800 Subject: [PATCH 24/24] resove conflicts --- CMakeLists.txt | 3 --- external/CMakeLists.txt | 3 --- src/bliss_bench.cpp | 3 --- tests/CMakeLists.txt | 3 --- tests/bliss_index_tests.h | 3 --- 5 files changed, 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4971863..66b181a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -67,13 +67,10 @@ target_link_libraries(bliss PUBLIC alex lipp tlx -<<<<<<< HEAD imprints -======= skiplist pgm art ->>>>>>> 92615345b27bee160b0af305c6ffc0870d9725b1 ) target_include_directories(bliss PUBLIC diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt index 27b5bd9..3feffa8 100644 --- a/external/CMakeLists.txt +++ b/external/CMakeLists.txt @@ -72,7 +72,6 @@ endif() add_library(tlx INTERFACE) target_include_directories(tlx INTERFACE ${tlx_SOURCE_DIR}/) -<<<<<<< HEAD # find_package(Boost REQUIRED) FetchContent_Declare( imprints @@ -86,7 +85,6 @@ endif() add_library(imprints INTERFACE) target_include_directories(imprints INTERFACE ${imprints_SOURCE_DIR}/include) -======= FetchContent_Declare( skiplist @@ -130,4 +128,3 @@ endif() add_library(pgm INTERFACE) target_include_directories(pgm INTERFACE ${pgm_SOURCE_DIR}) ->>>>>>> 92615345b27bee160b0af305c6ffc0870d9725b1 diff --git a/src/bliss_bench.cpp b/src/bliss_bench.cpp index 60f6327..9fc4b05 100644 --- a/src/bliss_bench.cpp +++ b/src/bliss_bench.cpp @@ -174,17 +174,14 @@ int main(int argc, char *argv[]) { index.reset(new bliss::BlissLippIndex()); } else if (config.index == "btree") { index.reset(new bliss::BlissBTreeIndex()); -<<<<<<< HEAD } else if (config.index == "imprints") { index.reset(new bliss::BlissImprintsIndex(/* block_size */64, /* max_bins */64)); -======= } else if (config.index == "skiplist") { index.reset(new bliss::BlissSkipListIndex()); } else if (config.index == "art") { index.reset(new bliss::BlissARTIndex()); } else if (config.index == "pgm") { index.reset(new bliss::BlissPGMIndex()); ->>>>>>> 92615345b27bee160b0af305c6ffc0870d9725b1 } else { spdlog::error(config.index + " not implemented yet", 1); } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index b24a398..784b18b 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -17,9 +17,6 @@ target_include_directories(bliss_test_infra PUBLIC add_subdirectory(test_alex) add_subdirectory(test_lipp) add_subdirectory(test_btree) -<<<<<<< HEAD add_subdirectory(test_imprints) -======= add_subdirectory(test_skiplist) add_subdirectory(test_art) ->>>>>>> 92615345b27bee160b0af305c6ffc0870d9725b1 diff --git a/tests/bliss_index_tests.h b/tests/bliss_index_tests.h index d305b6a..286ba93 100644 --- a/tests/bliss_index_tests.h +++ b/tests/bliss_index_tests.h @@ -18,12 +18,9 @@ #include "bliss/bench_art.h" #include "bliss/bench_btree.h" #include "bliss/bench_lipp.h" -<<<<<<< HEAD #include "bliss/bench_imprints.h" -======= #include "bliss/bench_skiplist.h" #include "bliss/bench_pgm.h" ->>>>>>> 92615345b27bee160b0af305c6ffc0870d9725b1 #include "bliss/bliss_index.h" #include "bliss/util/args.h" #include "bliss/util/config.h"