diff --git a/CMakeLists.txt b/CMakeLists.txt index cea3fc7ec..dfbbd60ac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -156,6 +156,7 @@ if (TON_USE_ROCKSDB) set(WITH_GFLAGS OFF CACHE BOOL "build with GFlags") set(WITH_TESTS OFF CACHE BOOL "build with tests") set(WITH_TOOLS OFF CACHE BOOL "build with tools") + set(USE_RTTI ON CACHE BOOL "use rtti") set(FAIL_ON_WARNINGS OFF CACHE BOOL "fail on warnings") message("Add rocksdb") add_subdirectory(third-party/rocksdb EXCLUDE_FROM_ALL) diff --git a/crypto/CMakeLists.txt b/crypto/CMakeLists.txt index 069083381..530dc7ba5 100644 --- a/crypto/CMakeLists.txt +++ b/crypto/CMakeLists.txt @@ -144,6 +144,7 @@ set(TON_CRYPTO_SOURCE set(TON_DB_SOURCE vm/db/DynamicBagOfCellsDb.cpp + vm/db/DynamicBagOfCellsDbV2.cpp vm/db/CellStorage.cpp vm/db/TonDb.cpp @@ -541,7 +542,7 @@ target_include_directories(create-state PUBLIC $ #include #include +#include #include #include "openssl/digest.hpp" +#include "storage/db.h" +#include "td/utils/VectorQueue.h" #include "vm/dict.h" -#include #include #include #include -#include +#include -namespace vm { -class ThreadExecutor : public DynamicBagOfCellsDb::AsyncExecutor { +#include +#include +#include + +#include "td/actor/actor.h" +#include "td/utils/overloaded.h" + + +class ActorExecutor : public vm::DynamicBagOfCellsDb::AsyncExecutor { public: - explicit ThreadExecutor(size_t threads_n) { - for (size_t i = 0; i < threads_n; ++i) { - threads_.emplace_back([this]() { - while (true) { - auto task = pop_task(); - if (!task) { - break; - } - CHECK(generation_.load() % 2 == 1); - task(); - } - }); - } + ActorExecutor(size_t tn) : tn_(tn) { + scheduler_.run_in_context([&] { worker_ = td::actor::create_actor("Worker"); }); + thread_ = td::thread([this]() { scheduler_.run(); }); } - - ~ThreadExecutor() override { - for (size_t i = 0; i < threads_.size(); ++i) { - push_task({}); + ~ActorExecutor() { + scheduler_.run_in_context_external([&] { send_closure(worker_, &Worker::close); }); + thread_.join(); + 
} + std::string describe() const override { + return PSTRING() << "ActorExecutor(tn=" << tn_ << ")"; + } + class Worker : public td::actor::Actor { + public: + void close() { + td::actor::core::SchedulerContext::get()->stop(); + stop(); } - for (auto &t : threads_) { - t.join(); + void execute_sync(std::function f) { + f(); } - } + }; void execute_async(std::function f) override { - push_task(std::move(f)); + class Runner : public td::actor::Actor { + public: + explicit Runner(std::function f) : f_(std::move(f)) { + } + void start_up() override { + f_(); + stop(); + } + + private: + std::function f_; + }; + auto context = td::actor::SchedulerContext::get(); + if (context) { + td::actor::create_actor("executeasync", std::move(f)).release(); + } else { + scheduler_.run_in_context_external( + [&] { td::actor::create_actor("executeasync", std::move(f)).release(); }); + } } void execute_sync(std::function f) override { - auto x = generation_.load(); - std::scoped_lock lock(sync_mutex_); - CHECK(x == generation_); - CHECK(generation_.load() % 2 == 1); - f(); - CHECK(generation_.load() % 2 == 1); - } - void inc_generation() { - generation_.fetch_add(1); + auto context = td::actor::SchedulerContext::get(); + if (context) { + td::actor::send_closure(worker_, &Worker::execute_sync, std::move(f)); + } else { + scheduler_.run_in_context_external( + [&] { td::actor::send_closure(worker_, &Worker::execute_sync, std::move(f)); }); + } } private: - std::atomic generation_{0}; - std::queue, size_t>> queue_; - std::mutex queue_mutex_; - std::condition_variable cv_; - std::mutex sync_mutex_; - std::vector threads_; - - std::function pop_task() { - std::unique_lock lock(queue_mutex_); - cv_.wait(lock, [&] { return !queue_.empty(); }); - CHECK(!queue_.empty()); - auto task = std::move(queue_.front()); - queue_.pop(); - CHECK(task.second == generation_); - return task.first; - } - - void push_task(std::function task) { - { - std::scoped_lock lock(queue_mutex_); - 
queue_.emplace(std::move(task), generation_.load()); - } - cv_.notify_one(); - } + size_t tn_; + td::actor::Scheduler scheduler_{{tn_}, false, td::actor::Scheduler::Paused}; + td::actor::ActorOwn worker_; + td::thread thread_; }; +namespace vm { std::vector do_get_serialization_modes() { std::vector res; for (int i = 0; i < 32; i++) { @@ -324,6 +323,34 @@ TEST(Cell, sha_benchmark_threaded) { bench_threaded([n]() { return BenchSha256(n); }); } } +class BenchTasks : public td::Benchmark { + public: + explicit BenchTasks(size_t tn) : tn_(tn) { + } + + std::string get_description() const override { + return PSTRING() << "bench_tasks(threads_n=" << tn_ << ")"; + } + + void run(int n) override { + ActorExecutor executor(tn_); + for (int i = 0; i < n; i++) { + std::latch latch(tn_); + for (size_t j = 0; j < tn_; j++) { + executor.execute_async([&]() { latch.count_down(); }); + } + latch.wait(); + } + } + + private: + size_t tn_{}; +}; +TEST(Bench, Tasks) { + for (size_t tn : {1, 4, 16}) { + bench(BenchTasks(tn)); + } +} std::string serialize_boc(Ref cell, int mode = 31) { CHECK(cell.not_null()); @@ -437,6 +464,8 @@ class CellExplorer { cs_ = {}; break; } + default: + UNREACHABLE(); } } @@ -474,6 +503,8 @@ class CellExplorer { case op.ReadCellSlice: log_ << "read slice " << op.children_mask << "\n"; break; + default: + UNREACHABLE(); } } void log_cell(const Ref &cell) { @@ -627,7 +658,9 @@ TEST(Cell, MerkleProof) { auto exploration2 = CellExplorer::explore(usage_cell, exploration.ops); ASSERT_EQ(exploration.log, exploration2.log); - auto is_prunned = [&](const Ref &cell) { return exploration.visited.count(cell->get_hash()) == 0; }; + auto is_prunned = [&](const Ref &cell_to_check) { + return exploration.visited.count(cell_to_check->get_hash()) == 0; + }; auto proof = MerkleProof::generate(cell, is_prunned); // CellBuilder::virtualize(proof, 1); //ASSERT_EQ(1u, proof->get_level()); @@ -706,7 +739,7 @@ TEST(Cell, MerkleProofCombine) { check(proof_union_fast); } { - auto cell 
= MerkleProof::virtualize(proof12, 1); + cell = MerkleProof::virtualize(proof12, 1); auto usage_tree = std::make_shared(); auto usage_cell = UsageCell::create(cell, usage_tree->root_ptr()); @@ -927,7 +960,6 @@ TEST(TonDb, InMemoryDynamicBocSimple) { auto before = counter(); SCOPE_EXIT { LOG_CHECK(before == counter()) << before << " vs " << counter(); - ; }; td::Random::Xorshift128plus rnd{123}; auto kv = std::make_shared(); @@ -963,26 +995,193 @@ TEST(TonDb, InMemoryDynamicBocSimple) { int VERBOSITY_NAME(boc) = VERBOSITY_NAME(DEBUG) + 10; +struct CellMerger : td::Merger { + void merge_value_and_update(std::string &value, td::Slice update) override { + return CellStorer::merge_value_and_refcnt_diff(value, update); + } + void merge_update_and_update(std::string &left_update, td::Slice right_update) override { + LOG(ERROR) << "update_and_update"; + UNREACHABLE(); + return CellStorer::merge_refcnt_diffs(left_update, right_update); + } +}; +struct CompactionFilterEraseEmptyValues : public rocksdb::CompactionFilter { + bool Filter(int level, const rocksdb::Slice & /*key*/, const rocksdb::Slice &existing_value, std::string *new_value, + bool *value_changed) const override { + return existing_value.empty(); + } + bool FilterMergeOperand(int, const rocksdb::Slice & /*key*/, const rocksdb::Slice &operand) const override { + return operand.empty(); + } + + // Name of the compaction filter + const char *Name() const override { + return "CompactionFilterEraseEmptyValues"; + } +}; +auto to_td(rocksdb::Slice value) -> td::Slice { + return td::Slice(value.data(), value.size()); +} + +struct MergeOperatorAddCellRefcnt : public rocksdb::MergeOperator { + const char *Name() const override { + return "MergeOperatorAddCellRefcnt"; + } + bool FullMergeV2(const MergeOperationInput &merge_in, MergeOperationOutput *merge_out) const override { + CHECK(merge_in.existing_value); + auto &value = *merge_in.existing_value; + CHECK(merge_in.operand_list.size() >= 1); + td::Slice diff; + 
std::string diff_buf; + if (merge_in.operand_list.size() == 1) { + diff = to_td(merge_in.operand_list[0]); + } else { + diff_buf = merge_in.operand_list[0].ToString(); + for (size_t i = 1; i < merge_in.operand_list.size(); ++i) { + CellStorer::merge_refcnt_diffs(diff_buf, to_td(merge_in.operand_list[i])); + } + diff = diff_buf; + } + + merge_out->new_value = value.ToString(); + CellStorer::merge_value_and_refcnt_diff(merge_out->new_value, diff); + return true; + } + bool PartialMerge(const rocksdb::Slice & /*key*/, const rocksdb::Slice &left, const rocksdb::Slice &right, + std::string *new_value, rocksdb::Logger *logger) const override { + *new_value = left.ToString(); + CellStorer::merge_refcnt_diffs(*new_value, to_td(right)); + return true; + } +}; + +struct DB { + std::unique_ptr dboc; + std::shared_ptr kv; + void reset_loader() { + dboc->set_loader(std::make_unique(kv->snapshot())); + } +}; struct BocOptions { - std::shared_ptr async_executor; - std::optional o_in_memory; + using AsyncExecutor = DynamicBagOfCellsDb::AsyncExecutor; + + using CreateInMemoryOptions = DynamicBagOfCellsDb::CreateInMemoryOptions; + using CreateV1Options = DynamicBagOfCellsDb::CreateV1Options; + using CreateV2Options = DynamicBagOfCellsDb::CreateV2Options; + + std::shared_ptr async_executor; + struct KvOptions { + enum KvType { InMemory, RocksDb } kv_type{InMemory}; + bool experimental{false}; + bool no_transactions{false}; + size_t cache_size{0}; + friend td::StringBuilder &operator<<(td::StringBuilder &sb, const KvOptions &kv_options) { + if (kv_options.kv_type == KvType::InMemory) { + return sb << "InMemory{}"; + } + return sb << "RockDb{cache_size=" << kv_options.cache_size << ", no_transactions=" << kv_options.no_transactions + << ", experimental=" << kv_options.experimental << "}"; + } + }; + KvOptions kv_options; + std::variant options; td::uint64 seed{123}; - auto create_dboc(td::KeyValueReader *kv, std::optional o_root_n) { - if (o_in_memory) { - auto res = 
DynamicBagOfCellsDb::create_in_memory(kv, *o_in_memory); - auto stats = res->get_stats().move_as_ok(); - if (o_root_n) { - ASSERT_EQ(*o_root_n, stats.roots_total_count); + std::shared_ptr create_kv(std::shared_ptr old_key_value, bool no_reads = false) { + if (kv_options.kv_type == KvOptions::InMemory) { + if (old_key_value) { + return old_key_value; + } + return std::make_shared(std::make_shared()); + } else if (kv_options.kv_type == KvOptions::RocksDb) { + auto merge_operator = std::make_shared(); + static const CompactionFilterEraseEmptyValues compaction_filter; + CHECK(!old_key_value || old_key_value.use_count() == 1); + std::string db_path = "test_celldb"; + if (old_key_value) { + //LOG(ERROR) << "Reload rocksdb"; + old_key_value.reset(); + } else { + //LOG(ERROR) << "New rocksdb"; + td::RocksDb::destroy(db_path).ensure(); + } + auto db_options = td::RocksDbOptions{ + .block_cache = {}, + .merge_operator = merge_operator, + .compaction_filter = &compaction_filter, + .experimental = kv_options.experimental, + .no_reads = no_reads, + .no_transactions = kv_options.no_transactions, + .use_direct_reads = true, + .no_block_cache = true, + }; + if (kv_options.cache_size != 0) { + db_options.no_block_cache = false; + db_options.block_cache = rocksdb::NewLRUCache(kv_options.cache_size); } - VLOG(boc) << "reset roots_n=" << stats.roots_total_count << " cells_n=" << stats.cells_total_count; - return res; + return std::make_shared(td::RocksDb::open(db_path, std::move(db_options)).move_as_ok()); + } else { + UNREACHABLE(); + } + } + void check_kv_is_empty(KeyValue &kv) { + if (kv_options.kv_type == KvOptions::InMemory) { + ASSERT_EQ(0u, kv.count("").move_as_ok()); + return; } - return DynamicBagOfCellsDb::create(); + + size_t non_empty_values = 0; + kv.for_each([&](auto key, auto value) { + non_empty_values += !value.empty(); + return td::Status::OK(); + }); + if (non_empty_values != 0) { + kv.for_each([&](auto key, auto value) { + LOG(ERROR) << "Key: " << 
td::hex_encode(key) << " Value: " << td::hex_encode(value); + std::string x; + LOG(ERROR) << int(kv.get(key, x).move_as_ok()); + return td::Status::OK(); + }); + } + ASSERT_EQ(0u, non_empty_values); + } + + [[nodiscard]] auto create_db(DB db, std::optional o_root_n) { + auto old_boc = std::move(db.dboc); + auto old_kv = std::move(db.kv); + old_boc.reset(); + using ResT = DB; + return std::visit(td::overloaded( + [&](CreateV1Options &) -> ResT { + auto new_kv = create_kv(std::move(old_kv)); + auto res = DynamicBagOfCellsDb::create(); + res->set_loader(std::make_unique(new_kv->snapshot())); + return DB{.dboc = std::move(res), .kv = std::move(new_kv)}; + }, + [&](CreateV2Options &options) -> ResT { + auto new_kv = create_kv(std::move(old_kv)); + auto res = DynamicBagOfCellsDb::create_v2(options); + res->set_loader(std::make_unique(new_kv->snapshot())); + return DB{.dboc = std::move(res), .kv = std::move(new_kv)}; + }, + [&](CreateInMemoryOptions &options) -> ResT { + auto read_kv = create_kv(std::move(old_kv), false); + auto res = DynamicBagOfCellsDb::create_in_memory(read_kv.get(), options); + auto new_kv = create_kv(std::move(read_kv), true); + res->set_loader(std::make_unique(new_kv->snapshot())); + auto stats = res->get_stats().move_as_ok(); + if (o_root_n) { + ASSERT_EQ(*o_root_n, stats.roots_total_count); + } + VLOG(boc) << "reset roots_n=" << stats.roots_total_count + << " cells_n=" << stats.cells_total_count; + return DB{.dboc = std::move(res), .kv = std::move(new_kv)}; + }), + options); }; void prepare_commit(DynamicBagOfCellsDb &dboc) { + td::PerfWarningTimer warning_timer("test_db_prepare_commit"); if (async_executor) { - async_executor->inc_generation(); std::latch latch(1); td::Result res; async_executor->execute_sync([&] { @@ -993,70 +1192,175 @@ struct BocOptions { }); latch.wait(); async_executor->execute_sync([&] {}); - async_executor->inc_generation(); + res.ensure(); } else { dboc.prepare_commit(); } } + enum CacheAction { ResetCache, KeepCache }; + 
void write_commit(DynamicBagOfCellsDb &dboc, std::shared_ptr kv, CacheAction action) { + td::PerfWarningTimer warning_timer("test_db_write_commit"); + kv->begin_write_batch().ensure(); + CellStorer cell_storer(*kv); + { + td::PerfWarningTimer timer("test_db_commit"); + dboc.commit(cell_storer).ensure(); + } + { + td::PerfWarningTimer timer("test_db_commit_write_batch"); + kv->commit_write_batch().ensure(); + } + switch (action) { + case ResetCache: { + td::PerfWarningTimer timer("test_db_reset_cache"); + dboc.set_loader(std::make_unique(kv->snapshot())); + break; + } + case KeepCache: + break; + } + } + + void commit(DB &db, CacheAction action = ResetCache) { + prepare_commit(*db.dboc); + write_commit(*db.dboc, db.kv, action); + } + + std::string description() const { + td::StringBuilder sb; + + sb << "DBOC(type="; + std::visit(td::overloaded([&](const CreateV1Options &) { sb << "V1"; }, + [&](const CreateV2Options &options) { + sb << "V2(concurrency=" << options.extra_threads + 1; + if (options.executor) { + sb << ", executor=" << options.executor->describe(); + } else { + sb << ", executor=threads"; + } + sb << ")"; + }, + [&](const CreateInMemoryOptions &options) { + sb << "InMemory(use_arena=" << options.use_arena + << ", less_memory=" << options.use_less_memory_during_creation << ")"; + }), + options); + sb << kv_options; + if (async_executor) { + sb << ", executor=" << async_executor->describe(); + } + sb << ")"; + + return sb.as_cslice().str(); + } }; template -void with_all_boc_options(F &&f, size_t tests_n = 500) { +void with_all_boc_options(F &&f, size_t tests_n, bool single_thread = false) { LOG(INFO) << "Test dynamic boc"; auto counter = [] { return td::NamedThreadSafeCounter::get_default().get_counter("DataCell").sum(); }; + std::map>> benches; auto run = [&](BocOptions options) { - LOG(INFO) << "\t" << (options.o_in_memory ? "in memory" : "on disk") << (options.async_executor ? 
" async" : ""); - if (options.o_in_memory) { - LOG(INFO) << "\t\tuse_arena=" << options.o_in_memory->use_arena - << " less_memory=" << options.o_in_memory->use_less_memory_during_creation; - } + auto description = options.description(); + LOG(INFO) << "Running " << description; + auto start = td::Timestamp::now(); + DynamicBagOfCellsDb::Stats stats; + auto o_in_memory = std::get_if(&options.options); for (td::uint32 i = 0; i < tests_n; i++) { auto before = counter(); + options.seed = i == 0 ? 123 : i; - f(options); + auto stats_diff = f(options); + stats.apply_diff(stats_diff); + auto after = counter(); - LOG_CHECK((options.o_in_memory && options.o_in_memory->use_arena) || before == after) - << before << " vs " << after; + LOG_CHECK((o_in_memory && o_in_memory->use_arena) || before == after) << before << " vs " << after; } + LOG(INFO) << "\ttook " << td::Timestamp::now().at() - start.at() << " seconds"; + LOG(INFO) << stats; + for (auto &[key, value] : stats.named_stats.stats_int) { + if (td::begins_with(key, "bench_")) { + benches[key].emplace_back(value, description); + } + } + }; + + // NB: use .experimental to play with different RocksDb parameters + // Note, that new benchmark are necessary to fully understand the effect of different RocksDb options + std::vector kv_options_list = { + // BocOptions::KvOptions{.kv_type = BocOptions::KvOptions::InMemory}, + // BocOptions::KvOptions{.kv_type = BocOptions::KvOptions::RocksDb, .experimental = false, .cache_size = 0}, + BocOptions::KvOptions{ + .kv_type = BocOptions::KvOptions::RocksDb, .experimental = false, .cache_size = size_t{128 << 20}}, }; - run({.async_executor = std::make_shared(4)}); - run({}); - for (auto use_arena : {false, true}) { - for (auto less_memory : {false, true}) { - run({.o_in_memory = - DynamicBagOfCellsDb::CreateInMemoryOptions{.extra_threads = std::thread::hardware_concurrency(), - .verbose = false, - .use_arena = use_arena, - .use_less_memory_during_creation = less_memory}}); + std::vector 
has_executor_options = {false, true}; + for (auto kv_options : kv_options_list) { + for (bool has_executor : has_executor_options) { + std::shared_ptr executor; + if (has_executor) { + executor = std::make_shared( + 4); // 4 - to compare V1 and V2, because V1 has parallel_load = 4 by default + } + // V2 - 4 threads + run({.async_executor = executor, + .kv_options = kv_options, + .options = DynamicBagOfCellsDb::CreateV2Options{ + .extra_threads = 3, .executor = executor, .cache_ttl_max = 5}}); + + // V1 + run({.async_executor = executor, .kv_options = kv_options, .options = DynamicBagOfCellsDb::CreateV1Options{}}); + + // V2 - one thread + run({.async_executor = executor, + .kv_options = kv_options, + .options = + DynamicBagOfCellsDb::CreateV2Options{.extra_threads = 0, .executor = executor, .cache_ttl_max = 5}}); + + // InMemory + for (auto use_arena : {false, true}) { + for (auto less_memory : {false, true}) { + run({.async_executor = executor, + .kv_options = kv_options, + .options = + DynamicBagOfCellsDb::CreateInMemoryOptions{.extra_threads = std::thread::hardware_concurrency(), + .verbose = false, + .use_arena = use_arena, + .use_less_memory_during_creation = less_memory}}); + } + } + } + } + + for (auto &[name, v] : benches) { + std::sort(v.begin(), v.end()); + LOG(INFO) << "Bench " << name; + for (auto &[t, name] : v) { + LOG(INFO) << "\t" << name << " " << double(t) / 1000 << "s"; } } } -void test_dynamic_boc(BocOptions options) { - auto counter = [] { return td::NamedThreadSafeCounter::get_default().get_counter("DataCell").sum(); }; - auto before = counter(); - SCOPE_EXIT { - LOG_CHECK((options.o_in_memory && options.o_in_memory->use_arena) || before == counter()) - << before << " vs " << counter(); - }; +DynamicBagOfCellsDb::Stats test_dynamic_boc(BocOptions options) { + DynamicBagOfCellsDb::Stats stats; td::Random::Xorshift128plus rnd{options.seed}; std::string old_root_hash; std::string old_root_serialization; - auto kv = std::make_shared(); - auto 
create_dboc = [&]() { + DB db; + auto reload_db = [&]() { auto roots_n = old_root_hash.empty() ? 0 : 1; - return options.create_dboc(kv.get(), roots_n); + db = options.create_db(std::move(db), roots_n); }; - auto dboc = create_dboc(); - dboc->set_loader(std::make_unique(kv)); + reload_db(); for (int t = 1000; t >= 0; t--) { if (rnd() % 10 == 0) { - dboc = create_dboc(); + reload_db(); } - dboc->set_loader(std::make_unique(kv)); + db.dboc->load_cell(vm::CellHash{}.as_slice()).ensure_error(); + + db.reset_loader(); Ref old_root; if (!old_root_hash.empty()) { - old_root = dboc->load_cell(old_root_hash).move_as_ok(); + old_root = db.dboc->load_cell(old_root_hash).move_as_ok(); auto serialization = serialize_boc(old_root); ASSERT_EQ(old_root_serialization, serialization); } @@ -1071,47 +1375,61 @@ void test_dynamic_boc(BocOptions options) { ->get_root_cell(0) .move_as_ok(); - dboc->dec(old_root); + db.dboc->dec(old_root); if (t != 0) { - dboc->inc(cell); - } - dboc->prepare_commit().ensure(); - { - CellStorer cell_storer(*kv); - dboc->commit(cell_storer).ensure(); + db.dboc->inc(cell); } + options.commit(db, BocOptions::ResetCache); } - ASSERT_EQ(0u, kv->count("").ok()); + options.check_kv_is_empty(*db.kv); + + stats.named_stats.apply_diff(db.kv->get_usage_stats().to_named_stats()); + return stats; } TEST(TonDb, DynamicBoc) { with_all_boc_options(test_dynamic_boc, 1); }; -void test_dynamic_boc2(BocOptions options) { +DynamicBagOfCellsDb::Stats test_dynamic_boc2(BocOptions options) { td::Random::Xorshift128plus rnd{options.seed}; + DynamicBagOfCellsDb::Stats stats; - int total_roots = rnd.fast(1, !rnd.fast(0, 10) * 100 + 10); + int total_roots = rnd.fast(1, !rnd.fast(0, 30) * 100 + 10); int max_roots = rnd.fast(1, 20); + int max_cells = 20; + + // VERBOSITY_NAME(boc) = 1; + // LOG(WARNING) << "====================================================\n\n"; + // max_roots = 2; + // total_roots = 2; + // max_cells = 2; + + auto meta_key = [](size_t i) { return PSTRING() << 
"meta." << i; }; + std::array meta; + int last_commit_at = 0; int first_root_id = 0; int last_root_id = 0; - auto kv = std::make_shared(); - auto create_dboc = [&](td::int64 root_n) { return options.create_dboc(kv.get(), root_n); }; - auto dboc = create_dboc(0); - dboc->set_loader(std::make_unique(kv)); + DB db; + auto reload_db = [&](td::int64 root_n) { db = options.create_db(std::move(db), root_n); }; + reload_db(0); auto counter = [] { return td::NamedThreadSafeCounter::get_default().get_counter("DataCell").sum(); }; auto before = counter(); - SCOPE_EXIT{ - // LOG_CHECK((options.o_in_memory && options.o_in_memory->use_arena) || before == counter()) - // << before << " vs " << counter(); + SCOPE_EXIT { + bool skip_check = false; + if (std::holds_alternative(options.options) && + std::get(options.options).use_arena) { + skip_check = true; + } + LOG_IF(FATAL, !(skip_check || before == counter())) << before << " vs " << counter(); }; std::vector> roots(max_roots); std::vector root_hashes(max_roots); auto add_root = [&](Ref root) { - dboc->inc(root); + db.dboc->inc(root); root_hashes[last_root_id % max_roots] = (root->get_hash().as_slice().str()); roots[last_root_id % max_roots] = root; last_root_id++; @@ -1124,9 +1442,9 @@ void test_dynamic_boc2(BocOptions options) { VLOG(boc) << " from db"; auto from_root_hash = root_hashes[root_id % max_roots]; if (rnd() % 2 == 0) { - from_root = dboc->load_root(from_root_hash).move_as_ok(); + from_root = db.dboc->load_root(from_root_hash).move_as_ok(); } else { - from_root = dboc->load_cell(from_root_hash).move_as_ok(); + from_root = db.dboc->load_cell(from_root_hash).move_as_ok(); } } else { VLOG(boc) << "FROM MEMORY"; @@ -1147,31 +1465,69 @@ void test_dynamic_boc2(BocOptions options) { from_root = get_root(rnd.fast(first_root_id, last_root_id - 1)); } VLOG(boc) << " ..."; - auto new_root = gen_random_cell(rnd.fast(1, 20), from_root, rnd); - root_cnt[new_root->get_hash()]++; - add_root(std::move(new_root)); + auto new_root_cell 
= gen_random_cell(rnd.fast(1, max_cells), from_root, rnd); + root_cnt[new_root_cell->get_hash()]++; + add_root(std::move(new_root_cell)); VLOG(boc) << " OK"; }; - auto commit = [&] { - VLOG(boc) << "commit"; - //rnd.fast(0, 1); - options.prepare_commit(*dboc); - { - CellStorer cell_storer(*kv); - dboc->commit(cell_storer); + td::UsageStats commit_stats{}; + auto commit = [&](bool finish = false) { + for (size_t i = 0; i < meta.size(); i++) { + std::string value; + auto status = db.dboc->meta_get(meta_key(i), value).move_as_ok(); + if (status == KeyValue::GetStatus::Ok) { + ASSERT_EQ(value, meta[i]); + ASSERT_TRUE(!meta[i].empty()); + } else { + ASSERT_TRUE(meta[i].empty()); + } + + if (meta[i].empty()) { + if (!finish && rnd() % 2 == 0) { + meta[i] = td::to_string(rnd()); + db.dboc->meta_set(meta_key(i), meta[i]); + VLOG(boc) << "meta set " << meta_key(i) << " " << meta[i]; + } + } else { + auto f = finish ? 1 : rnd() % 3; + if (f == 0) { + meta[i] = td::to_string(rnd()); + db.dboc->meta_set(meta_key(i), meta[i]); + VLOG(boc) << "meta set " << meta_key(i) << " " << meta[i]; + } else if (f == 1) { + meta[i] = ""; + db.dboc->meta_erase(meta_key(i)); + VLOG(boc) << "meta erase " << meta_key(i); + } + } } - dboc->set_loader(std::make_unique(kv)); + + VLOG(boc) << "before commit cells_in_db=" << db.kv->count(""); + //rnd.fast(0, 1); + auto stats_before = db.kv->get_usage_stats(); + options.commit(db, BocOptions::ResetCache); + auto stats_after = db.kv->get_usage_stats(); + commit_stats = commit_stats + stats_after - stats_before; + VLOG(boc) << "after commit cells_in_db=" << db.kv->count(""); + + // db.reset_loader(); for (int i = last_commit_at; i < last_root_id; i++) { roots[i % max_roots].clear(); } last_commit_at = last_root_id; }; - auto reset = [&] { + auto reset = [&](bool force_full = false) { VLOG(boc) << "reset"; commit(); - dboc = create_dboc(td::int64(root_cnt.size())); - dboc->set_loader(std::make_unique(kv)); + if (rnd() % 3 == 0 || force_full) { + // very 
slow for rocksdb + auto r_stats = db.dboc->get_stats(); + if (r_stats.is_ok()) { + stats.apply_diff(r_stats.ok()); + } + reload_db(root_cnt.size()); + } }; auto delete_root = [&] { @@ -1187,22 +1543,28 @@ void test_dynamic_boc2(BocOptions options) { root_cnt.erase(it); } - dboc->dec(std::move(old_root)); + db.dboc->dec(std::move(old_root)); first_root_id++; VLOG(boc) << " OK"; }; td::RandomSteps steps({{new_root, 10}, {delete_root, 9}, {commit, 2}, {reset, 1}}); while (first_root_id != total_roots) { - VLOG(boc) << first_root_id << " " << last_root_id << " " << kv->count("").ok(); + VLOG(boc) << first_root_id << " " << last_root_id; // << " " << db.kv->count("").ok(); steps.step(rnd); } - commit(); - ASSERT_EQ(0u, kv->count("").ok()); + commit(true); + options.check_kv_is_empty(*db.kv); + + // auto stats = kv->get_usage_stats(); + // LOG(ERROR) << "total: " << stats; + reset(true); + stats.named_stats.apply_diff(db.kv->get_usage_stats().to_named_stats()); + return stats; } TEST(TonDb, DynamicBoc2) { - with_all_boc_options(test_dynamic_boc2); + with_all_boc_options(test_dynamic_boc2, 50); } template @@ -1341,6 +1703,10 @@ class CompactArray { size_t size() const { return size_; } + void reset() { + size_ = 0; + root_ = {}; + } Ref merkle_proof(std::vector keys) { std::set hashes; @@ -1435,6 +1801,283 @@ class FastCompactArray { std::vector v_; }; +struct BocTestHelper { + public: + BocTestHelper() = default; + BocTestHelper(td::int64 seed) : rnd_(seed) { + } + + CompactArray create_array(size_t size, td::uint64 max_value) { + std::vector v(size); + td::Random::Xorshift128plus rnd{123}; + for (auto &x : v) { + x = rnd() % max_value; + } + return CompactArray(v); + } + + private: + td::Random::Xorshift128plus rnd_{123}; +}; + +DynamicBagOfCellsDb::Stats bench_dboc_get_and_set(BocOptions options) { + BocTestHelper helper(options.seed); + size_t n = 1 << 20; + size_t max_value = 1 << 26; + auto arr = helper.create_array(n, max_value); + + // auto kv = 
std::make_shared(); + td::Slice db_path = "compact_array_db"; + td::RocksDb::destroy(db_path).ensure(); + + DB db = options.create_db({}, {}); + DynamicBagOfCellsDb::Stats stats; + + td::Timer total_timer; + + auto bench = [&](td::Slice desc, auto &&f) { + auto before = db.dboc->get_stats().move_as_ok(); + td::Timer timer; + LOG(ERROR) << "Benchmarking " << desc; + f(); + stats.named_stats.stats_int[desc.str()] = td::int64(timer.elapsed() * 1000); + LOG(ERROR) << "Benchmarking " << desc << " done: " << timer.elapsed() << "s\n"; + auto after = db.dboc->get_stats().move_as_ok(); + after.named_stats.subtract_diff(before.named_stats); + LOG(ERROR) << after; + }; + + td::VectorQueue roots; + // Save array in db + bench(PSLICE() << "bench_inc_large_db(n=" << n << ")", [&] { + db.dboc->inc(arr.root()); + roots.push(arr.root()->get_hash()); + options.commit(db, BocOptions::ResetCache); + }); + bench("bench_compactify", [&] { + auto status = dynamic_cast(*db.kv).raw_db()->CompactRange({}, nullptr, nullptr); + LOG_IF(FATAL, !status.ok()) << status.ToString(); + }); + db = options.create_db(std::move(db), {}); + + bench(PSLICE() << "bench_inc_large_existed_db(n=" << n << ")", [&] { + db.dboc->inc(arr.root()); + roots.push(arr.root()->get_hash()); + options.commit(db, BocOptions::ResetCache); + }); + + td::Random::Xorshift128plus rnd{123}; + while (false) { + auto hash = arr.root()->get_hash(); + arr = CompactArray{n, db.dboc->load_root(hash.as_slice()).move_as_ok()}; + td::Timer timer; + for (size_t i = 0; i < 10000; i++) { + auto pos = rnd() % n; + arr.get(pos); + } + LOG(ERROR) << timer.elapsed() << "s\n"; + db.reset_loader(); + } + + for (auto p : + std::vector>{{10000, 0}, {10000, 5}, {5000, 5000}, {5, 10000}, {0, 10000}}) { + auto get_n = p.first; + auto set_n = p.second; + auto hash = arr.root()->get_hash(); + arr = CompactArray{n, db.dboc->load_root(hash.as_slice()).move_as_ok()}; + bench(PSTRING() << "bench_changes(get_n=" << get_n << ", set_n=" << set_n << ")", [&] { 
+ for (size_t i = 0; i < get_n; i++) { + auto pos = rnd() % n; + arr.get(pos); + } + for (size_t i = 0; i < set_n; i++) { + auto pos = rnd() % n; + auto value = rnd() % max_value; + arr.set(pos, value); + } + }); + bench(PSTRING() << "bench_commit(get_n=" << get_n << ", set_n=" << set_n << ")", [&] { + db.dboc->inc(arr.root()); + roots.push(arr.root()->get_hash()); + options.commit(db, BocOptions::ResetCache); + }); + } + arr.reset(); + + bench(PSLICE() << "bench_dec_some_roots()", [&] { + while (roots.size() > 1) { + auto hash = roots.pop(); + auto cell = db.dboc->load_cell(hash.as_slice()).move_as_ok(); + db.dboc->dec(cell); + } + options.commit(db, BocOptions::ResetCache); + }); + + db = options.create_db(std::move(db), {}); + + bench(PSLICE() << "bench_dec_large_root(n=" << n << ")", [&] { + while (!roots.empty()) { + auto hash = roots.pop(); + auto cell = db.dboc->load_cell(hash.as_slice()).move_as_ok(); + db.dboc->dec(cell); + + /* + do { + auto cell = db.dboc->load_cell(hash.as_slice()).move_as_ok(); + db.dboc->dec(cell); + cell = {}; + options.prepare_commit(*db.dboc); + //db.dboc->prepare_commit().ensure(); + db.reset_loader(); + db = options.create_db(std::move(db), {}); + } while (true); + */ + } + options.commit(db, BocOptions::ResetCache); + }); + stats.named_stats.stats_int["bench_total"] = td::int64(total_timer.elapsed() * 1000); + + return stats; +} + +TEST(TonDb, BenchDynamicBocGetAndSet) { + with_all_boc_options(bench_dboc_get_and_set, 1); +} + +TEST(TonDb, DynamicBocIncSimple) { + auto kv = std::make_shared(std::make_shared()); + auto db = DynamicBagOfCellsDb::create_v2({.extra_threads = 0}); + db->set_loader(std::make_unique(kv)); + + td::Random::Xorshift128plus rnd(123); + size_t size = 4; + std::vector values(size); + for (auto &v : values) { + //v = rnd() % 2; + v = rnd(); + } + // 1. 
Create large dictionary and store it in db + auto arr_ptr = std::make_unique(values); + auto &arr = *arr_ptr; + td::VectorQueue queue; + auto push = [&]() { + //LOG(ERROR) << "PUSH ROOT"; + auto begin_stats = kv->get_usage_stats(); + db->inc(arr.root()); + queue.push(arr.root()->get_hash()); + vm::CellStorer cell_storer(*kv); + db->commit(cell_storer); + auto end_stats = kv->get_usage_stats(); + LOG(ERROR) << end_stats - begin_stats; + db->set_loader(std::make_unique(kv)); + auto hash = arr.root()->get_hash(); + arr = CompactArray{size, db->load_root(hash.as_slice()).move_as_ok()}; + //LOG(ERROR) << "CELLS IN DB: " << kv->count("").move_as_ok(); + }; + auto pop = [&]() { + if (queue.empty()) { + return; + } + //LOG(ERROR) << "POP ROOT"; + auto begin_stats = kv->get_usage_stats(); + auto cell = db->load_cell(queue.pop().as_slice()).move_as_ok(); + db->dec(cell); + vm::CellStorer cell_storer(*kv); + db->commit(cell_storer); + auto end_stats = kv->get_usage_stats(); + db->set_loader(std::make_unique(kv)); + //LOG(ERROR) << end_stats - begin_stats; + //LOG(ERROR) << "CELLS IN DB: " << kv->count("").move_as_ok(); + }; + auto upd = [&] { + for (int i = 0; i < 20; i++) { + auto pos = rnd.fast(0, td::narrow_cast(size) - 1); + if (rnd() % 2) { + auto value = rnd() % 2; + arr.set(pos, value); + } else { + arr.get(pos); + } + } + }; + + //LOG(ERROR) << "Created compact array"; + push(); + pop(); + //CHECK(kv->count("").move_as_ok() == 0); + + // 2. Lets change first 20 keys and read last 20 keys + /* + for (size_t i = 0; i < 20 && i < size; i++) { + arr.set(i, rnd()); + } + */ + //arr.set(0, rnd()); + arr.set(size - 1, rnd()); + for (size_t i = 0; i < 20 && i < size; i++) { + arr.get(size - i - 1); + } + + // 3. 
And now commit diff with stats + push(); + push(); + upd(); + upd(); + push(); + push(); + upd(); + pop(); + pop(); + upd(); + push(); + push(); + while (!queue.empty()) { + pop(); + } + LOG(ERROR) << "CELLS IN DB: " << kv->count("").move_as_ok(); +} + +class BenchCellStorerMergeRefcntDiffs : public td::Benchmark { + public: + std::string get_description() const override { + return PSTRING() << "bench_cells_storer_merge_refcnt_diffs"; + } + + void run(int n) override { + auto cell = vm::CellBuilder().store_bytes(std::string(32, 'A')).finalize(); + auto left_update = CellStorer::serialize_refcnt_diffs(1); + auto right_update = CellStorer::serialize_refcnt_diffs(1); + for (int i = 0; i < n; i++) { + CellStorer::merge_refcnt_diffs(left_update, right_update); + } + } + + private: + size_t tn_{}; +}; +class BenchCellStorerMergeValueAndRefcntDiff : public td::Benchmark { + public: + std::string get_description() const override { + return PSTRING() << "bench_cells_storer_merge_value_and_refcnt_diffs"; + } + + void run(int n) override { + auto cell = vm::CellBuilder().store_bytes(std::string(32, 'A')).finalize(); + auto value = CellStorer::serialize_value(10, cell, false); + auto update = CellStorer::serialize_refcnt_diffs(1); + for (int i = 0; i < n; i++) { + CellStorer::merge_value_and_refcnt_diff(value, update); + } + } + + private: + size_t tn_{}; +}; +TEST(Bench, CellStorerMerge) { + bench(BenchCellStorerMergeRefcntDiffs()); + bench(BenchCellStorerMergeValueAndRefcntDiff()); +} + TEST(Cell, BocHands) { serialize_boc(CellBuilder{}.store_bytes("AAAAAAAA").finalize()); auto a = CellBuilder{}.store_bytes("abcd").store_ref(CellBuilder{}.store_bytes("???").finalize()).finalize(); @@ -2262,7 +2905,37 @@ TEST(TonDb, BocRespectsUsageCell) { ASSERT_STREQ(serialization, serialization_of_virtualized_cell); } -void test_dynamic_boc_respectes_usage_cell(vm::BocOptions options) { +TEST(UsageTree, ThreadSafe) { + size_t test_n = 100; + td::Random::Xorshift128plus rnd(123); + for 
(size_t test_i = 0; test_i < test_n; test_i++) { + auto cell = vm::gen_random_cell(rnd.fast(2, 100), rnd, false); + auto usage_tree = std::make_shared(); + auto usage_cell = vm::UsageCell::create(cell, usage_tree->root_ptr()); + std::ptrdiff_t threads_n = 1; // TODO: when CellUsageTree is thread safe, change it to 4 + auto barrier = std::barrier{threads_n}; + std::vector threads; + std::vector explorations(threads_n); + for (std::ptrdiff_t i = 0; i < threads_n; i++) { + threads.emplace_back([&, i = i]() { + barrier.arrive_and_wait(); + explorations[i] = vm::CellExplorer::random_explore(usage_cell, rnd); + }); + } + for (auto &thread : threads) { + thread.join(); + } + auto proof = vm::MerkleProof::generate(cell, usage_tree.get()); + auto virtualized_proof = vm::MerkleProof::virtualize(proof, 1); + for (auto &exploration : explorations) { + auto new_exploration = vm::CellExplorer::explore(virtualized_proof, exploration.ops); + ASSERT_EQ(exploration.log, new_exploration.log); + } + } +} + +/* +vm::DynamicBagOfCellsDb::Stats test_dynamic_boc_respects_usage_cell(vm::BocOptions options) { td::Random::Xorshift128plus rnd(options.seed); auto cell = vm::gen_random_cell(20, rnd, true); auto usage_tree = std::make_shared(); @@ -2283,11 +2956,14 @@ void test_dynamic_boc_respectes_usage_cell(vm::BocOptions options) { auto serialization_of_virtualized_cell = serialize_boc(virtualized_proof); auto serialization = serialize_boc(cell); ASSERT_STREQ(serialization, serialization_of_virtualized_cell); + vm::DynamicBagOfCellsDb::Stats stats; + return stats; } TEST(TonDb, DynamicBocRespectsUsageCell) { - vm::with_all_boc_options(test_dynamic_boc_respectes_usage_cell, 20); + vm::with_all_boc_options(test_dynamic_boc_respects_usage_cell, 20, true); } +*/ TEST(TonDb, LargeBocSerializer) { td::Random::Xorshift128plus rnd{123}; diff --git a/crypto/vm/cells/Cell.h b/crypto/vm/cells/Cell.h index a75371dbb..e2b47ffc0 100644 --- a/crypto/vm/cells/Cell.h +++ b/crypto/vm/cells/Cell.h @@ -55,6 
+55,7 @@ class Cell : public CellTraits { } // load interface + virtual td::Status set_data_cell(Ref &&data_cell) const = 0; virtual td::Result load_cell() const = 0; virtual Ref virtualize(VirtualizationParameters virt) const; virtual td::uint32 get_virtualization() const = 0; diff --git a/crypto/vm/cells/DataCell.cpp b/crypto/vm/cells/DataCell.cpp index 4dd301616..73e86517a 100644 --- a/crypto/vm/cells/DataCell.cpp +++ b/crypto/vm/cells/DataCell.cpp @@ -36,7 +36,8 @@ struct ArenaAllocator { T* obj = new (ptr) T(std::forward(args)...); return std::unique_ptr(obj); } -private: + + private: td::MutableSlice alloc_batch() { size_t batch_size = 1 << 20; auto batch = std::make_unique(batch_size); @@ -53,7 +54,7 @@ struct ArenaAllocator { return res; } }; -} +} // namespace std::unique_ptr DataCell::create_empty_data_cell(Info info) { if (use_arena) { ArenaAllocator allocator; diff --git a/crypto/vm/cells/DataCell.h b/crypto/vm/cells/DataCell.h index 6d3c845fc..b39ee1d4b 100644 --- a/crypto/vm/cells/DataCell.h +++ b/crypto/vm/cells/DataCell.h @@ -31,6 +31,9 @@ class DataCell : public Cell { static thread_local bool use_arena; DataCell(const DataCell& other) = delete; + DataCell(DataCell&& other) = delete; + DataCell& operator=(const DataCell& other) = delete; + DataCell& operator=(DataCell&& other) = delete; ~DataCell() override; static void store_depth(td::uint8* dest, td::uint16 depth) { @@ -126,6 +129,10 @@ class DataCell : public Cell { explicit DataCell(Info info); public: + td::Status set_data_cell(Ref&& data_cell) const override { + CHECK(get_hash() == data_cell->get_hash()); + return td::Status::OK(); + } td::Result load_cell() const override { return LoadedCell{Ref{this}, {}, {}}; } @@ -228,4 +235,3 @@ inline CellHash as_cell_hash(const Ref& cell) { } } // namespace vm - diff --git a/crypto/vm/cells/ExtCell.h b/crypto/vm/cells/ExtCell.h index 401bb0483..dbbd8575b 100644 --- a/crypto/vm/cells/ExtCell.h +++ b/crypto/vm/cells/ExtCell.h @@ -65,6 +65,9 @@ class 
ExtCell : public Cell { bool is_loaded() const override { return CellView(this)->is_loaded(); } + Ref> get_prunned_cell() const { + return prunned_cell_.load(); + } private: mutable td::AtomicRef data_cell_; @@ -112,6 +115,23 @@ class ExtCell : public Cell { return CellView(this)->get_depth(level); } + td::Status set_data_cell(Ref&& new_data_cell) const override { + auto prunned_cell = prunned_cell_.load(); + if (prunned_cell.is_null()) { + auto old_data_cell = data_cell_.get_unsafe(); + DCHECK(old_data_cell); + TRY_STATUS(old_data_cell->check_equals_unloaded(new_data_cell)); + return td::Status::OK(); + } + + TRY_STATUS(prunned_cell->check_equals_unloaded(new_data_cell)); + if (data_cell_.store_if_empty(new_data_cell)) { + prunned_cell_.store({}); + get_thread_safe_counter_unloaded().add(-1); + } + return td::Status::OK(); + } + td::Result> load_data_cell() const { auto data_cell = data_cell_.get_unsafe(); if (data_cell) { diff --git a/crypto/vm/cells/PrunnedCell.h b/crypto/vm/cells/PrunnedCell.h index a58b245cc..6e8b77093 100644 --- a/crypto/vm/cells/PrunnedCell.h +++ b/crypto/vm/cells/PrunnedCell.h @@ -142,6 +142,10 @@ class PrunnedCell : public Cell { return info_.get_depth(get_storage())[get_level_mask().apply(level).get_hash_i()]; } + td::Status set_data_cell(Ref &&data_cell) const override { + return td::Status::OK(); + } + td::Result load_cell() const override { return td::Status::Error("Can't load prunned branch"); } diff --git a/crypto/vm/cells/UsageCell.h b/crypto/vm/cells/UsageCell.h index 3e6e88982..978b91f76 100644 --- a/crypto/vm/cells/UsageCell.h +++ b/crypto/vm/cells/UsageCell.h @@ -36,6 +36,9 @@ class UsageCell : public Cell { return Ref{true, std::move(cell), std::move(tree_node), PrivateTag{}}; } + td::Status set_data_cell(Ref &&data_cell) const override { + return cell_->set_data_cell(std::move(data_cell)); + } // load interface td::Result load_cell() const override { TRY_RESULT(loaded_cell, cell_->load_cell()); diff --git 
a/crypto/vm/cells/VirtualCell.h b/crypto/vm/cells/VirtualCell.h index 02abc1c88..a75bdf9de 100644 --- a/crypto/vm/cells/VirtualCell.h +++ b/crypto/vm/cells/VirtualCell.h @@ -37,6 +37,9 @@ class VirtualCell : public Cell { } // load interface + td::Status set_data_cell(Ref &&data_cell) const override { + return cell_->set_data_cell(std::move(data_cell)); + } td::Result load_cell() const override { TRY_RESULT(loaded_cell, cell_->load_cell()); loaded_cell.virt = loaded_cell.virt.apply(virt_); diff --git a/crypto/vm/db/CellHashTable.h b/crypto/vm/db/CellHashTable.h index 522c987be..a38980638 100644 --- a/crypto/vm/db/CellHashTable.h +++ b/crypto/vm/db/CellHashTable.h @@ -40,6 +40,17 @@ class CellHashTable { return res; } + template + std::pair emplace(td::Slice hash, ArgsT &&...args) { + auto it = set_.find(hash); + if (it != set_.end()) { + return std::pair(const_cast(*it), false); + } + auto res = set_.emplace(std::forward(args)...); + CHECK(res.second); + return std::pair(const_cast(*res.first), res.second); + } + template void for_each(F &&f) { for (auto &info : set_) { @@ -64,7 +75,7 @@ class CellHashTable { size_t size() const { return set_.size(); } - InfoT* get_if_exists(td::Slice hash) { + InfoT *get_if_exists(td::Slice hash) { auto it = set_.find(hash); if (it != set_.end()) { return &const_cast(*it); diff --git a/crypto/vm/db/CellStorage.cpp b/crypto/vm/db/CellStorage.cpp index 06df461ef..a07d85e87 100644 --- a/crypto/vm/db/CellStorage.cpp +++ b/crypto/vm/db/CellStorage.cpp @@ -17,14 +17,19 @@ Copyright 2017-2020 Telegram Systems LLP */ #include "vm/db/CellStorage.h" + +#include "td/utils/Parser.h" #include "vm/db/DynamicBagOfCellsDb.h" #include "vm/boc.h" #include "td/utils/base64.h" #include "td/utils/tl_parsers.h" #include "td/utils/tl_helpers.h" +#include + namespace vm { namespace { + class RefcntCellStorer { public: RefcntCellStorer(td::int32 refcnt, const td::Ref &cell, bool as_boc) @@ -43,7 +48,9 @@ class RefcntCellStorer { storer.store_slice(data); 
return; } + CHECK(refcnt_ > 0); store(refcnt_, storer); + CHECK(cell_.not_null()); store(*cell_, storer); for (unsigned i = 0; i < cell_->size_refs(); i++) { auto cell = cell_->get_ref(i); @@ -91,6 +98,7 @@ class RefcntCellParser { stored_boc_ = true; parse(refcnt, parser); } + CHECK(refcnt > 0); if (!need_data_) { return; } @@ -159,6 +167,9 @@ td::Result CellLoader::load(td::Slice hash, bool need_da DCHECK(get_status == KeyValue::GetStatus::NotFound); return LoadResult{}; } + if (serialized.empty()) { + return LoadResult{}; + } TRY_RESULT(res, load(hash, serialized, need_data, ext_cell_creator)); if (on_load_callback_) { on_load_callback_(res); @@ -198,6 +209,7 @@ td::Result CellLoader::load_refcnt(td::Slice hash) { if (res.refcnt_ == -1) { parse(res.refcnt_, parser); } + CHECK(res.refcnt_ > 0); TRY_STATUS(parser.get_status()); return res; } @@ -216,4 +228,77 @@ std::string CellStorer::serialize_value(td::int32 refcnt, const td::Ref &cell, bool as_boc) { return kv_.set(cell->get_hash().as_slice(), serialize_value(refcnt, cell, as_boc)); } + +td::Status CellStorer::merge(td::Slice hash, td::int32 refcnt_diff) { + return kv_.merge(hash, serialize_refcnt_diffs(refcnt_diff)); +} + +void CellStorer::merge_value_and_refcnt_diff(std::string &left, td::Slice right) { + if (right.empty()) { + return; + } + CHECK(left.size() > 4); + CHECK(right.size() == 4); + + td::int32 left_refcnt = td::as(left.data()); + size_t shift = 0; + if (left_refcnt == -1) { + CHECK(left.size() >= 8); + left_refcnt = td::as(left.data() + 4); + shift = 4; + } + td::int32 right_refcnt_diff = td::as(right.data()); + td::int32 new_refcnt = left_refcnt + right_refcnt_diff; + CHECK(new_refcnt > 0); + td::as(left.data() + shift) = new_refcnt; +} +void CellStorer::merge_refcnt_diffs(std::string &left, td::Slice right) { + if (right.empty()) { + return; + } + if (left.empty()) { + left = right.str(); + return; + } + CHECK(left.size() == 4); + CHECK(right.size() == 4); + td::int32 left_refcnt_diff = 
td::as(left.data()); + td::int32 right_refcnt_diff = td::as(right.data()); + td::int32 total_refcnt_diff = left_refcnt_diff + right_refcnt_diff; + td::as(left.data()) = total_refcnt_diff; +} + +std::string CellStorer::serialize_refcnt_diffs(td::int32 refcnt_diff) { + TD_PERF_COUNTER(cell_store_refcnt_diff); + std::string s(4, 0); + td::as(s.data()) = refcnt_diff; + return s; +} + +td::Status CellStorer::apply_diff(const Diff &diff) { + switch (diff.type) { + case Diff::Set: + return kv_.set(diff.key.as_slice(), diff.value); + case Diff::Erase: + return kv_.erase(diff.key.as_slice()); + case Diff::Merge: + return kv_.merge(diff.key.as_slice(), diff.value); + default: + UNREACHABLE(); + } +} +td::Status CellStorer::apply_meta_diff(const MetaDiff &diff) { + switch (diff.type) { + case MetaDiff::Set: + CHECK(diff.key.size() != CellTraits::hash_bytes); + CHECK(!diff.value.empty()); + return kv_.set(diff.key, diff.value); + case MetaDiff::Erase: + CHECK(diff.key.size() != CellTraits::hash_bytes); + CHECK(diff.value.empty()); + return kv_.erase(diff.key); + default: + UNREACHABLE(); + } +} } // namespace vm diff --git a/crypto/vm/db/CellStorage.h b/crypto/vm/db/CellStorage.h index cabd7fdcb..ca32a8007 100644 --- a/crypto/vm/db/CellStorage.h +++ b/crypto/vm/db/CellStorage.h @@ -51,6 +51,9 @@ class CellLoader { td::Result load(td::Slice hash, bool need_data, ExtCellCreator &ext_cell_creator); static td::Result load(td::Slice hash, td::Slice value, bool need_data, ExtCellCreator &ext_cell_creator); td::Result load_refcnt(td::Slice hash); // This only loads refcnt_, cell_ == null + KeyValueReader &key_value_reader() const { + return *reader_; + } private: std::shared_ptr reader_; @@ -62,8 +65,28 @@ class CellStorer { CellStorer(KeyValue &kv); td::Status erase(td::Slice hash); td::Status set(td::int32 refcnt, const td::Ref &cell, bool as_boc); + td::Status merge(td::Slice hash, td::int32 refcnt_diff); + + static void merge_value_and_refcnt_diff(std::string &value, td::Slice 
right); + static void merge_refcnt_diffs(std::string &left, td::Slice right); + static std::string serialize_refcnt_diffs(td::int32 refcnt_diff); + static std::string serialize_value(td::int32 refcnt, const td::Ref &cell, bool as_boc); + struct Diff { + enum Type { Set, Erase, Merge } type{Set}; + CellHash key; + std::string value{}; + }; + td::Status apply_diff(const Diff &diff); + + struct MetaDiff { + enum Type { Set, Erase } type{Set}; + std::string key; + std::string value{}; + }; + td::Status apply_meta_diff(const MetaDiff &diff); + private: KeyValue &kv_; }; diff --git a/crypto/vm/db/DynamicBagOfCellsDb.cpp b/crypto/vm/db/DynamicBagOfCellsDb.cpp index 093037583..d6731b039 100644 --- a/crypto/vm/db/DynamicBagOfCellsDb.cpp +++ b/crypto/vm/db/DynamicBagOfCellsDb.cpp @@ -66,19 +66,27 @@ struct CellInfo { struct Eq { using is_transparent = void; // Pred to use - bool operator()(const CellInfo &info, const CellInfo &other_info) const { return info.key() == other_info.key();} - bool operator()(const CellInfo &info, td::Slice hash) const { return info.key().as_slice() == hash;} - bool operator()(td::Slice hash, const CellInfo &info) const { return info.key().as_slice() == hash;} - + bool operator()(const CellInfo &info, const CellInfo &other_info) const { + return info.key() == other_info.key(); + } + bool operator()(const CellInfo &info, td::Slice hash) const { + return info.key().as_slice() == hash; + } + bool operator()(td::Slice hash, const CellInfo &info) const { + return info.key().as_slice() == hash; + } }; struct Hash { using is_transparent = void; // Pred to use using transparent_key_equal = Eq; - size_t operator()(td::Slice hash) const { return cell_hash_slice_hash(hash); } - size_t operator()(const CellInfo &info) const { return cell_hash_slice_hash(info.key().as_slice());} + size_t operator()(td::Slice hash) const { + return cell_hash_slice_hash(hash); + } + size_t operator()(const CellInfo &info) const { + return 
cell_hash_slice_hash(info.key().as_slice()); + } }; }; - bool operator<(const CellInfo &a, td::Slice b) { return a.key().as_slice() < b; } @@ -99,6 +107,36 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat td::Result> ext_cell(Cell::LevelMask level_mask, td::Slice hash, td::Slice depth) override { return get_cell_info_lazy(level_mask, hash, depth).cell; } + td::Result>> meta_get_all(size_t max_count) const override { + std::vector> result; + auto s = loader_->key_value_reader().for_each_in_range("desc", "desd", + [&](const td::Slice &key, const td::Slice &value) { + if (result.size() >= max_count) { + return td::Status::Error("COUNT_LIMIT"); + } + if (td::begins_with(key, "desc") && key.size() != 32) { + result.emplace_back(key.str(), value.str()); + } + return td::Status::OK(); + }); + if (s.message() == "COUNT_LIMIT") { + s = td::Status::OK(); + } + TRY_STATUS(std::move(s)); + return result; + } + td::Result meta_get(td::Slice key, std::string &value) override { + return loader_->key_value_reader().get(key, value); + } + td::Status meta_set(td::Slice key, td::Slice value) override { + meta_diffs_.push_back( + CellStorer::MetaDiff{.type = CellStorer::MetaDiff::Set, .key = key.str(), .value = value.str()}); + return td::Status::OK(); + } + td::Status meta_erase(td::Slice key) override { + meta_diffs_.push_back(CellStorer::MetaDiff{.type = CellStorer::MetaDiff::Erase, .key = key.str()}); + return td::Status::OK(); + } td::Result> load_cell(td::Slice hash) override { auto info = hash_table_.get_if_exists(hash); if (info && info->sync_with_db) { @@ -198,21 +236,29 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat if (is_prepared_for_commit()) { return td::Status::OK(); } + td::PerfWarningTimer timer_dfs_new_cells_in_db("dfs_new_cells_in_db"); for (auto &new_cell : to_inc_) { auto &new_cell_info = get_cell_info(new_cell); dfs_new_cells_in_db(new_cell_info); } + timer_dfs_new_cells_in_db.reset(); + 
td::PerfWarningTimer timer_dfs_new_cells("dfs_new_cells"); for (auto &new_cell : to_inc_) { auto &new_cell_info = get_cell_info(new_cell); dfs_new_cells(new_cell_info); } + timer_dfs_new_cells.reset(); + td::PerfWarningTimer timer_dfs_old_cells("dfs_old_cells"); for (auto &old_cell : to_dec_) { auto &old_cell_info = get_cell_info(old_cell); dfs_old_cells(old_cell_info); } + timer_dfs_old_cells.reset(); + td::PerfWarningTimer timer_save_diff_prepare("save_diff_prepare"); save_diff_prepare(); + timer_save_diff_prepare.reset(); to_inc_.clear(); to_dec_.clear(); @@ -222,6 +268,7 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat td::Status commit(CellStorer &storer) override { prepare_commit(); + td::PerfWarningTimer times_save_diff("save diff", 0.01); save_diff(storer); // Some elements are erased from hash table, to keep it small. // Hash table is no longer represents the difference between the loader and @@ -249,7 +296,7 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat celldb_compress_depth_ = value; } - vm::ExtCellCreator& as_ext_cell_creator() override { + vm::ExtCellCreator &as_ext_cell_creator() override { return *this; } @@ -259,6 +306,7 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat std::vector> to_dec_; CellHashTable hash_table_; std::vector visited_; + std::vector meta_diffs_; Stats stats_diff_; td::uint32 celldb_compress_depth_{0}; @@ -269,8 +317,9 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat class SimpleExtCellCreator : public ExtCellCreator { public: - explicit SimpleExtCellCreator(std::shared_ptr cell_db_reader) : - cell_db_reader_(std::move(cell_db_reader)) {} + explicit SimpleExtCellCreator(std::shared_ptr cell_db_reader) + : cell_db_reader_(std::move(cell_db_reader)) { + } td::Result> ext_cell(Cell::LevelMask level_mask, td::Slice hash, td::Slice depth) override { TRY_RESULT(ext_cell, 
DynamicBocExtCell::create(PrunnedCellInfo{level_mask, hash, depth}, @@ -279,7 +328,7 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat return std::move(ext_cell); } - std::vector>& get_created_cells() { + std::vector> &get_created_cells() { return created_cells_; } @@ -382,8 +431,7 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat } bool not_in_db = false; - for_each( - info, [&not_in_db, this](auto &child_info) { not_in_db |= !dfs_new_cells_in_db(child_info); }, false); + for_each(info, [&not_in_db, this](auto &child_info) { not_in_db |= !dfs_new_cells_in_db(child_info); }, false); if (not_in_db) { CHECK(!info.in_db); @@ -441,6 +489,10 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat for (auto info_ptr : visited_) { save_cell(*info_ptr, storer); } + for (const auto &meta_diff : meta_diffs_) { + storer.apply_meta_diff(meta_diff); + } + meta_diffs_.clear(); visited_.clear(); } @@ -558,6 +610,8 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat } auto res = r_res.move_as_ok(); if (res.status != CellLoader::LoadResult::Ok) { + LOG_CHECK(info.cell.not_null()) << "Trying to load nonexistent cell from db " + << CellHash::from_slice(hash).to_hex(); break; } info.cell = std::move(res.cell()); @@ -651,7 +705,7 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat CellHashTable cells_; - std::queue load_queue_; + std::queue load_queue_; td::uint32 active_load_ = 0; td::uint32 max_parallel_load_ = 4; }; @@ -814,11 +868,10 @@ class DynamicBagOfCellsDbImpl : public DynamicBagOfCellsDb, private ExtCellCreat pca_state_->promise_.set_result(td::Unit()); pca_state_ = {}; } - }; } // namespace -std::unique_ptr DynamicBagOfCellsDb::create() { +std::unique_ptr DynamicBagOfCellsDb::create(CreateV1Options) { return std::make_unique(); } } // namespace vm diff --git a/crypto/vm/db/DynamicBagOfCellsDb.h 
b/crypto/vm/db/DynamicBagOfCellsDb.h index 62864ad97..82028f3fe 100644 --- a/crypto/vm/db/DynamicBagOfCellsDb.h +++ b/crypto/vm/db/DynamicBagOfCellsDb.h @@ -17,6 +17,7 @@ Copyright 2017-2020 Telegram Systems LLP */ #pragma once +#include "td/db/KeyValue.h" #include "vm/cells.h" #include "td/utils/Slice.h" @@ -49,13 +50,23 @@ class CellDbReader { class DynamicBagOfCellsDb { public: virtual ~DynamicBagOfCellsDb() = default; + + virtual td::Result>> meta_get_all(size_t max_count) const = 0; + virtual td::Result meta_get(td::Slice key, std::string &value) = 0; + virtual td::Status meta_set(td::Slice key, td::Slice value) = 0; + virtual td::Status meta_erase(td::Slice key) = 0; + virtual td::Result> load_cell(td::Slice hash) = 0; virtual td::Result> load_root(td::Slice hash) = 0; virtual td::Result> load_root_thread_safe(td::Slice hash) const = 0; + virtual td::Result>> load_known_roots() const { + return std::vector>(); + } struct Stats { td::int64 roots_total_count{0}; td::int64 cells_total_count{0}; td::int64 cells_total_size{0}; + td::NamedStats named_stats; std::vector> custom_stats; void apply_diff(const Stats &diff) { roots_total_count += diff.roots_total_count; @@ -64,6 +75,20 @@ class DynamicBagOfCellsDb { CHECK(roots_total_count >= 0); CHECK(cells_total_count >= 0); CHECK(cells_total_size >= 0); + named_stats.apply_diff(diff.named_stats); + } + friend td::StringBuilder &operator<<(td::StringBuilder &sb, const Stats &stats) { + sb << "STATS\n"; + for (auto &p : stats.custom_stats) { + sb << "\t" << p.first << "\t" << p.second << "\n"; + } + for (auto &p : stats.named_stats.stats_int) { + sb << "\t" << p.first << "\t" << p.second << "\n"; + } + for (auto &p : stats.named_stats.stats_str) { + sb << "\t" << p.first << "\t" << p.second << "\n"; + } + return sb; } }; virtual void inc(const Ref &old_root) = 0; @@ -72,7 +97,7 @@ class DynamicBagOfCellsDb { virtual td::Status prepare_commit() = 0; virtual Stats get_stats_diff() = 0; virtual td::Result get_stats() { - 
return td::Status::Error("Not implemented"); + return Stats{}; } virtual td::Status commit(CellStorer &) = 0; virtual std::shared_ptr get_cell_db_reader() = 0; @@ -83,25 +108,49 @@ class DynamicBagOfCellsDb { virtual void set_celldb_compress_depth(td::uint32 value) = 0; virtual vm::ExtCellCreator &as_ext_cell_creator() = 0; - static std::unique_ptr create(); + class AsyncExecutor { + public: + virtual ~AsyncExecutor() { + } + virtual void execute_async(std::function f) = 0; + virtual void execute_sync(std::function f) = 0; + virtual std::string describe() const { + return "AsyncExecutor"; + } + }; + + struct CreateV1Options { + friend td::StringBuilder &operator<<(td::StringBuilder &sb, const CreateV1Options &options) { + return sb << "V1{}"; + } + }; + static std::unique_ptr create(CreateV1Options = {}); + + struct CreateV2Options { + size_t extra_threads{std::thread::hardware_concurrency()}; + std::shared_ptr executor{}; + size_t cache_ttl_max{2000}; + size_t cache_size_max{1000000}; + friend td::StringBuilder &operator<<(td::StringBuilder &sb, const CreateV2Options &options) { + return sb << "V2{extra_threads=" << options.extra_threads << ", cache_ttl_max=" << options.cache_ttl_max + << ", cache_size_max=" << options.cache_size_max << "}"; + } + }; + static std::unique_ptr create_v2(CreateV2Options options); struct CreateInMemoryOptions { size_t extra_threads{std::thread::hardware_concurrency()}; bool verbose{true}; - // Allocated DataCels will never be deleted + // Allocated DataCells will never be deleted bool use_arena{false}; // Almost no overhead in memory during creation, but will scan database twice bool use_less_memory_during_creation{true}; - }; - static std::unique_ptr create_in_memory(td::KeyValueReader *kv, CreateInMemoryOptions options); - - class AsyncExecutor { - public: - virtual ~AsyncExecutor() { + friend td::StringBuilder &operator<<(td::StringBuilder &sb, const CreateInMemoryOptions &options) { + return sb << "InMemory{extra_threads=" << 
options.extra_threads << ", use_arena=" << options.use_arena + << ", use_less_memory_during_creation=" << options.use_less_memory_during_creation << "}"; } - virtual void execute_async(std::function f) = 0; - virtual void execute_sync(std::function f) = 0; }; + static std::unique_ptr create_in_memory(td::KeyValueReader *kv, CreateInMemoryOptions options); virtual void load_cell_async(td::Slice hash, std::shared_ptr executor, td::Promise> promise) = 0; diff --git a/crypto/vm/db/DynamicBagOfCellsDbV2.cpp b/crypto/vm/db/DynamicBagOfCellsDbV2.cpp new file mode 100644 index 000000000..eff74e214 --- /dev/null +++ b/crypto/vm/db/DynamicBagOfCellsDbV2.cpp @@ -0,0 +1,1511 @@ +#include "vm/db/DynamicBagOfCellsDb.h" +#include "vm/db/CellStorage.h" +#include "vm/db/CellHashTable.h" + +#include "vm/cells/ExtCell.h" + +#include "td/utils/base64.h" +#include "td/utils/format.h" +#include "td/utils/ThreadSafeCounter.h" +#include "td/utils/misc.h" +#include "validator/validator.h" + +#include "vm/cellslice.h" + +#include + +namespace vm { +namespace { + +// Very stupid Vector/MpmcQueue +template +struct TsVector { + TsVector() { + first_block_size_ = 64; + blocks_[0].data.resize(first_block_size_); + blocks_[0].is_ready = true; + } + TsVector(std::vector base) { + first_block_size_ = base.size(); + blocks_[0].data = std::move(base); + blocks_[0].is_ready = true; + } + struct Block { + std::mutex mutex; + std::atomic is_ready{false}; + std::vector data; + }; + T &at(size_t i) { + td::uint64 j = i / first_block_size_; + td::int32 hb = 63 - td::count_leading_zeroes64(j); // hb = -1 if j=0, else hb>=0 + + // If j=0, hb<0, so hb>>31 = -1 => mask=0 + // If j>0, hb>=0, so hb>>31=0 => mask=~0 (all ones) + td::uint64 mask = ~(td::uint64)(hb >> 31); + + size_t block_i = hb + 1; + uint64_t shift = hb & 63ULL; + uint64_t start = ((1ULL << shift) * first_block_size_) & mask; + size_t pos_in_block = i - start; + auto &block = blocks_[block_i]; + if 
(block.is_ready.load(std::memory_order_acquire)) { + return block.data.at(pos_in_block); + } + + std::unique_lock lock(block.mutex); + if (block.is_ready.load(std::memory_order_acquire)) { + return block.data.at(pos_in_block); + } + block.data.resize(start); + block.is_ready.store(true, std::memory_order_release); + return block.data.at(pos_in_block); + } + template + void push_back(S &&value) { + at(end_.fetch_add(1, std::memory_order_relaxed)) = std::forward(value); + } + T pop_front() { + auto pos = begin_.fetch_add(1, std::memory_order_relaxed); + while (pos >= end_.load(std::memory_order_acquire)) { + // This may (or may not) use too much CPU + td::this_thread::yield(); + } + return std::move(at(pos)); + } + size_t size() const { + return end_.load(); + } + + std::array blocks_; + size_t first_block_size_{0}; + std::atomic begin_{0}; + std::atomic end_{0}; +}; +struct CellInfo; + +class CellDbReaderExt; +struct DynamicBocExtCellExtra { + std::shared_ptr reader; +}; + +class DynamicBocCellLoader { + public: + static td::Result> load_data_cell(const ExtCell &cell, + const DynamicBocExtCellExtra &extra); +}; +using DynamicBocExtCell = ExtCell; + +class CellDbReaderExt : public CellDbReader { + public: + virtual td::Result> load_ext_cell(Ref cell) = 0; +}; + +td::Result> DynamicBocCellLoader::load_data_cell(const DynamicBocExtCell &cell, + const DynamicBocExtCellExtra &extra) { + return extra.reader->load_ext_cell(Ref(&cell)); +} + +#define S(x) \ + td::NamedThreadSafeCounter::CounterRef x { \ + nc.get_counter(#x) \ + } + +struct CacheStats { + td::NamedThreadSafeCounter nc; + S(load_cell_ext); + S(load_cell_ext_cache_hits); + S(load_cell_sync); + S(load_cell_sync_cache_hits); + S(load_cell_async); + S(load_cell_async_cache_hits); + S(ext_cells); + S(ext_cells_load); + S(ext_cells_load_cache_hits); + + S(kv_read_found); + S(kv_read_not_found); + + S(sync_with_db); + S(sync_with_db_only_ref); + S(load_cell_no_cache); +}; + +struct CommitStats { + 
td::NamedThreadSafeCounter nc; + + S(to_inc); + S(to_dec); + + S(gather_new_cells_calls); + S(gather_new_cells_calls_it); + S(update_parents_calls); + S(update_parents_calls_it); + S(dec_calls); + S(dec_calls_it); + + S(new_cells); + S(new_cells_leaves); + + S(new_cells_loaded_not_in_db); + S(new_cells_loaded_in_db); + S(new_cells_not_in_db_fast); + + S(dec_loaded); + S(dec_to_zero); + + S(changes_loaded); + + // new diff logic + S(diff_zero); + S(diff_full); + S(diff_erase); + S(diff_ref_cnt); + + // old full data logic + S(inc_save); + S(inc_save_full); + S(inc_save_only_ref_cnt); + S(inc_new_cell); + S(inc_just_ref_cnt); + + S(dec_save); + S(dec_save_full); + S(dec_save_only_refcnt); + S(dec_save_erase); + S(dec_erase_cell); + S(dec_just_ref_cnt); +}; + +template +struct AtomicPod { + T load() const { + while (true) { + if (auto res = try_read_stable()) { + return res->second; + } + } + } + + template + std::pair update(F &&f) { + while (true) { + auto res = try_read_stable(); + if (!res) { + continue; + } + auto [before, old_data] = *res; + + auto o_new_data = f(old_data); + if (!o_new_data) { + return {old_data, false}; + } + + if (!lock_.compare_exchange_weak(before, before + 1, std::memory_order_acq_rel, std::memory_order_relaxed)) { + continue; + } + + data_ = *o_new_data; // relaxed store inside lock + lock_.fetch_add(1, std::memory_order_release); + return {*o_new_data, true}; + } + } + + private: + mutable std::atomic lock_{0}; + T data_{}; + + std::optional> try_read_stable() const { + auto before = lock_.load(std::memory_order_acquire); + if (before % 2 == 1) { + return std::nullopt; + } + T temp = data_; // relaxed read is ok, checked by versioning + auto after = lock_.load(std::memory_order_acquire); + if (after != before) { + return std::nullopt; + } + return std::make_pair(before, temp); + } +}; + +struct InDbInfo { + std::vector parents; + std::atomic pending_children{0}; + std::atomic maybe_in_db{true}; + std::atomic 
visited_in_gather_new_cells{false}; +}; +td::StringBuilder &operator<<(td::StringBuilder &sb, const InDbInfo &info) { + sb << "mb_in_db:" << info.maybe_in_db.load() << " chld_n:" << info.pending_children + << " prnt_n:" << info.parents.size(); + return sb; +} +struct CellInfo { + struct State { + // db_ref_cnt and in_db are correct + bool sync_with_db{false}; + + // ignore if sync_with_db is false + td::int32 db_ref_cnt{0}; + td::int32 db_refcnt_fixup{0}; + + // if true - cell is definitely in db + // if false - we know that cell is not in db only if sync_with_db=true + bool in_db{false}; + + // diff to be applied + }; + AtomicPod state; + std::atomic ref_cnt_diff{0}; + + std::atomic visited{false}; + td::unique_ptr in_db_info_ptr; + std::mutex mutex; + + // Could be AtomicRef, but I am not sure that it is worth it + const Ref cell; + + explicit CellInfo(Ref cell) : cell(std::move(cell)) { + } + + InDbInfo &in_db_info() { + return *in_db_info_ptr; + } + const InDbInfo &in_db_info() const { + return *in_db_info_ptr; + } + InDbInfo &in_db_info_create() { // NOT thread safe + if (!in_db_info_ptr) { + in_db_info_ptr = td::make_unique(); + } + return in_db_info(); + } + InDbInfo &in_db_info_create(CellInfo *parent) { // Thread Safe + std::unique_lock lock(mutex); + if (!in_db_info_ptr) { + in_db_info_ptr = td::make_unique(); + } + auto &res = *in_db_info_ptr; + if (parent != nullptr) { + res.parents.emplace_back(parent); + } + lock.unlock(); + return res; + } + void in_db_info_destroy() { + in_db_info_ptr = nullptr; + } + td::int32 inc_ref_cnt() { + return ref_cnt_diff.fetch_add(1, std::memory_order_relaxed) + 1; + } + td::int32 dec_ref_cnt() { + return ref_cnt_diff.fetch_sub(1, std::memory_order_relaxed) - 1; + } + td::int32 get_ref_cnt_diff() const { + return ref_cnt_diff.load(std::memory_order_relaxed); + } + + void set_not_in_db() { + state.update([&](State state) -> std::optional { + if (state.sync_with_db) { + CHECK(state.db_ref_cnt == 0); + CHECK(!state.in_db); 
+ return {}; + } + state.sync_with_db = true; + state.in_db = false; + state.db_ref_cnt = 0; + return state; + }); + } + void set_in_db() { + state.update([&](State state) -> std::optional { + if (state.sync_with_db) { + //LOG_CHECK(state.in_db) << *this; + return {}; + } + state.in_db = true; + return state; + }); + } + void synced_with_db(td::int32 db_ref_cnt) { + state.update([&](State state) -> std::optional { + if (state.sync_with_db) { + CHECK(state.in_db); + CHECK(state.db_ref_cnt == db_ref_cnt); + return {}; + } + state.in_db = true; + state.db_ref_cnt = db_ref_cnt; + return state; + }); + } + bool visit() { + return !visited.exchange(true); + } + void on_written_to_db() { + auto diff = ref_cnt_diff.exchange(0); + state.update([&](State state) -> std::optional { + if (diff == 0) { + return {}; + } + if (state.sync_with_db) { + state.db_ref_cnt += diff; + CHECK(state.db_ref_cnt >= 0); + state.in_db = state.db_ref_cnt > 0; + } else { + CHECK(diff > 0); + state.in_db = true; + state.db_refcnt_fixup += diff; + } + return state; + }); + } + + td::Result> get_data_cell() { + TRY_RESULT(loaded_cell, cell->load_cell()); + return loaded_cell.data_cell; + } + Cell::Hash key() const { + return cell->get_hash(); + } + bool operator<(const CellInfo &other) const { + return key() < other.key(); + } + + struct Eq { + using is_transparent = void; // Pred to use + bool operator()(const CellInfo &info, const CellInfo &other_info) const { + return info.key() == other_info.key(); + } + bool operator()(const CellInfo &info, td::Slice hash) const { + return info.key().as_slice() == hash; + } + bool operator()(td::Slice hash, const CellInfo &info) const { + return info.key().as_slice() == hash; + } + }; + struct Hash { + using is_transparent = void; // Pred to use + using transparent_key_equal = Eq; + size_t operator()(td::Slice hash) const { + return cell_hash_slice_hash(hash); + } + size_t operator()(const CellInfo &info) const { + return 
cell_hash_slice_hash(info.key().as_slice()); + } + }; +}; +td::StringBuilder &operator<<(td::StringBuilder &sb, const CellInfo &info) { + if (info.cell->is_loaded()) { + auto data_cell = info.cell->load_cell().move_as_ok().data_cell; + vm::CellSlice cs(vm::NoVm{}, data_cell); + sb << data_cell->get_hash().to_hex().substr(0, 8) << " refs:" << data_cell->size_refs() + << " data:" << cs.data_bits().to_hex(cs.size()) << " data_ptr=" << data_cell.get() << " data_ref_cnt(" + << data_cell->get_refcnt() << ")"; + } else { + sb << info.cell->get_hash().to_hex().substr(0, 8); + } + auto state = info.state.load(); + sb << " " << &info; + sb << "\n\tin_db=" << state.in_db << " sync_with_db=" << state.sync_with_db + << " ref_cnt_diff=" << info.get_ref_cnt_diff() << " db_ref_cnt=" << state.db_ref_cnt + << " db_ref_cnt_fixup=" << state.db_refcnt_fixup; + if (state.sync_with_db) { + sb << " REFS(" << info.get_ref_cnt_diff() + state.db_ref_cnt << ")"; + } + if (info.in_db_info_ptr) { + sb << " " << info.in_db_info(); + } + sb << " visited=" << info.visited.load(); + return sb; +} + +struct ExecutorOptions { + size_t extra_threads_n{0}; + std::shared_ptr async_executor; +}; +template +class ExecutorImpl { + public: + ExecutorImpl(ExecutorOptions options) : options_(options) { + } + ExecutorOptions options_; + using InputData = std::vector>; + using OutputData = std::vector>; + struct InputChunk { + td::Span infos; + size_t begin{}; + size_t end{}; + }; + + template + OutputData process(const InputData &data, const F &process_task_f) { + if (options_.extra_threads_n > 0) { + return process_parallel(data, process_task_f); + } else { + return process_sequential(data, process_task_f); + } + } + template + struct SingleThreadWorker { + const F &process_task_f; + mutable std::vector results{}; + void add_task(InputT input) const { + process_task_f(input, *this); + } + void add_result(OutputT output) const { + results.push_back(output); + } + }; + template + OutputData 
process_sequential(const InputData &data, const F &process_task_f) { + auto w = SingleThreadWorker{process_task_f}; + for (auto &chunk : data) { + for (auto &info : chunk) { + process_task_f(info, w); + } + } + + return {std::move(w.results)}; + } + + template + struct Shared; + + template + struct Worker { + size_t worker_i{}; + std::shared_ptr> shared; + + void add_task(InputT input) const { + shared->delay_or_process_task(input, *this); + } + void add_result(OutputT value) const { + shared->add_result(value, worker_i); + } + void loop() const { + shared->loop(*this); + } + }; + + template + struct Shared { + Shared(size_t workers_n, const InputData &input_data, const ProcessTaskF &process_task_f) + : input_chunks(prepare_input_chunks(input_data)) + , workers_n(workers_n) + , input_size(input_chunks.empty() ? 0 : input_chunks.back().end) + , batch_size(std::clamp(input_size / workers_n / 4, size_t(1), size_t(128))) + , process_task_f(process_task_f) { + } + + const std::vector input_chunks; + + const size_t workers_n{0}; + const size_t input_size{0}; + const size_t batch_size{128}; + + const ProcessTaskF &process_task_f; + + // Position in input + std::atomic next_input_i{0}; + + // Shared queue + // Probably a simpler queue would also work fine + td::MpmcQueue mpmc_queue{workers_n}; + using Waiter = td::MpmcSleepyWaiter; + Waiter waiter; + std::atomic mpmc_queue_size{workers_n}; // guard + + // Output vectors + struct ThreadData { + std::vector output; + char pad[TD_CONCURRENCY_PAD - sizeof(output)]; + }; + std::vector thread_data{workers_n}; + + auto prepare_input_chunks(const InputData &input_data) { + std::vector chunks; + for (auto &chunk : input_data) { + size_t prev_end = chunks.empty() ? 
0 : chunks.back().end; + chunks.push_back({.infos = td::as_span(chunk), .begin = prev_end, .end = prev_end + chunk.size()}); + } + return chunks; + } + + void delay_or_process_task(InputT input, const Worker &worker) { + // if there is enough tasks in queue, we continue recursion + if (mpmc_queue_size.load(std::memory_order_acquire) > 256) { + process_task_f(input, worker); + } else { + mpmc_queue_size.fetch_add(1, std::memory_order_acq_rel); + mpmc_queue.push(input, worker.worker_i); + waiter.notify(); + } + } + + void add_result(OutputT result, size_t worker_i) { + thread_data[worker_i].output.push_back(std::move(result)); + } + + void process_initial_input(const Worker &worker) { + size_t input_chunk_i = 0; + while (true) { + auto begin_i = next_input_i.fetch_add(batch_size, std::memory_order_relaxed); + auto end_i = begin_i + batch_size; + if (begin_i >= input_size) { + break; + } + for (size_t i = begin_i; i < end_i && i < input_size; i++) { + while (input_chunks[input_chunk_i].end <= i) { + input_chunk_i++; + } + auto offset = i - input_chunks[input_chunk_i].begin; + auto task = input_chunks[input_chunk_i].infos[offset]; + process_task_f(task, worker); + } + } + } + + void on_processed_task_from_queue(size_t worker_i) { + if (mpmc_queue_size.fetch_add(-1, std::memory_order_acq_rel) == 1) { + for (size_t i = 0; i < workers_n; i++) { + mpmc_queue.push(nullptr, worker_i); + waiter.notify(); + } + } + } + + void process_queue(const Worker &worker) { + on_processed_task_from_queue(worker.worker_i); + + Waiter::Slot slot; + waiter.init_slot(slot, td::narrow_cast(worker.worker_i)); + + while (true) { + InputT input{}; + if (mpmc_queue.try_pop(input, worker.worker_i)) { + waiter.stop_wait(slot); + if (!input) { + break; + } + process_task_f(input, worker); + on_processed_task_from_queue(worker.worker_i); + } else { + waiter.wait(slot); + } + } + } + void loop(const Worker &worker) { + process_initial_input(worker); + process_queue(worker); + } + void finish() const { 
+ CHECK(mpmc_queue_size == 0); + } + }; + + template + OutputData process_parallel(const InputData &input_data, const F &process_task_f) { + const size_t workers_n = options_.extra_threads_n + 1; + auto shared = std::make_shared>(workers_n, input_data, process_task_f); + + CHECK(workers_n >= 1); + for (size_t i = 0; i < workers_n; i++) { + auto to_run = [worker = Worker{.worker_i = i, .shared = shared}] { worker.loop(); }; + + if (i + 1 == workers_n) { + to_run(); + } else if (options_.async_executor) { + options_.async_executor->execute_async(std::move(to_run)); + } else { + // NB: td::thread, NOT std::thread + td::thread(std::move(to_run)).detach(); + } + } + shared->finish(); + return td::transform(shared->thread_data, [](auto &&x) { return std::move(x.output); }); + } +}; +struct Executor { + Executor(ExecutorOptions options = {}) : options_(options) { + } + + template + auto operator()(const std::vector> &data, const F &process_task_f) { + return ExecutorImpl(options_).process(data, process_task_f); + } + + private: + ExecutorOptions options_; +}; + +// Thread safe storage for CellInfo +// Will be used by everybody as shared cache. 
Yes there is some overhead, but I don't want to create another hash table +struct CellInfoStorage { + public: + // All methods are thread safe + // All CellInfo pointers live as long as CellInfoStorage + + // returns CellInfo, only if it already exists + CellInfo *get_cell_info(td::Slice hash) { + return lock(hash)->hash_table.get_if_exists(hash); + } + + CellInfo &create_cell_info_from_db(Ref data_cell, td::int32 ref_cnt) { + auto &info = create_cell_info_from_data_cell(std::move(data_cell)); + info.synced_with_db(ref_cnt); + return info; + } + + // Creates CellInfo from data_cell, or updates existing CellInfo if it is not yet loaded + CellInfo &create_cell_info_from_data_cell(Ref cell) { + CHECK(cell.not_null()); + CHECK(cell->is_loaded()); + + auto hash = cell->get_hash(); + auto [info, created] = lock(hash.as_slice())->hash_table.emplace(hash.as_slice(), std::move(cell)); + + if (!created) { + info.cell->set_data_cell(std::move(cell)); + } + return info; + } + + // Creates CellInfo from cell. 
If cell is loaded, it will be used to rewrite or update current cell + CellInfo &create_cell_info(Ref cell, CellDbReaderExt *from_reader, CacheStats &stats) { + if (cell->is_loaded()) { + return create_cell_info_from_data_cell(cell->load_cell().move_as_ok().data_cell); + } + + bool our_ext_cell = false; + auto ext_cell = dynamic_cast(cell.get()); + if (ext_cell) { + auto prunned_cell = ext_cell->get_prunned_cell(); + if (prunned_cell.not_null()) { + our_ext_cell = prunned_cell->get_extra().reader.get() == from_reader; + } + our_ext_cell = true; + } else if (!cell->is_loaded()) { + // if we cached cell from OTHER db it is a good idea to drop it ASAP + force_drop_cache_.store(true, std::memory_order_relaxed); + } + + auto hash = cell->get_hash(); + auto [info, created] = lock(hash.as_slice())->hash_table.emplace(hash.as_slice(), std::move(cell)); + if (our_ext_cell) { + stats.ext_cells_load.inc(); + if (info.cell->is_loaded()) { + stats.ext_cells_load_cache_hits.inc(); + } + info.set_in_db(); + } + return info; + } + + void dump() { + LOG(ERROR) << "===========BEGIN DUMP==========="; + for (auto &bucket : buckets_) { + std::lock_guard guard(bucket.mutex); + bucket.hash_table.for_each([&](auto &info) { LOG(INFO) << info; }); + } + LOG(ERROR) << "===========END DUMP==========="; + } + + size_t cache_size() { + size_t res = 0; + for (auto &bucket : buckets_) { + std::lock_guard guard(bucket.mutex); + res += bucket.hash_table.size(); + } + return res; + } + bool force_drop_cache() { + return force_drop_cache_.load(std::memory_order_relaxed); + } + + private: + struct Bucket { + std::mutex mutex; + CellHashTable hash_table; + }; + constexpr static size_t buckets_n = 8192; + std::array bucket_; + + struct Unlock { + void operator()(Bucket *bucket) const { + bucket->mutex.unlock(); + } + }; + std::array buckets_{}; + std::atomic force_drop_cache_{false}; + + std::unique_ptr lock(Bucket &bucket) { + bucket.mutex.lock(); + return std::unique_ptr(&bucket); + } + std::unique_ptr 
lock(td::Slice key) { + auto hash = td::as(key.substr(16, 8).ubegin()); + auto bucket_i = hash % buckets_n; + return lock(buckets_[bucket_i]); + } +}; + +class DynamicBagOfCellsDbImplV2 : public DynamicBagOfCellsDb { + public: + explicit DynamicBagOfCellsDbImplV2(CreateV2Options options) : options_(options) { + get_thread_safe_counter().inc(); + // LOG(ERROR) << "Constructor called for DynamicBagOfCellsDbImplV2"; + } + ~DynamicBagOfCellsDbImplV2() { + // LOG(ERROR) << "Destructor called for DynamicBagOfCellsDbImplV2"; + get_thread_safe_counter().add(-1); + + if (cell_db_reader_) { + cell_db_reader_->drop_cache(); + } + } + + td::Result>> meta_get_all(size_t max_count) const override { + CHECK(meta_db_fixup_.empty()); + std::vector> result; + auto s = cell_db_reader_->key_value_reader().for_each_in_range( + "desc", "desd", [&](const td::Slice &key, const td::Slice &value) { + if (result.size() >= max_count) { + return td::Status::Error("COUNT_LIMIT"); + } + if (td::begins_with(key, "desc") && key.size() != 32) { + result.emplace_back(key.str(), value.str()); + } + return td::Status::OK(); + }); + if (s.message() == "COUNT_LIMIT") { + s = td::Status::OK(); + } + TRY_STATUS(std::move(s)); + return result; + } + td::Result meta_get(td::Slice key, std::string &value) override { + auto it = meta_db_fixup_.find(key); + if (it != meta_db_fixup_.end()) { + if (it->second.empty()) { + return KeyValue::GetStatus::NotFound; + } + value = it->second; + return KeyValue::GetStatus::Ok; + } + return cell_db_reader_->key_value_reader().get(key, value); + } + td::Status meta_set(td::Slice key, td::Slice value) override { + meta_diffs_.push_back( + CellStorer::MetaDiff{.type = CellStorer::MetaDiff::Set, .key = key.str(), .value = value.str()}); + return td::Status::OK(); + } + td::Status meta_erase(td::Slice key) override { + meta_diffs_.push_back(CellStorer::MetaDiff{.type = CellStorer::MetaDiff::Erase, .key = key.str()}); + return td::Status::OK(); + } + td::Result> 
load_cell(td::Slice hash) override { + CHECK(cell_db_reader_); + return cell_db_reader_->load_cell(hash); + } + td::Result> load_root(td::Slice hash) override { + return load_cell(hash); + } + td::Result> load_root_thread_safe(td::Slice hash) const override { + // TODO: it is better to use AtomicRef, or atomic shared pointer + // But to use AtomicRef we need a little refactoring + // And std::atomic> is still unsupported by clang + std::unique_lock lock(atomic_cell_db_reader_mutex_); + auto reader = atomic_cell_db_reader_; + lock.unlock(); + if (!reader) { + return td::Status::Error("Empty reader"); + } + return reader->load_cell(hash); + } + void load_cell_async(td::Slice hash, std::shared_ptr executor, + td::Promise> promise) override { + CHECK(cell_db_reader_); + return cell_db_reader_->load_cell_async(hash, std::move(executor), std::move(promise)); + } + void prepare_commit_async(std::shared_ptr executor, td::Promise promise) override { + auto promise_ptr = std::make_shared>(std::move(promise)); + executor->execute_async([this, promise_ptr = std::move(promise_ptr)] { + prepare_commit(); + promise_ptr->set_value(td::Unit()); + }); + } + + void inc(const Ref &cell) override { + if (cell.is_null()) { + return; + } + if (cell->get_virtualization() != 0) { + return; + } + to_inc_.push_back(cell); + } + void dec(const Ref &cell) override { + if (cell.is_null()) { + return; + } + if (cell->get_virtualization() != 0) { + return; + } + to_dec_.push_back(cell); + } + + bool is_prepared_for_commit() { + return to_inc_.empty() && to_dec_.empty(); + } + + Stats get_stats_diff() override { + return {}; + } + + td::Status prepare_commit() override { + if (is_prepared_for_commit()) { + return td::Status::OK(); + } + // NB: we don't use options.executor, because it is prone to deadlocks. 
We need extra_threads_n threads + // available for blocking + Executor executor{{.extra_threads_n = options_.extra_threads, .async_executor = {}}}; + // calculate in_db for all vertices reachable from to_inc_ roots + // - for ext cells we already know they are in db + // - calculate in_db up from leaves + // - if at least one child is not in db, then the cell is definitely not in db + // - so in best case only leaves will be loaded from db + // - this is optional step. All other logic must work in any case + // - only already loaded cells are loaded from db + + stats_.to_inc.add(to_inc_.size()); + stats_.to_dec.add(to_dec_.size()); + + std::vector> visited_cells; + auto add_visited_cells = [&](std::vector> new_visited_cells) { + for (auto &x : new_visited_cells) { + visited_cells.push_back(std::move(x)); + } + }; + + std::vector> new_cells_leaves; + { + td::PerfWarningTimer timer("celldb_v2: gather_new_cells"); + std::vector prepared_to_inc; + std::vector visited_roots; + for (auto &cell : to_inc_) { + auto &info = cell_db_reader_->cell_info(cell); + if (info.inc_ref_cnt() == 1 && info.visit()) { + visited_roots.push_back(&info); + } + if (info.state.load().in_db) { + continue; + } + auto &in_db_info = info.in_db_info_create(nullptr); + if (!in_db_info.visited_in_gather_new_cells.exchange(true)) { + prepared_to_inc.push_back(&info); + } + } + new_cells_leaves = + executor({std::move(prepared_to_inc)}, [&](CellInfo *info, auto &worker) { gather_new_cells(info, worker); }); + visited_cells.push_back(std::move(visited_roots)); + } + + // LOG(WARNING) << "new_cells_leaves: " << new_cells_leaves.size(); + { + td::PerfWarningTimer timer("celldb_v2: update_parents"); + add_visited_cells( + executor({std::move(new_cells_leaves)}, [&](CellInfo *info, auto &worker) { update_parents(info, worker); })); + } + { + td::PerfWarningTimer timer("dec"); + std::vector prepared_to_dec; + for (auto &cell : to_dec_) { + auto &info = cell_db_reader_->cell_info(cell); + 
prepared_to_dec.push_back(&info); + } + add_visited_cells( + executor({std::move(prepared_to_dec)}, [&](CellInfo *info, auto &worker) { dec_cell(info, worker); })); + } + + td::PerfWarningTimer timer_serialize("celldb_v2: save_diff_serialize", 0.01); + // LOG(INFO) << "threads_n = " << options_.extra_threads + 1; + diff_chunks_ = executor.operator()( + visited_cells, [&](CellInfo *info, auto &worker) { serialize_diff(info, worker); }); + timer_serialize.reset(); + + { + td::PerfWarningTimer timer("celldb_v2: clear"); + to_inc_.clear(); + to_dec_.clear(); + } + + //cell_db_reader_->dump(); + return td::Status::OK(); + } + + td::Status commit(CellStorer &storer) override { + prepare_commit(); + save_diff(storer); + // We DON'T delete entries from cache, so cache actually represents diff with snapshot in reader + // But we don't want to keep old snapshot forever + LOG_IF(ERROR, dbg) << "clear cell_db_reader"; + //cell_db_reader_->dump(); + //TODO: call drop_cache reliably via rtti + + constexpr bool always_drop_cache = false; + if (always_drop_cache) { + td::PerfWarningTimer timer("celldb_v2: reset reader"); + cell_db_reader_->drop_cache(); + cache_stats_.apply_diff(cell_db_reader_->get_stats()); + cache_stats_.stats_int["commits"] += 1; + cell_db_reader_ = {}; + // keep atomic reader, so it will be reused + } + return td::Status::OK(); + } + + std::shared_ptr get_cell_db_reader() override { + CHECK(cell_db_reader_); + return cell_db_reader_; + } + + td::Status set_loader(std::unique_ptr loader) override { + if (cell_db_reader_) { + auto cache_size = cell_db_reader_->cache_size(); + bool force_drop_cache = cell_db_reader_->force_drop_cache(); + if (loader && cache_size < options_.cache_size_max && cell_db_reader_ttl_ < options_.cache_ttl_max && + !force_drop_cache) { + // keep cache + cell_db_reader_ttl_++; + return td::Status::OK(); + } + + td::PerfWarningTimer timer(PSTRING() << "celldb_v2: reset reader, TTL=" << cell_db_reader_ttl_ << "/" + << 
options_.cache_ttl_max << ", cache_size=" << cache_size + << ", force_drop_cache=" << force_drop_cache); + cache_stats_.apply_diff(cell_db_reader_->get_stats()); + cell_db_reader_->drop_cache(); + cell_db_reader_ = {}; + meta_db_fixup_ = {}; + cell_db_reader_ttl_ = 0; + } + + if (loader) { + cell_db_reader_ = std::make_shared(std::move(loader)); + cell_db_reader_ttl_ = 0; + } + + { + std::lock_guard guard(atomic_cell_db_reader_mutex_); + atomic_cell_db_reader_ = cell_db_reader_; + } + return td::Status::OK(); + } + + void set_celldb_compress_depth(td::uint32 value) override { + CHECK(value == 0); + } + + vm::ExtCellCreator &as_ext_cell_creator() override { + CHECK(cell_db_reader_); + return *cell_db_reader_; + } + td::Result get_stats() override { + auto ps = stats_.nc.get_stats().with_prefix("storage_"); + ps.apply_diff(cache_stats_.with_prefix("cache_cum_")); + if (cell_db_reader_) { + ps.apply_diff(cell_db_reader_->get_stats().with_prefix("cache_now_")); + ps.apply_diff(cell_db_reader_->get_stats().with_prefix("cache_cum_")); + } + Stats res; + res.named_stats = std::move(ps); + res.named_stats.stats_int["cache.size"] = cell_db_reader_ ? 
cell_db_reader_->cache_size() : 0; + res.named_stats.stats_int["cache.size_max"] = options_.cache_size_max; + res.named_stats.stats_int["cache.ttl"] = cell_db_reader_ttl_; + res.named_stats.stats_int["cache.ttl_max"] = options_.cache_ttl_max; + return res; + } + + private: + static td::NamedThreadSafeCounter::CounterRef get_thread_safe_counter() { + static auto res = td::NamedThreadSafeCounter::get_default().get_counter("DynamicBagOfCellsDb"); + return res; + } + + class CellDbReaderImpl : public CellDbReaderExt, + public ExtCellCreator, + public std::enable_shared_from_this { + public: + explicit CellDbReaderImpl(std::unique_ptr cell_loader) : cell_loader_(std::move(cell_loader)) { + } + + size_t cache_size() const { + // NOT thread safe + if (internal_storage_) { + return internal_storage_->cache_size(); + } + return 0; + } + bool force_drop_cache() const { + // NOT thread safe + if (internal_storage_) { + return internal_storage_->force_drop_cache(); + } + return false; + } + void drop_cache() { + // NOT thread safe + internal_storage_.reset(); + } + + td::Result> ext_cell(Cell::LevelMask level_mask, td::Slice hash, td::Slice depth) override { + // thread safe function + stats_.ext_cells.inc(); + TRY_RESULT(ext_cell, DynamicBocExtCell::create(PrunnedCellInfo{level_mask, hash, depth}, + DynamicBocExtCellExtra{shared_from_this()})); + + return ext_cell; + } + CellInfo *register_ext_cell_inner(Ref ext_cell, CellInfoStorage &storage) { + auto &info = storage.create_cell_info(std::move(ext_cell), this, stats_); + return &info; + } + + void load_cell_async(td::Slice hash, std::shared_ptr executor, td::Promise> promise) { + // thread safe function + stats_.load_cell_async.inc(); + auto maybe_cell = load_cell_fast_path(hash, false, nullptr); + if (maybe_cell.not_null()) { + stats_.load_cell_async_cache_hits.inc(); + return promise.set_value(std::move(maybe_cell)); + } + auto promise_ptr = std::make_shared>>(std::move(promise)); + + executor->execute_async( + [self = 
shared_from_this(), promise_ptr = std::move(promise_ptr), hash = CellHash::from_slice(hash)]() { + promise_ptr->set_result(self->load_cell(hash.as_slice())); + }); + } + + td::Result> load_cell(td::Slice hash) override { + // thread safe function + stats_.load_cell_sync.inc(); + bool loaded{false}; + auto maybe_cell = load_cell_fast_path(hash, true, &loaded); + if (maybe_cell.not_null()) { + if (!loaded) { + stats_.load_cell_sync_cache_hits.inc(); + } + return maybe_cell; + } + return load_cell_slow_path(hash); + } + + td::Result> load_ext_cell(Ref ext_cell) override { + // thread safe function. + // Called by external cell + stats_.load_cell_ext.inc(); + auto storage = weak_storage_.lock(); + if (!storage) { + TRY_RESULT(load_result, load_cell_no_cache(ext_cell->get_hash().as_slice())); + return load_result.cell_; + } + // we delayed registering ext cell till this moment + auto cell_info = register_ext_cell_inner(std::move(ext_cell), *storage); + + CHECK(cell_info != nullptr); // currently all ext_cells are registered in cache + if (!cell_info->cell->is_loaded()) { + sync_with_db(*cell_info, true); + CHECK(cell_info->cell->is_loaded()); // critical, better to fail + } else { + stats_.load_cell_ext_cache_hits.inc(); + } + return cell_info->cell->load_cell().move_as_ok().data_cell; + } + + CellInfo &cell_info(Ref cell) { + // thread safe function, but called only by DB + CHECK(internal_storage_) + return internal_storage_->create_cell_info(std::move(cell), this, stats_); + } + + std::pair sync_with_db(CellInfo &info, bool need_data) { + // thread safe function, but called only by DB + auto effective_need_data = need_data; + if (info.cell->is_loaded()) { + effective_need_data = false; + } + return info.state.update([&](CellInfo::State state) -> std::optional { + if (state.sync_with_db) { + return {}; + } + stats_.sync_with_db.inc(); + if (!effective_need_data) { + stats_.sync_with_db_only_ref.inc(); + } + auto load_result = + 
cell_loader_->load(info.cell->get_hash().as_slice(), effective_need_data, *this).move_as_ok(); + + state.sync_with_db = true; + if (load_result.status == CellLoader::LoadResult::NotFound) { + CHECK(state.in_db == false); + CHECK(state.db_ref_cnt == 0); + stats_.kv_read_not_found.inc(); + return state; + } + stats_.kv_read_found.inc(); + + state.in_db = true; + state.db_ref_cnt = load_result.refcnt() + state.db_refcnt_fixup; + if (load_result.cell().not_null()) { + info.cell->set_data_cell(std::move(load_result.cell())); + } + CHECK(!need_data || info.cell->is_loaded()); + return state; + }); + } + + void dump() { + internal_storage_->dump(); + } + + td::NamedStats get_stats() const { + return stats_.nc.get_stats(); + } + td::KeyValueReader &key_value_reader() { + return cell_loader_->key_value_reader(); + } + + private: + static td::NamedThreadSafeCounter::CounterRef get_thread_safe_counter() { + static auto res = td::NamedThreadSafeCounter::get_default().get_counter("DynamicBagOfCellsDbLoader"); + return res; + } + std::shared_ptr internal_storage_{std::make_shared()}; + std::weak_ptr weak_storage_{internal_storage_}; + std::unique_ptr cell_loader_; + CacheStats stats_; + + Ref load_cell_fast_path(td::Slice hash, bool may_block, bool *loaded) { + auto storage = weak_storage_.lock(); + if (!storage) { + return {}; + } + auto cell_info = storage->get_cell_info(hash); + if (cell_info != nullptr) { + if (!cell_info->cell->is_loaded()) { + if (may_block) { + if (loaded) { + *loaded = true; + } + CHECK(cell_info->state.load().in_db); + sync_with_db(*cell_info, true); + CHECK(cell_info->cell->is_loaded()); + } else { + return {}; + } + } + return cell_info->cell->load_cell().move_as_ok().data_cell; + } + return {}; + } + td::Result load_cell_no_cache(td::Slice hash) { + stats_.load_cell_no_cache.inc(); + TRY_RESULT(load_result, cell_loader_->load(hash, true, *this)); + if (load_result.status == CellLoader::LoadResult::NotFound) { + stats_.kv_read_not_found.inc(); + 
return td::Status::Error("Cell load failed: not in db"); + } + stats_.kv_read_found.inc(); + return load_result; + } + td::Result> load_cell_slow_path(td::Slice hash) { + TRY_RESULT(load_result, load_cell_no_cache(hash)); + auto storage = weak_storage_.lock(); + if (!storage) { + return load_result.cell_; + } + auto &cell_info = storage->create_cell_info_from_db(std::move(load_result.cell()), load_result.refcnt()); + return cell_info.cell->load_cell().move_as_ok().data_cell; + } + }; + + CreateV2Options options_; + std::vector> to_inc_; + std::vector> to_dec_; + std::vector> diff_chunks_; + std::vector meta_diffs_; + std::map> meta_db_fixup_; + + mutable std::mutex atomic_cell_db_reader_mutex_; + std::shared_ptr atomic_cell_db_reader_; + + std::shared_ptr cell_db_reader_; + size_t cell_db_reader_ttl_{0}; + td::NamedStats cache_stats_; + CommitStats stats_; + bool dbg{false}; + + template + void gather_new_cells(CellInfo *info, WorkerT &worker) { + stats_.gather_new_cells_calls.inc(); + do { + // invariant: info is not in DB; with created in_db_info + // we enter into each root only once + stats_.gather_new_cells_calls_it.inc(); + stats_.new_cells.inc(); + auto &in_db_info = info->in_db_info(); + + CellSlice cs(vm::NoVm{}, info->cell); // ensure cell is loaded + CellInfo *prev_child_info = nullptr; + while (cs.have_refs()) { + auto *child_info = &cell_db_reader_->cell_info(cs.fetch_ref()); + auto child_state = child_info->state.load(); + + if (child_state.in_db) { + LOG_IF(INFO, dbg) << "gather_new_cells: IN DB\n\tchld: " << *child_info; + continue; + } + + auto &child_in_db_info = child_info->in_db_info_create(info); + in_db_info.pending_children.fetch_add(1, std::memory_order_relaxed); + + if (child_in_db_info.visited_in_gather_new_cells.exchange(true)) { + continue; + } + + if (prev_child_info != nullptr) { + worker.add_task(prev_child_info); + } + prev_child_info = child_info; + } + LOG_IF(INFO, dbg) << "gather_new_cells: NOT IN DB\n\t" << *info; + if 
(in_db_info.pending_children.load(std::memory_order_relaxed) == 0) { + worker.add_result(info); + stats_.new_cells_leaves.inc(); + LOG_IF(WARNING, dbg) << "gather_new_cells: ADD LEAVE\n\t" << *info; + } + info = prev_child_info; + } while (info != nullptr); + } + + template + void update_parents(CellInfo *info, const WorkerT &worker) { + stats_.update_parents_calls.inc(); + size_t it = 0; + do { + stats_.update_parents_calls_it.inc(); + it++; + //LOG(INFO) << "update_parents: it=" << it << "\n\t"; + auto &in_db_info = info->in_db_info(); + bool in_db = false; + if (in_db_info.maybe_in_db.load(std::memory_order_relaxed)) { + auto [state, loaded] = cell_db_reader_->sync_with_db(*info, false); + in_db = state.in_db; + if (in_db) { + stats_.new_cells_loaded_in_db.inc(); + } else { + stats_.new_cells_loaded_not_in_db.inc(); + } + } else { + stats_.new_cells_not_in_db_fast.inc(); + info->set_not_in_db(); + } + LOG_IF(INFO, dbg) << "update_parents: it=" << it << "\n\t" << *info; + + CellInfo *prev_parent{nullptr}; + for (auto &parent : in_db_info.parents) { + auto &parent_in_db_info = parent->in_db_info(); + if (!in_db) { + parent_in_db_info.maybe_in_db.store(false, std::memory_order_relaxed); + } + if (parent_in_db_info.pending_children.fetch_sub(1, std::memory_order_release) == 1) { + if (prev_parent) { + worker.add_task(prev_parent); + } + prev_parent = parent; + } + } + if (!in_db) { + CellSlice cs(vm::NoVm{}, info->cell); + while (cs.have_refs()) { + auto child = cs.fetch_ref(); + auto &child_info = cell_db_reader_->cell_info(std::move(child)); + if (child_info.inc_ref_cnt() == 1 && child_info.visit()) { + worker.add_result(&child_info); + } + } + } + info->in_db_info_destroy(); + info = prev_parent; + } while (info); + } + + template + void dec_cell(CellInfo *info, WorkerT &worker) { + stats_.dec_calls.inc(); + + while (true) { + stats_.dec_calls_it.inc(); + if (info->visit()) { + worker.add_result(info); + } + auto ref_cnt_diff = info->dec_ref_cnt(); + if 
(ref_cnt_diff > 0) { + LOG_IF(INFO, dbg) << "NOT DEC" + << "\n\t" << info; + break; + } + auto state = info->state.load(); + if (ref_cnt_diff == 0 && state.in_db) { + LOG_IF(INFO, dbg) << "NOT DEC (in_db) " + << "\n\t" << info; + break; + } + if (!state.sync_with_db) { + state = cell_db_reader_->sync_with_db(*info, true).first; + stats_.dec_loaded.inc(); + CHECK(ref_cnt_diff == 0 || state.in_db); + } + auto ref_cnt = state.db_ref_cnt + ref_cnt_diff; + if (ref_cnt > 0) { + LOG_IF(INFO, dbg) << "DEC " << ref_cnt << "\n\t" << info; + } else { + LOG_IF(ERROR, dbg) << "DEC " << ref_cnt << "\n\t" << info; + } + CHECK(ref_cnt >= 0); + if (ref_cnt > 0) { + break; + } + stats_.dec_to_zero.inc(); + CellSlice cs(vm::NoVm{}, info->cell); + if (!cs.have_refs()) { + break; + } + while (cs.size_refs() > 1) { + worker.add_task(&cell_db_reader_->cell_info(cs.fetch_ref())); + } + info = &cell_db_reader_->cell_info(cs.fetch_ref()); + } + } + + template + void serialize_diff(CellInfo *info, Worker &worker) { + info->visited.store(false, std::memory_order_relaxed); + auto ref_cnt_diff = info->get_ref_cnt_diff(); + if (ref_cnt_diff == 0) { + stats_.diff_zero.inc(); + return; + } + + bool merge_supported = true; + if (merge_supported) { + auto state = info->state.load(); + if (ref_cnt_diff < 0) { + CHECK(state.sync_with_db); + /* + if (state.db_ref_cnt + ref_cnt_diff == 0) { + LOG(ERROR) << "DEC ERASE " << info->cell->get_hash().to_hex(); + } else { + LOG(ERROR) << "DEC MERGE " << info->cell->get_hash().to_hex() << *info; + } + */ + } + if (ref_cnt_diff < 0 && state.sync_with_db && state.db_ref_cnt + ref_cnt_diff == 0) { + // Erase is better than Merge+CompactionFilter + // So I see no reason for CompactionFilter at all + worker.add_result({.type = CellStorer::Diff::Erase, .key = info->cell->get_hash()}); + stats_.diff_erase.inc(); + } else { + bool with_data = ref_cnt_diff > 0 && !state.in_db; + if (with_data) { + CHECK(state.sync_with_db); + auto data_cell = 
info->cell->load_cell().move_as_ok().data_cell; + stats_.diff_full.inc(); + worker.add_result({.type = CellStorer::Diff::Set, + .key = info->cell->get_hash(), + .value = CellStorer::serialize_value(ref_cnt_diff + state.db_ref_cnt, data_cell, false)}); + } else { + stats_.diff_ref_cnt.inc(); + worker.add_result({.type = CellStorer::Diff::Merge, + .key = info->cell->get_hash(), + .value = CellStorer::serialize_refcnt_diffs(ref_cnt_diff)}); + } + } + info->on_written_to_db(); + return; + } + + auto state = info->state.load(); + if (!state.sync_with_db) { + stats_.changes_loaded.inc(); + state = cell_db_reader_->sync_with_db(*info, true).first; + } + CHECK(state.sync_with_db); + auto new_ref_cnt = ref_cnt_diff + state.db_ref_cnt; + + if (ref_cnt_diff < 0) { + stats_.dec_save.inc(); + if (new_ref_cnt == 0) { + stats_.dec_erase_cell.inc(); + + LOG_IF(ERROR, dbg) << "DEC ERASE " << *info; + worker.add_result({.type = CellStorer::Diff::Erase, .key = info->cell->get_hash()}); + stats_.dec_save_erase.inc(); + } else { + stats_.dec_just_ref_cnt.inc(); + + LOG_IF(ERROR, dbg) << "DEC REFCNT " << *info; + CHECK(info->cell->is_loaded()); + worker.add_result( + {.type = CellStorer::Diff::Set, + .key = info->cell->get_hash(), + .value = CellStorer::serialize_value(new_ref_cnt, info->cell->load_cell().move_as_ok().data_cell, false)}); + stats_.dec_save_full.inc(); + } + } else { + stats_.inc_save.inc(); + CHECK(info->cell->is_loaded()); + if (state.db_ref_cnt == 0) { + stats_.inc_new_cell.inc(); + LOG_IF(ERROR, dbg) << "INC CREATE " << *info; + } else { + stats_.inc_just_ref_cnt.inc(); + LOG_IF(ERROR, dbg) << "INC REFCNT " << *info; + } + + worker.add_result( + {.type = CellStorer::Diff::Set, + .key = info->cell->get_hash(), + .value = CellStorer::serialize_value(new_ref_cnt, info->cell->load_cell().move_as_ok().data_cell, false)}); + stats_.inc_save_full.inc(); + } + } + + void save_diff(CellStorer &storer) { + td::PerfWarningTimer timer("celldb_v2: save_diff"); + 
td::PerfWarningTimer timer_store_to_db("celldb_v2: save_diff_store_to_db", 0.01); + // Have no idea how to parallelize this in case of rocksdb + for (auto &diffs : diff_chunks_) { + for (auto &diff : diffs) { + storer.apply_diff(diff).ensure(); + } + } + for (auto &meta_diff : meta_diffs_) { + meta_db_fixup_[meta_diff.key] = meta_diff.value; + storer.apply_meta_diff(meta_diff).ensure(); + } + timer_store_to_db.reset(); + td::PerfWarningTimer timer_clear("celldb_v2: save_diff_clear"); + diff_chunks_.clear(); + meta_diffs_.clear(); + timer_clear.reset(); + } +}; +} // namespace + +std::unique_ptr DynamicBagOfCellsDb::create_v2(CreateV2Options options) { + return std::make_unique(options); +} +} // namespace vm diff --git a/crypto/vm/db/InMemoryBagOfCellsDb.cpp b/crypto/vm/db/InMemoryBagOfCellsDb.cpp index 03cad0934..e43cfde4e 100644 --- a/crypto/vm/db/InMemoryBagOfCellsDb.cpp +++ b/crypto/vm/db/InMemoryBagOfCellsDb.cpp @@ -413,6 +413,7 @@ class CellStorage { size_t dense_ht_size = 0; size_t new_ht_size = 0; for_each_bucket(0, [&](auto bucket_id, CellBucket &bucket) { + // TODO: this leads to CE when use_dense_hash_map == false dense_ht_capacity += bucket.infos_.dense_ht_values_.size(); dense_ht_size += bucket.infos_.dense_ht_size_; new_ht_capacity += bucket.infos_.new_ht_.bucket_count(); @@ -468,6 +469,14 @@ class CellStorage { } return td::Status::Error("not found"); } + td::Result>> load_known_roots_local() const { + auto lock = local_access_.lock(); + std::vector> result; + for (auto &root : roots_) { + result.emplace_back(root); + } + return result; + } td::Result> load_root_shared(const CellHash &hash) const { std::lock_guard lock(root_mutex_); if (auto it = roots_.find(hash); it != roots_.end()) { @@ -620,7 +629,7 @@ class CellStorage { sb << "\n\t" << key << "=" << value; } LOG_IF(ERROR, desc_count != 0 && desc_count != stats.roots_total_count + 1) - << "desc<> keys count is " << desc_count << " wich is different from roots count " << stats.roots_total_count; 
+ << "desc<> keys count is " << desc_count << " which is different from roots count " << stats.roots_total_count; LOG_IF(WARNING, verbose) << P << "done in " << full_timer.elapsed() << "\n\troots_count=" << stats.roots_total_count << "\n\t" << desc_count << "\n\tcells_count=" << stats.cells_total_count @@ -757,15 +766,84 @@ class CellStorage { } }; +class MetaStorage { + public: + explicit MetaStorage(std::vector> values) + : meta_(std::move_iterator(values.begin()), std::move_iterator(values.end())) { + for (auto &p : meta_) { + CHECK(p.first.size() != CellTraits::hash_bytes); + } + } + std::vector> meta_get_all(size_t max_count) const { + std::vector> res; + for (const auto &[k, v] : meta_) { + if (res.size() >= max_count) { + break; + } + res.emplace_back(k, v); + } + return res; + } + KeyValue::GetStatus meta_get(td::Slice key, std::string &value) const { + auto lock = local_access_.lock(); + auto it = meta_.find(key.str()); + if (it == meta_.end()) { + return KeyValue::GetStatus::NotFound; + } + value = it->second; + return KeyValue::GetStatus::Ok; + } + void meta_set(td::Slice key, td::Slice value) { + auto lock = local_access_.lock(); + meta_[key.str()] = value.str(); + meta_diffs_.push_back( + CellStorer::MetaDiff{.type = CellStorer::MetaDiff::Set, .key = key.str(), .value = value.str()}); + } + void meta_erase(td::Slice key) { + auto lock = local_access_.lock(); + meta_.erase(key.str()); + meta_diffs_.push_back(CellStorer::MetaDiff{.type = CellStorer::MetaDiff::Erase, .key = key.str()}); + } + std::vector extract_diffs() { + auto lock = local_access_.lock(); + return std::move(meta_diffs_); + } + + private: + mutable UniqueAccess local_access_; + std::unordered_map meta_; + std::vector meta_diffs_; +}; + class InMemoryBagOfCellsDb : public DynamicBagOfCellsDb { public: - explicit InMemoryBagOfCellsDb(td::unique_ptr storage) : storage_(std::move(storage)) { + explicit InMemoryBagOfCellsDb(td::unique_ptr storage, td::unique_ptr meta_storage) + : 
storage_(std::move(storage)), meta_storage_(std::move(meta_storage)) { + } + + td::Result>> meta_get_all(size_t max_count) const override { + return meta_storage_->meta_get_all(max_count); + } + td::Result meta_get(td::Slice key, std::string &value) override { + CHECK(key.size() != CellTraits::hash_bytes); + return meta_storage_->meta_get(key, value); + } + td::Status meta_set(td::Slice key, td::Slice value) override { + meta_storage_->meta_set(key, value); + return td::Status::OK(); + } + td::Status meta_erase(td::Slice key) override { + meta_storage_->meta_erase(key); + return td::Status::OK(); } td::Result> load_cell(td::Slice hash) override { return storage_->load_cell(CellHash::from_slice(hash)); } + td::Result>> load_known_roots() const override { + return storage_->load_known_roots_local(); + } td::Result> load_root(td::Slice hash) override { return storage_->load_root_local(CellHash::from_slice(hash)); } @@ -798,29 +876,37 @@ class InMemoryBagOfCellsDb : public DynamicBagOfCellsDb { TRY_STATUS(prepare_commit()); } + td::PerfWarningTimer times_save_diff("save diff"); Stats diff; CHECK(to_dec_.empty()); - for (auto &it : info_) { - auto &info = it.second; + for (auto &info : info_) { if (info.diff_refcnt == 0) { continue; } auto refcnt = td::narrow_cast(static_cast(info.db_refcnt) + info.diff_refcnt); - CHECK(refcnt >= 0); + LOG_CHECK(refcnt >= 0) << info.db_refcnt << " + " << info.diff_refcnt; if (refcnt > 0) { - cell_storer.set(refcnt, info.cell, false); + if (info.db_refcnt == 0) { + TRY_STATUS(cell_storer.set(refcnt, info.cell, false)); + } else { + TRY_STATUS(cell_storer.merge(info.cell->get_hash().as_slice(), info.diff_refcnt)); + } storage_->set(refcnt, info.cell); if (info.db_refcnt == 0) { diff.cells_total_count++; diff.cells_total_size += static_cast(info.cell->get_storage_size()); } } else { - cell_storer.erase(info.cell->get_hash().as_slice()); + TRY_STATUS(cell_storer.erase(info.cell->get_hash().as_slice())); 
storage_->erase(info.cell->get_hash()); diff.cells_total_count--; diff.cells_total_size -= static_cast(info.cell->get_storage_size()); } } + auto meta_diffs = meta_storage_->extract_diffs(); + for (const auto &meta_diff : meta_diffs) { + TRY_STATUS(cell_storer.apply_meta_diff(meta_diff)); + } storage_->apply_stats_diff(diff); info_ = {}; return td::Status::OK(); @@ -872,13 +958,39 @@ class InMemoryBagOfCellsDb : public DynamicBagOfCellsDb { private: td::unique_ptr storage_; + td::unique_ptr meta_storage_; struct Info { - td::int32 db_refcnt{0}; - td::int32 diff_refcnt{0}; + mutable td::int32 db_refcnt{0}; + mutable td::int32 diff_refcnt{0}; Ref cell; + vm::CellHash key() const { + return cell->get_hash(); + } + struct Eq { + using is_transparent = void; // Pred to use + bool operator()(const Info &info, const Info &other_info) const { + return info.key() == other_info.key(); + } + bool operator()(const Info &info, td::Slice hash) const { + return info.key().as_slice() == hash; + } + bool operator()(td::Slice hash, const Info &info) const { + return info.key().as_slice() == hash; + } + }; + struct Hash { + using is_transparent = void; // Pred to use + using transparent_key_equal = Eq; + size_t operator()(td::Slice hash) const { + return cell_hash_slice_hash(hash); + } + size_t operator()(const Info &info) const { + return cell_hash_slice_hash(info.key().as_slice()); + } + }; }; - td::HashMap info_; + td::HashSet info_; std::unique_ptr loader_; std::vector> to_inc_; @@ -886,13 +998,13 @@ class InMemoryBagOfCellsDb : public DynamicBagOfCellsDb { Ref do_inc(Ref cell) { auto cell_hash = cell->get_hash(); - if (auto it = info_.find(cell_hash); it != info_.end()) { - CHECK(it->second.diff_refcnt != std::numeric_limits::max()); - it->second.diff_refcnt++; - return it->second.cell; + if (auto it = info_.find(cell_hash.as_slice()); it != info_.end()) { + CHECK(it->diff_refcnt != std::numeric_limits::max()); + it->diff_refcnt++; + return it->cell; } if (auto o_info = 
storage_->get_info(cell_hash)) { - info_.emplace(cell_hash, Info{.db_refcnt = o_info->db_refcnt, .diff_refcnt = 1, .cell = o_info->cell}); + info_.emplace(Info{.db_refcnt = o_info->db_refcnt, .diff_refcnt = 1, .cell = o_info->cell}); return std::move(o_info->cell); } @@ -905,21 +1017,21 @@ class InMemoryBagOfCellsDb : public DynamicBagOfCellsDb { } auto res = cb.finalize(cs.is_special()); CHECK(res->get_hash() == cell_hash); - info_.emplace(cell_hash, Info{.db_refcnt = 0, .diff_refcnt = 1, .cell = res}); + info_.emplace(Info{.db_refcnt = 0, .diff_refcnt = 1, .cell = res}); return res; } void do_dec(Ref cell) { auto cell_hash = cell->get_hash(); - auto it = info_.find(cell_hash); + auto it = info_.find(cell_hash.as_slice()); if (it != info_.end()) { - CHECK(it->second.diff_refcnt != std::numeric_limits::min()); - --it->second.diff_refcnt; + CHECK(it->diff_refcnt != std::numeric_limits::min()); + --it->diff_refcnt; } else { auto info = *storage_->get_info(cell_hash); - it = info_.emplace(cell_hash, Info{.db_refcnt = info.db_refcnt, .diff_refcnt = -1, .cell = info.cell}).first; + it = info_.emplace(Info{.db_refcnt = info.db_refcnt, .diff_refcnt = -1, .cell = info.cell}).first; } - if (it->second.diff_refcnt + it->second.db_refcnt != 0) { + if (it->diff_refcnt + it->db_refcnt != 0) { return; } CellSlice cs(NoVm{}, std::move(cell)); @@ -936,7 +1048,8 @@ std::unique_ptr DynamicBagOfCellsDb::create_in_memory(td::K if (kv == nullptr) { LOG_IF(WARNING, options.verbose) << "Create empty in-memory cells database (no key value is given)"; auto storage = CellStorage::build(options, [](auto, auto, auto) { return std::make_pair(0, 0); }); - return std::make_unique(std::move(storage)); + auto meta_storage = td::make_unique(std::vector>{}); + return std::make_unique(std::move(storage), std::move(meta_storage)); } std::vector keys; @@ -962,6 +1075,9 @@ std::unique_ptr DynamicBagOfCellsDb::create_in_memory(td::K local_desc_count++; return td::Status::OK(); } + if (key.size() != 32) { 
+ return td::Status::OK(); + } auto r_res = CellLoader::load(key, value.str(), true, pc_creator); if (r_res.is_error()) { LOG(ERROR) << r_res.error() << " at " << td::format::escaped(key); @@ -983,6 +1099,24 @@ std::unique_ptr DynamicBagOfCellsDb::create_in_memory(td::K }; auto storage = CellStorage::build(options, parallel_scan_cells); - return std::make_unique(std::move(storage)); + + std::vector> meta; + // NB: it scans 1/(2^32) of the database which is not much + kv->for_each_in_range("desc", "desd", [&meta](td::Slice key, td::Slice value) { + if (key.size() != 32) { + meta.emplace_back(key.str(), value.str()); + } + return td::Status::OK(); + }); + // this is for tests mostly. desc* keys are expected to correspond to roots + kv->for_each_in_range("meta", "metb", [&meta](td::Slice key, td::Slice value) { + if (key.size() != 32) { + meta.emplace_back(key.str(), value.str()); + } + return td::Status::OK(); + }); + auto meta_storage = td::make_unique(std::move(meta)); + + return std::make_unique(std::move(storage), std::move(meta_storage)); } } // namespace vm diff --git a/crypto/vm/db/StaticBagOfCellsDb.cpp b/crypto/vm/db/StaticBagOfCellsDb.cpp index 80dbfbf0b..c65d2624f 100644 --- a/crypto/vm/db/StaticBagOfCellsDb.cpp +++ b/crypto/vm/db/StaticBagOfCellsDb.cpp @@ -40,6 +40,9 @@ class RootCell : public Cell { struct PrivateTag {}; public: + td::Status set_data_cell(Ref &&data_cell) const override { + return cell_->set_data_cell(std::move(data_cell)); + } td::Result load_cell() const override { return cell_->load_cell(); } @@ -94,11 +97,11 @@ class DataCellCacheNoop { class DataCellCacheMutex { public: Ref store(int idx, Ref cell) { - auto lock = cells_rw_mutex_.lock_write(); + std::lock_guard lock(mutex_); return cells_.emplace(idx, std::move(cell)).first->second; } Ref load(int idx) { - auto lock = cells_rw_mutex_.lock_read(); + std::lock_guard lock(mutex_); auto it = cells_.find(idx); if (it != cells_.end()) { return it->second; @@ -106,12 +109,13 @@ class 
DataCellCacheMutex { return {}; } void clear() { - auto guard = cells_rw_mutex_.lock_write(); + std::lock_guard lock(mutex_); cells_.clear(); } private: - td::RwMutex cells_rw_mutex_; + std::mutex mutex_; + // NB: in case of high contention, one should use multiple buckets with per bucket mutexes td::HashMap> cells_; }; @@ -246,7 +250,7 @@ class StaticBagOfCellsDbLazyImpl : public StaticBagOfCellsDb { BagOfCells::Info info_; std::mutex index_i_mutex_; - td::RwMutex index_data_rw_mutex_; + std::mutex index_mutex_; std::string index_data_; std::atomic index_i_{0}; size_t index_offset_{0}; @@ -319,7 +323,7 @@ class StaticBagOfCellsDbLazyImpl : public StaticBagOfCellsDb { info_.index_offset + (td::int64)idx * info_.offset_byte_size)); offset_view = new_offset_view; } else { - guard = index_data_rw_mutex_.lock_read().move_as_ok(); + std::lock_guard guard(index_mutex_); offset_view = td::Slice(index_data_).substr((td::int64)idx * info_.offset_byte_size, info_.offset_byte_size); } @@ -432,7 +436,7 @@ class StaticBagOfCellsDbLazyImpl : public StaticBagOfCellsDb { } td::uint8 tmp[8]; info_.write_offset(tmp, index_offset_); - auto guard = index_data_rw_mutex_.lock_write(); + std::lock_guard guard(index_mutex_); index_data_.append(reinterpret_cast(tmp), info_.offset_byte_size); } return td::Status::OK(); diff --git a/tddb/td/db/KeyValue.h b/tddb/td/db/KeyValue.h index 12c3a4f8d..c3f83919b 100644 --- a/tddb/td/db/KeyValue.h +++ b/tddb/td/db/KeyValue.h @@ -20,19 +20,51 @@ #include "td/utils/Status.h" #include "td/utils/Time.h" #include "td/utils/logging.h" +#include "td/utils/ThreadSafeCounter.h" #include namespace td { +struct UsageStats { + size_t get_count{}; + size_t get_found_count{}; + size_t get_not_found_count{}; + size_t set_count{}; + UsageStats operator+(const UsageStats& other) const { + return UsageStats{.get_count = get_count + other.get_count, + .get_found_count = get_found_count + other.get_found_count, + .get_not_found_count = get_not_found_count + 
other.get_not_found_count, + .set_count = set_count + other.set_count}; + } + UsageStats operator-(const UsageStats& other) const { + return UsageStats{.get_count = get_count - other.get_count, + .get_found_count = get_found_count - other.get_found_count, + .get_not_found_count = get_not_found_count - other.get_not_found_count, + .set_count = set_count - other.set_count}; + } + NamedStats to_named_stats() const { + NamedStats ns; + ns.stats_int["usage_get_count"] += get_count; + ns.stats_int["usage_get_found_count"] += get_found_count; + ns.stats_int["usage_get_not_found_count"] += get_not_found_count; + ns.stats_int["usage_set_count"] += set_count; + return ns; + } +}; +inline td::StringBuilder& operator<<(td::StringBuilder& sb, const UsageStats& stats) { + sb << "get: " << stats.get_count << ", +" << stats.get_found_count << ", -" << stats.get_not_found_count; + return sb; +} + class KeyValueReader { public: virtual ~KeyValueReader() = default; enum class GetStatus : int32 { Ok, NotFound }; - virtual Result get(Slice key, std::string &value) = 0; + virtual Result get(Slice key, std::string& value) = 0; virtual Result count(Slice prefix) = 0; virtual Status for_each(std::function f) { return Status::Error("for_each is not supported"); } - virtual Status for_each_in_range (Slice begin, Slice end, std::function f) { + virtual Status for_each_in_range(Slice begin, Slice end, std::function f) { return td::Status::Error("foreach_range is not supported"); } }; @@ -42,7 +74,7 @@ class PrefixedKeyValueReader : public KeyValueReader { PrefixedKeyValueReader(std::shared_ptr reader, Slice prefix) : reader_(std::move(reader)), prefix_(prefix.str()) { } - Result get(Slice key, std::string &value) override { + Result get(Slice key, std::string& value) override { return reader_->get(PSLICE() << prefix_ << key, value); } Result count(Slice prefix) override { @@ -54,14 +86,16 @@ class PrefixedKeyValueReader : public KeyValueReader { std::string prefix_; }; -class KeyValueUtils { - 
public: -}; - class KeyValue : public KeyValueReader { public: virtual Status set(Slice key, Slice value) = 0; virtual Status erase(Slice key) = 0; + virtual Status merge(Slice key, Slice value) { + return Status::Error("merge is not supported"); + } + virtual Status run_gc() { + return Status::OK(); + } virtual Status begin_write_batch() = 0; virtual Status commit_write_batch() = 0; @@ -80,12 +114,15 @@ class KeyValue : public KeyValueReader { virtual Status flush() { return Status::OK(); } + virtual UsageStats get_usage_stats() { + return {}; + } }; class PrefixedKeyValue : public KeyValue { public: PrefixedKeyValue(std::shared_ptr kv, Slice prefix) : kv_(std::move(kv)), prefix_(prefix.str()) { } - Result get(Slice key, std::string &value) override { + Result get(Slice key, std::string& value) override { return kv_->get(PSLICE() << prefix_ << key, value); } Result count(Slice prefix) override { diff --git a/tddb/td/db/MemoryKeyValue.cpp b/tddb/td/db/MemoryKeyValue.cpp index 080133602..7105f72b9 100644 --- a/tddb/td/db/MemoryKeyValue.cpp +++ b/tddb/td/db/MemoryKeyValue.cpp @@ -22,57 +22,99 @@ namespace td { Result MemoryKeyValue::get(Slice key, std::string &value) { - auto it = map_.find(key); - if (it == map_.end()) { + auto bucket = lock(key); + auto &map = bucket->map; + + usage_stats_.get_count++; + auto it = map.find(key); + if (it == map.end()) { + usage_stats_.get_not_found_count++; return GetStatus::NotFound; } value = it->second; + usage_stats_.get_found_count++; return GetStatus::Ok; } +std::unique_ptr MemoryKeyValue::lock(td::Slice key) { + auto bucket_id = std::hash()(std::string_view(key.data(), key.size())) % buckets_.size(); + return lock(buckets_[bucket_id]); +} + Status MemoryKeyValue::for_each(std::function f) { - for (auto &it : map_) { - TRY_STATUS(f(it.first, it.second)); + for (auto &unlocked_bucket : buckets_) { + auto bucket = lock(unlocked_bucket); + for (auto &it : bucket->map) { + TRY_STATUS(f(it.first, it.second)); + } } return 
Status::OK(); } Status MemoryKeyValue::for_each_in_range(Slice begin, Slice end, std::function f) { - for (auto it = map_.lower_bound(begin); it != map_.end(); it++) { - if (it->first < end) { - TRY_STATUS(f(it->first, it->second)); - } else { - break; + for (auto &unlocked_bucket : buckets_) { + auto bucket = lock(unlocked_bucket); + auto &map = bucket->map; + for (auto it = map.lower_bound(begin); it != map.end(); it++) { + if (it->first < end) { + TRY_STATUS(f(it->first, it->second)); + } else { + break; + } } } return Status::OK(); } Status MemoryKeyValue::set(Slice key, Slice value) { - map_[key.str()] = value.str(); + auto bucket = lock(key); + auto &map = bucket->map; + + usage_stats_.set_count++; + map[key.str()] = value.str(); return Status::OK(); } +Status MemoryKeyValue::merge(Slice key, Slice update) { + CHECK(merger_); + auto bucket = lock(key); + auto &map = bucket->map; + auto &value = map[key.str()]; + merger_->merge_value_and_update(value, update); + if (value.empty()) { + map.erase(key.str()); + } + return td::Status::OK(); +} Status MemoryKeyValue::erase(Slice key) { - auto it = map_.find(key); - if (it != map_.end()) { - map_.erase(it); + auto bucket = lock(key); + auto &map = bucket->map; + auto it = map.find(key); + if (it != map.end()) { + map.erase(it); } return Status::OK(); } Result MemoryKeyValue::count(Slice prefix) { size_t res = 0; - for (auto it = map_.lower_bound(prefix); it != map_.end(); it++) { - if (Slice(it->first).truncate(prefix.size()) != prefix) { - break; + for (auto &unlocked_bucket : buckets_) { + auto bucket = lock(unlocked_bucket); + auto &map = bucket->map; + for (auto it = map.lower_bound(prefix); it != map.end(); it++) { + if (Slice(it->first).truncate(prefix.size()) != prefix) { + break; + } + res++; } - res++; } return res; } std::unique_ptr MemoryKeyValue::snapshot() { auto res = std::make_unique(); - res->map_ = map_; + for (size_t i = 0; i < buckets_.size(); i++) { + auto bucket = lock(buckets_[i]); + 
res->buckets_[i].map = bucket->map; + } return std::move(res); } @@ -80,10 +122,10 @@ std::string MemoryKeyValue::stats() const { return PSTRING() << "MemoryKeyValueStats{" << tag("get_count", get_count_) << "}"; } Status MemoryKeyValue::begin_write_batch() { - UNREACHABLE(); + return Status::OK(); } Status MemoryKeyValue::commit_write_batch() { - UNREACHABLE(); + return Status::OK(); } Status MemoryKeyValue::abort_write_batch() { UNREACHABLE(); diff --git a/tddb/td/db/MemoryKeyValue.h b/tddb/td/db/MemoryKeyValue.h index f0b5faa08..cf896095d 100644 --- a/tddb/td/db/MemoryKeyValue.h +++ b/tddb/td/db/MemoryKeyValue.h @@ -22,12 +22,22 @@ #include namespace td { + +struct Merger { + virtual ~Merger() = default; + virtual void merge_value_and_update(std::string &value, Slice update) = 0; + virtual void merge_update_and_update(std::string &left_update, Slice right_update) = 0; +}; class MemoryKeyValue : public KeyValue { public: - Result get(Slice key, std::string &value) override; + MemoryKeyValue() = default; + MemoryKeyValue(std::shared_ptr merger) : merger_(std::move(merger)) { + } + Result get(Slice key, std::string& value) override; Status for_each(std::function f) override; Status for_each_in_range(Slice begin, Slice end, std::function f) override; Status set(Slice key, Slice value) override; + Status merge(Slice key, Slice value) override; Status erase(Slice key) override; Result count(Slice prefix) override; @@ -43,8 +53,30 @@ class MemoryKeyValue : public KeyValue { std::string stats() const override; + UsageStats get_usage_stats() override { + return usage_stats_; + } + private: - std::map> map_; + static constexpr size_t buckets_n = 64; + struct Bucket { + std::mutex mutex; + std::map> map; + }; + struct Unlock { + void operator()(Bucket* bucket) const { + bucket->mutex.unlock(); + } + }; + std::array buckets_{}; int64 get_count_{0}; + UsageStats usage_stats_{}; + std::shared_ptr merger_; + + std::unique_ptr lock(Bucket& bucket) { + bucket.mutex.lock(); + 
return std::unique_ptr(&bucket); + } + std::unique_ptr lock(td::Slice key); }; } // namespace td diff --git a/tddb/td/db/RocksDb.cpp b/tddb/td/db/RocksDb.cpp index f1aa64a5d..b56f3b145 100644 --- a/tddb/td/db/RocksDb.cpp +++ b/tddb/td/db/RocksDb.cpp @@ -24,10 +24,13 @@ #include "rocksdb/write_batch.h" #include "rocksdb/utilities/optimistic_transaction_db.h" #include "rocksdb/utilities/transaction.h" +#include "td/utils/misc.h" + +#include namespace td { namespace { -static Status from_rocksdb(rocksdb::Status status) { +static Status from_rocksdb(const rocksdb::Status &status) { if (status.ok()) { return Status::OK(); } @@ -56,62 +59,83 @@ RocksDb::~RocksDb() { } RocksDb RocksDb::clone() const { + if (transaction_db_) { + return RocksDb{transaction_db_, options_}; + } return RocksDb{db_, options_}; } Result RocksDb::open(std::string path, RocksDbOptions options) { - rocksdb::OptimisticTransactionDB *db; - { - rocksdb::Options db_options; + rocksdb::Options db_options; + db_options.merge_operator = options.merge_operator; + db_options.compaction_filter = options.compaction_filter; - static auto default_cache = rocksdb::NewLRUCache(1 << 30); - if (!options.no_block_cache && options.block_cache == nullptr) { - options.block_cache = default_cache; - } + static auto default_cache = rocksdb::NewLRUCache(1 << 30); + if (!options.no_block_cache && options.block_cache == nullptr) { + options.block_cache = default_cache; + } - rocksdb::BlockBasedTableOptions table_options; - if (options.no_block_cache) { - table_options.no_block_cache = true; - } else { - table_options.block_cache = options.block_cache; - } - db_options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_options)); - - db_options.use_direct_reads = options.use_direct_reads; - db_options.manual_wal_flush = true; - db_options.create_if_missing = true; - db_options.max_background_compactions = 4; - db_options.max_background_flushes = 2; - db_options.bytes_per_sync = 1 << 20; - 
db_options.writable_file_max_buffer_size = 2 << 14; - db_options.statistics = options.statistics; - db_options.max_log_file_size = 100 << 20; - db_options.keep_log_file_num = 1; - rocksdb::OptimisticTransactionDBOptions occ_options; - occ_options.validate_policy = rocksdb::OccValidationPolicy::kValidateSerial; + rocksdb::BlockBasedTableOptions table_options; + if (options.no_block_cache) { + table_options.no_block_cache = true; + } else { + table_options.block_cache = options.block_cache; + } + db_options.table_factory.reset(rocksdb::NewBlockBasedTableFactory(table_options)); + + // table_options.block_align = true; + if (options.no_reads) { + db_options.memtable_factory.reset(new rocksdb::VectorRepFactory()); + db_options.allow_concurrent_memtable_write = false; + } + + db_options.wal_recovery_mode = rocksdb::WALRecoveryMode::kTolerateCorruptedTailRecords; + db_options.use_direct_reads = options.use_direct_reads; + db_options.manual_wal_flush = true; + db_options.create_if_missing = true; + db_options.max_background_compactions = 4; + db_options.max_background_flushes = 2; + db_options.bytes_per_sync = 1 << 20; + db_options.writable_file_max_buffer_size = 2 << 14; + db_options.statistics = options.statistics; + db_options.max_log_file_size = 100 << 20; + db_options.keep_log_file_num = 1; + + if (options.experimental) { + // Place your experimental options here + } + + if (options.no_transactions) { + rocksdb::DB *db{nullptr}; + TRY_STATUS(from_rocksdb(rocksdb::DB::Open(db_options, std::move(path), &db))); + return RocksDb(std::shared_ptr(db), std::move(options)); + } else { + rocksdb::OptimisticTransactionDB *db{nullptr}; rocksdb::ColumnFamilyOptions cf_options(db_options); std::vector column_families; column_families.push_back(rocksdb::ColumnFamilyDescriptor(rocksdb::kDefaultColumnFamilyName, cf_options)); std::vector handles; + rocksdb::OptimisticTransactionDBOptions occ_options; + occ_options.validate_policy = rocksdb::OccValidationPolicy::kValidateSerial; 
TRY_STATUS(from_rocksdb(rocksdb::OptimisticTransactionDB::Open(db_options, occ_options, std::move(path), column_families, &handles, &db))); CHECK(handles.size() == 1); // i can delete the handle since DBImpl is always holding a reference to // default column family delete handles[0]; + return RocksDb(std::shared_ptr(db), std::move(options)); } - return RocksDb(std::shared_ptr(db), std::move(options)); } std::shared_ptr RocksDb::create_statistics() { return rocksdb::CreateDBStatistics(); } -std::string RocksDb::statistics_to_string(const std::shared_ptr statistics) { +std::string RocksDb::statistics_to_string(const std::shared_ptr &statistics) { return statistics->ToString(); } -void RocksDb::reset_statistics(const std::shared_ptr statistics) { +void RocksDb::reset_statistics(const std::shared_ptr &statistics) { statistics->Reset(); } @@ -133,7 +157,9 @@ std::string RocksDb::stats() const { } Result RocksDb::get(Slice key, std::string &value) { - //LOG(ERROR) << "GET"; + if (options_.no_reads) { + return td::Status::Error("trying to read from write-only database"); + } rocksdb::Status status; if (snapshot_) { rocksdb::ReadOptions options; @@ -162,6 +188,18 @@ Status RocksDb::set(Slice key, Slice value) { } return from_rocksdb(db_->Put({}, to_rocksdb(key), to_rocksdb(value))); } +Status RocksDb::merge(Slice key, Slice value) { + if (write_batch_) { + return from_rocksdb(write_batch_->Merge(to_rocksdb(key), to_rocksdb(value))); + } + if (transaction_) { + return from_rocksdb(transaction_->Merge(to_rocksdb(key), to_rocksdb(value))); + } + return from_rocksdb(db_->Merge({}, to_rocksdb(key), to_rocksdb(value))); +} +Status RocksDb::run_gc() { + return from_rocksdb(db_->CompactRange({}, nullptr, nullptr)); +} Status RocksDb::erase(Slice key) { if (write_batch_) { @@ -174,7 +212,11 @@ Status RocksDb::erase(Slice key) { } Result RocksDb::count(Slice prefix) { + if (options_.no_reads) { + return td::Status::Error("trying to read from write-only database"); + } 
rocksdb::ReadOptions options; + options.auto_prefix_mode = true; options.snapshot = snapshot_.get(); std::unique_ptr iterator; if (snapshot_ || !transaction_) { @@ -197,7 +239,11 @@ Result RocksDb::count(Slice prefix) { } Status RocksDb::for_each(std::function f) { + if (options_.no_reads) { + return td::Status::Error("trying to read from write-only database"); + } rocksdb::ReadOptions options; + options.auto_prefix_mode = true; options.snapshot = snapshot_.get(); std::unique_ptr iterator; if (snapshot_ || !transaction_) { @@ -219,7 +265,11 @@ Status RocksDb::for_each(std::function f) { } Status RocksDb::for_each_in_range(Slice begin, Slice end, std::function f) { + if (options_.no_reads) { + return td::Status::Error("trying to read from write-only database"); + } rocksdb::ReadOptions options; + options.auto_prefix_mode = true; options.snapshot = snapshot_.get(); std::unique_ptr iterator; if (snapshot_ || !transaction_) { @@ -252,9 +302,10 @@ Status RocksDb::begin_write_batch() { Status RocksDb::begin_transaction() { CHECK(!write_batch_); + CHECK(transaction_db_); rocksdb::WriteOptions options; options.sync = true; - transaction_.reset(db_->BeginTransaction(options, {})); + transaction_.reset(transaction_db_->BeginTransaction(options, {})); return Status::OK(); } @@ -307,7 +358,11 @@ Status RocksDb::end_snapshot() { } RocksDb::RocksDb(std::shared_ptr db, RocksDbOptions options) - : db_(std::move(db)), options_(options) { + : transaction_db_{db}, db_(std::move(db)), options_(std::move(options)) { +} + +RocksDb::RocksDb(std::shared_ptr db, RocksDbOptions options) + : db_(std::move(db)), options_(std::move(options)) { } void RocksDbSnapshotStatistics::begin_snapshot(const rocksdb::Snapshot *snapshot) { diff --git a/tddb/td/db/RocksDb.h b/tddb/td/db/RocksDb.h index 499a33281..d24a20dd7 100644 --- a/tddb/td/db/RocksDb.h +++ b/tddb/td/db/RocksDb.h @@ -36,12 +36,16 @@ #include namespace rocksdb { +class DB; +class Comparator; class Cache; class OptimisticTransactionDB; 
class Transaction; class WriteBatch; class Snapshot; class Statistics; +class MergeOperator; +class CompactionFilter; } // namespace rocksdb namespace td { @@ -61,6 +65,14 @@ struct RocksDbOptions { std::shared_ptr statistics = nullptr; std::shared_ptr block_cache; // Default - one 1G cache for all RocksDb std::shared_ptr snapshot_statistics = nullptr; + + std::shared_ptr merge_operator = nullptr; + const rocksdb::CompactionFilter *compaction_filter = nullptr; + + bool experimental = false; + bool no_reads = false; + bool no_transactions = false; + bool use_direct_reads = false; bool no_block_cache = false; }; @@ -73,10 +85,12 @@ class RocksDb : public KeyValue { Result get(Slice key, std::string &value) override; Status set(Slice key, Slice value) override; + Status merge(Slice key, Slice value) override; Status erase(Slice key) override; + Status run_gc() override; Result count(Slice prefix) override; Status for_each(std::function f) override; - Status for_each_in_range (Slice begin, Slice end, std::function f) override; + Status for_each_in_range(Slice begin, Slice end, std::function f) override; Status begin_write_batch() override; Status commit_write_batch() override; @@ -94,8 +108,8 @@ class RocksDb : public KeyValue { std::string stats() const override; static std::shared_ptr create_statistics(); - static std::string statistics_to_string(const std::shared_ptr statistics); - static void reset_statistics(const std::shared_ptr statistics); + static std::string statistics_to_string(const std::shared_ptr &statistics); + static void reset_statistics(const std::shared_ptr &statistics); static std::shared_ptr create_cache(size_t capacity); @@ -103,12 +117,13 @@ class RocksDb : public KeyValue { RocksDb &operator=(RocksDb &&); ~RocksDb(); - std::shared_ptr raw_db() const { + std::shared_ptr raw_db() const { return db_; }; private: - std::shared_ptr db_; + std::shared_ptr transaction_db_; + std::shared_ptr db_; RocksDbOptions options_; std::unique_ptr transaction_; @@ 
-123,5 +138,6 @@ class RocksDb : public KeyValue { std::unique_ptr snapshot_; explicit RocksDb(std::shared_ptr db, RocksDbOptions options); + explicit RocksDb(std::shared_ptr db, RocksDbOptions options); }; } // namespace td diff --git a/tdutils/td/utils/MpmcQueue.h b/tdutils/td/utils/MpmcQueue.h index e6504e358..1a5f8fa36 100644 --- a/tdutils/td/utils/MpmcQueue.h +++ b/tdutils/td/utils/MpmcQueue.h @@ -414,7 +414,9 @@ class MpmcQueue { while (true) { auto node = hazard_pointers_.protect(thread_id, 0, read_pos_); auto &block = node->block; - if (block.write_pos <= block.read_pos && node->next.load(std::memory_order_relaxed) == nullptr) { + auto read_pos = block.read_pos.load(); + auto write_pos = block.write_pos.load(); + if (write_pos <= read_pos && node->next.load(std::memory_order_relaxed) == nullptr) { return false; } auto pos = block.read_pos++; diff --git a/tdutils/td/utils/Status.h b/tdutils/td/utils/Status.h index cff808143..f75de466a 100644 --- a/tdutils/td/utils/Status.h +++ b/tdutils/td/utils/Status.h @@ -619,6 +619,13 @@ inline Result::Result(Status &&status) : status_(std::move(status)) { inline StringBuilder &operator<<(StringBuilder &string_builder, const Status &status) { return status.print(string_builder); } +template +StringBuilder &operator<<(StringBuilder &sb, const Result &result) { + if (result.is_ok()) { + return sb << "Ok{" << result.ok() << "}"; + } + return sb << result.error(); +} namespace detail { diff --git a/tdutils/td/utils/ThreadSafeCounter.h b/tdutils/td/utils/ThreadSafeCounter.h index aa976b2fb..46dc16bf7 100644 --- a/tdutils/td/utils/ThreadSafeCounter.h +++ b/tdutils/td/utils/ThreadSafeCounter.h @@ -19,6 +19,7 @@ #pragma once +#include "port/thread.h" #include "td/utils/common.h" #include "td/utils/Slice.h" #include "td/utils/StringBuilder.h" @@ -26,6 +27,7 @@ #include #include +#include #include namespace td { @@ -69,6 +71,50 @@ class ThreadSafeCounter { ThreadSafeMultiCounter<1> counter_; }; +struct NamedStats { + std::map 
stats_int; + std::map stats_str; + + NamedStats with_suffix(const std::string &suffix) const { + NamedStats res; + for (auto &p : stats_int) { + res.stats_int[p.first + suffix] = p.second; + } + for (auto &p : stats_str) { + res.stats_str[p.first + suffix] = p.second; + } + return res; + } + NamedStats with_prefix(const std::string &prefix) const { + NamedStats res; + for (auto &p : stats_int) { + res.stats_int[prefix + p.first] = p.second; + } + for (auto &p : stats_str) { + res.stats_str[prefix + p.first] = p.second; + } + return res; + } + void apply_diff(const NamedStats &other) { + for (auto &p : other.stats_int) { + stats_int[p.first] += p.second; + } + for (auto &p : other.stats_str) { + stats_str[p.first] = p.second; + } + } + void subtract_diff(const NamedStats &other) { + for (auto &p : other.stats_int) { + stats_int[p.first] -= p.second; + } + } + NamedStats combine_with(const NamedStats &other) const { + NamedStats res = *this; + res.apply_diff(other); + return res; + } +}; + class NamedThreadSafeCounter { static constexpr int N = 128; using Counter = ThreadSafeMultiCounter; @@ -79,6 +125,9 @@ class NamedThreadSafeCounter { CounterRef() = default; CounterRef(size_t index, Counter *counter) : index_(index), counter_(counter) { } + void inc() { + add(1); + } void add(int64 diff) { counter_->add(index_, diff); } @@ -119,6 +168,11 @@ class NamedThreadSafeCounter { f(names_[i], counter_.sum(i)); } } + NamedStats get_stats() const { + NamedStats res; + for_each([&](Slice name, int64 cnt) { res.stats_int.emplace(name.str(), cnt); }); + return res; + } void clear() { std::unique_lock guard(mutex_); @@ -181,11 +235,11 @@ struct NamedPerfCounter { } // namespace td -#define TD_PERF_COUNTER(name) \ +#define TD_PERF_COUNTER(name) \ static auto perf_##name = td::NamedPerfCounter::get_default().get_counter(td::Slice(#name)); \ auto scoped_perf_##name = td::NamedPerfCounter::ScopedPerfCounterRef{.perf_counter = perf_##name}; -#define TD_PERF_COUNTER_SINCE(name, since) 
\ +#define TD_PERF_COUNTER_SINCE(name, since) \ static auto perf_##name = td::NamedPerfCounter::get_default().get_counter(td::Slice(#name)); \ - auto scoped_perf_##name = \ + auto scoped_perf_##name = \ td::NamedPerfCounter::ScopedPerfCounterRef{.perf_counter = perf_##name, .started_at_ticks = since}; diff --git a/validator-engine/validator-engine.cpp b/validator-engine/validator-engine.cpp index 2ea04e183..cbcc3ab1f 100644 --- a/validator-engine/validator-engine.cpp +++ b/validator-engine/validator-engine.cpp @@ -1414,6 +1414,9 @@ td::Status ValidatorEngine::load_global_config() { if (zero_state.root_hash.is_zero() || zero_state.file_hash.is_zero()) { return td::Status::Error(ton::ErrorCode::error, "[validator] section contains incomplete [zero_state]"); } + if (celldb_in_memory_ && celldb_v2_) { + return td::Status::Error(ton::ErrorCode::error, "at most one of --celldb-in-memory --celldb-v2 could be used"); + } ton::BlockIdExt init_block; if (!conf.validator_->init_block_) { @@ -1461,11 +1464,12 @@ td::Status ValidatorEngine::load_global_config() { if (!session_logs_file_.empty()) { validator_options_.write().set_session_logs_file(session_logs_file_); } - if (celldb_in_memory_) { + if (celldb_in_memory_ || celldb_v2_) { celldb_compress_depth_ = 0; } validator_options_.write().set_celldb_compress_depth(celldb_compress_depth_); validator_options_.write().set_celldb_in_memory(celldb_in_memory_); + validator_options_.write().set_celldb_v2(celldb_v2_); validator_options_.write().set_max_open_archive_files(max_open_archive_files_); validator_options_.write().set_archive_preload_period(archive_preload_period_); validator_options_.write().set_disable_rocksdb_stats(disable_rocksdb_stats_); @@ -4526,6 +4530,12 @@ int main(int argc, char *argv[]) { [&]() { acts.push_back([&x]() { td::actor::send_closure(x, &ValidatorEngine::set_celldb_in_memory, true); }); }); + p.add_option( + '\0', "celldb-v2", + "use new version of celldb", + [&]() { + acts.push_back([&x]() {
td::actor::send_closure(x, &ValidatorEngine::set_celldb_v2, true); }); + }); p.add_checked_option( '\0', "catchain-max-block-delay", "delay before creating a new catchain block, in seconds (default: 0.4)", [&](td::Slice s) -> td::Status { diff --git a/validator-engine/validator-engine.hpp b/validator-engine/validator-engine.hpp index e0dc91f13..5a1db7f3f 100644 --- a/validator-engine/validator-engine.hpp +++ b/validator-engine/validator-engine.hpp @@ -218,6 +218,7 @@ class ValidatorEngine : public td::actor::Actor { bool celldb_direct_io_ = false; bool celldb_preload_all_ = false; bool celldb_in_memory_ = false; + bool celldb_v2_ = false; td::optional catchain_max_block_delay_, catchain_max_block_delay_slow_; bool read_config_ = false; bool started_keyring_ = false; @@ -311,6 +312,9 @@ class ValidatorEngine : public td::actor::Actor { void set_celldb_in_memory(bool value) { celldb_in_memory_ = value; } + void set_celldb_v2(bool value) { + celldb_v2_ = value; + } void set_catchain_max_block_delay(double value) { catchain_max_block_delay_ = value; } diff --git a/validator/db/celldb.cpp b/validator/db/celldb.cpp index e86a373d1..90c659cc4 100644 --- a/validator/db/celldb.cpp +++ b/validator/db/celldb.cpp @@ -28,6 +28,9 @@ #include "ton/ton-io.hpp" #include "common/delay.h" +#include +#include + namespace ton { namespace validator { @@ -73,6 +76,101 @@ CellDbIn::CellDbIn(td::actor::ActorId root_db, td::actor::ActorId td::Slice { + return td::Slice(value.data(), value.size()); + } + bool FullMergeV2(const MergeOperationInput& merge_in, MergeOperationOutput* merge_out) const override { + CHECK(merge_in.existing_value); + auto& value = *merge_in.existing_value; + CHECK(merge_in.operand_list.size() >= 1); + td::Slice diff; + std::string diff_buf; + if (merge_in.operand_list.size() == 1) { + diff = to_td(merge_in.operand_list[0]); + } else { + diff_buf = merge_in.operand_list[0].ToString(); + for (size_t i = 1; i < merge_in.operand_list.size(); ++i) { + 
vm::CellStorer::merge_refcnt_diffs(diff_buf, to_td(merge_in.operand_list[i])); + } + diff = diff_buf; + } + + merge_out->new_value = value.ToString(); + vm::CellStorer::merge_value_and_refcnt_diff(merge_out->new_value, diff); + return true; + } + bool PartialMerge(const rocksdb::Slice& /*key*/, const rocksdb::Slice& left, const rocksdb::Slice& right, + std::string* new_value, rocksdb::Logger* logger) const override { + *new_value = left.ToString(); + vm::CellStorer::merge_refcnt_diffs(*new_value, to_td(right)); + return true; + } +}; + +void CellDbIn::validate_meta() { + LOG(INFO) << "Validating metadata\n"; + size_t max_meta_keys_loaded = opts_->get_celldb_in_memory() ? std::numeric_limits::max() : 10000; + auto meta = boc_->meta_get_all(max_meta_keys_loaded).move_as_ok(); + bool partial_check = meta.size() == max_meta_keys_loaded; + if (partial_check) { + LOG(ERROR) << "Too much metadata in the database, do only partial check"; + } + size_t missing_roots = 0; + size_t unknown_roots = 0; + std::set root_hashes; + for (auto [k, v] : meta) { + if (k == "desczero") { + continue; + } + auto obj = fetch_tl_object(td::BufferSlice{v}, true); + obj.ensure(); + auto entry = DbEntry{obj.move_as_ok()}; + root_hashes.insert(vm::CellHash::from_slice(entry.root_hash.as_slice())); + auto cell = boc_->load_cell(entry.root_hash.as_slice()); + missing_roots += cell.is_error(); + LOG_IF(ERROR, cell.is_error()) << "Cannot load root from meta: " << entry.block_id.to_str() << " " << cell.error(); + } + + // load_known_roots is only supported by InMemory database, so it is ok to check all known roots here + auto known_roots = boc_->load_known_roots().move_as_ok(); + for (auto& root : known_roots) { + block::gen::ShardStateUnsplit::Record info; + block::gen::OutMsgQueueInfo::Record qinfo; + block::ShardId shard; + if (!(tlb::unpack_cell(root, info) && shard.deserialize(info.shard_id.write()) && + tlb::unpack_cell(info.out_msg_queue_info, qinfo))) { + LOG(FATAL) << "cannot create 
ShardDescr from a root in celldb"; + } + if (!partial_check && !root_hashes.contains(root->get_hash())) { + unknown_roots++; + LOG(ERROR) << "Unknown root" << ShardIdFull(shard).to_str() << ":" << info.seq_no; + constexpr bool delete_unknown_roots = false; + if (delete_unknown_roots) { + vm::CellStorer stor{*cell_db_}; + cell_db_->begin_write_batch().ensure(); + boc_->dec(root); + boc_->commit(stor).ensure(); + cell_db_->commit_write_batch().ensure(); + if (!opts_->get_celldb_in_memory()) { + boc_->set_loader(std::make_unique(cell_db_->snapshot(), on_load_callback_)).ensure(); + } + LOG(ERROR) << "Unknown root" << ShardIdFull(shard).to_str() << ":" << info.seq_no << " REMOVED"; + } + } + } + + LOG_IF(ERROR, missing_roots != 0) << "Missing root hashes: " << missing_roots; + LOG_IF(ERROR, unknown_roots != 0) << "Unknown roots: " << unknown_roots; + + LOG_IF(FATAL, missing_roots != 0) << "Missing root hashes: " << missing_roots; + LOG_IF(FATAL, unknown_roots != 0) << "Unknown roots: " << unknown_roots; + LOG(INFO) << "Validating metadata: OK\n"; +} + void CellDbIn::start_up() { on_load_callback_ = [actor = std::make_shared>( td::actor::create_actor("celldbmigration", actor_id(this))), @@ -96,44 +194,101 @@ void CellDbIn::start_up() { db_options.snapshot_statistics = snapshot_statistics_; } db_options.statistics = statistics_; - if (opts_->get_celldb_cache_size()) { - db_options.block_cache = td::RocksDb::create_cache(opts_->get_celldb_cache_size().value()); - LOG(WARNING) << "Set CellDb block cache size to " << td::format::as_size(opts_->get_celldb_cache_size().value()); + auto o_celldb_cache_size = opts_->get_celldb_cache_size(); + + std::optional boc_in_memory_options; + std::optional boc_v1_options; + std::optional boc_v2_options; + + if (opts_->get_celldb_v2()) { + boc_v2_options = vm::DynamicBagOfCellsDb::CreateV2Options{ + .extra_threads = std::clamp(std::thread::hardware_concurrency() / 2, 1u, 8u), + .executor = {}, + .cache_ttl_max = 2000, + .cache_size_max = 
1000000}; + size_t min_rocksdb_cache = std::max(size_t{1} << 30, boc_v2_options->cache_size_max * 5000); + if (!o_celldb_cache_size || o_celldb_cache_size.value() < min_rocksdb_cache) { + LOG(WARNING) << "Increase CellDb block cache size to " << td::format::as_size(min_rocksdb_cache) << " from " + << td::format::as_size(o_celldb_cache_size ? o_celldb_cache_size.value() : 0); + o_celldb_cache_size = min_rocksdb_cache; + } + LOG(WARNING) << "Using V2 DynamicBagOfCells with options " << *boc_v2_options; + } else if (opts_->get_celldb_in_memory()) { + // default options + boc_in_memory_options = vm::DynamicBagOfCellsDb::CreateInMemoryOptions{ + .extra_threads = std::thread::hardware_concurrency(), + .verbose = true, + .use_arena = false, + .use_less_memory_during_creation = true, + }; + LOG(WARNING) << "Using InMemory DynamicBagOfCells with options " << *boc_in_memory_options; + } else { + boc_v1_options = vm::DynamicBagOfCellsDb::CreateV1Options{}; + LOG(WARNING) << "Using V1 DynamicBagOfCells with options " << *boc_v1_options; + } + + if (o_celldb_cache_size) { + db_options.block_cache = td::RocksDb::create_cache(o_celldb_cache_size.value()); + LOG(WARNING) << "Set CellDb block cache size to " << td::format::as_size(o_celldb_cache_size.value()); } db_options.use_direct_reads = opts_->get_celldb_direct_io(); + // NB: from now on we MUST use this merge operator + // Only V2 and InMemory BoC actually use them, but it still should be kept for V1, + // to handle updates written by V2 or InMemory BoCs + db_options.merge_operator = std::make_shared(); + if (opts_->get_celldb_in_memory()) { td::RocksDbOptions read_db_options; read_db_options.use_direct_reads = true; read_db_options.no_block_cache = true; read_db_options.block_cache = {}; + read_db_options.merge_operator = std::make_shared(); LOG(WARNING) << "Loading all cells in memory (because of --celldb-in-memory)"; td::Timer timer; auto read_cell_db = std::make_shared(td::RocksDb::open(path_, std::move(read_db_options)).move_as_ok()); - boc_ =
vm::DynamicBagOfCellsDb::create_in_memory(read_cell_db.get(), {}); + boc_ = vm::DynamicBagOfCellsDb::create_in_memory(read_cell_db.get(), *boc_in_memory_options); in_memory_load_time_ = timer.elapsed(); - td::actor::send_closure(parent_, &CellDb::set_in_memory_boc, boc_); + + // no reads will be allowed from rocksdb, only writes + db_options.no_reads = true; } auto rocks_db = std::make_shared(td::RocksDb::open(path_, std::move(db_options)).move_as_ok()); rocks_db_ = rocks_db->raw_db(); cell_db_ = std::move(rocks_db); if (!opts_->get_celldb_in_memory()) { - boc_ = vm::DynamicBagOfCellsDb::create(); + if (opts_->get_celldb_v2()) { + boc_ = vm::DynamicBagOfCellsDb::create_v2(*boc_v2_options); + } else { + boc_ = vm::DynamicBagOfCellsDb::create(*boc_v1_options); + } boc_->set_celldb_compress_depth(opts_->get_celldb_compress_depth()); boc_->set_loader(std::make_unique(cell_db_->snapshot(), on_load_callback_)).ensure(); - td::actor::send_closure(parent_, &CellDb::update_snapshot, cell_db_->snapshot()); } + validate_meta(); + alarm_timestamp() = td::Timestamp::in(10.0); auto empty = get_empty_key_hash(); if (get_block(empty).is_error()) { DbEntry e{get_empty_key(), empty, empty, RootHash::zero()}; + vm::CellStorer stor{*cell_db_}; cell_db_->begin_write_batch().ensure(); set_block(empty, std::move(e)); + boc_->commit(stor).ensure(); cell_db_->commit_write_batch().ensure(); + if (!opts_->get_celldb_in_memory()) { + boc_->set_loader(std::make_unique(cell_db_->snapshot(), on_load_callback_)).ensure(); + } + } + + if (opts_->get_celldb_v2() || opts_->get_celldb_in_memory()) { + send_closure(parent_, &CellDb::set_thread_safe_boc, boc_); + } else { + send_closure(parent_, &CellDb::update_snapshot, cell_db_->snapshot()); } if (opts_->get_celldb_preload_all()) { @@ -161,7 +316,7 @@ void CellDbIn::start_up() { { std::string key = "stats.last_deleted_mc_seqno", value; - auto R = cell_db_->get(td::as_slice(key), value); + auto R = boc_->meta_get(td::as_slice(key), value); R.ensure(); if
(R.ok() == td::KeyValue::GetStatus::Ok) { auto r_value = td::to_integer_safe(value); @@ -240,10 +395,10 @@ void CellDbIn::store_cell(BlockIdExt block_id, td::Ref cell, td::Promi td::Timer timer_write; vm::CellStorer stor{*cell_db_}; cell_db_->begin_write_batch().ensure(); - boc_->commit(stor).ensure(); set_block(get_empty_key_hash(), std::move(E)); set_block(D.prev, std::move(P)); set_block(key_hash, std::move(D)); + boc_->commit(stor).ensure(); cell_db_->commit_write_batch().ensure(); timer_write.pause(); @@ -266,11 +421,10 @@ void CellDbIn::store_cell(BlockIdExt block_id, td::Ref cell, td::Promi void CellDbIn::get_cell_db_reader(td::Promise> promise) { if (db_busy_) { - action_queue_.push( - [self = this, promise = std::move(promise)](td::Result R) mutable { - R.ensure(); - self->get_cell_db_reader(std::move(promise)); - }); + action_queue_.push([self = this, promise = std::move(promise)](td::Result R) mutable { + R.ensure(); + self->get_cell_db_reader(std::move(promise)); + }); return; } promise.set_result(boc_->get_cell_db_reader()); @@ -440,9 +594,16 @@ void CellDbIn::gc_cont2(BlockHandle handle) { timer_get_keys.reset(); td::PerfWarningTimer timer_boc{"gccell_boc", 0.05}; - auto cell = boc_->load_cell(F.root_hash.as_slice()).move_as_ok(); + auto r_cell = boc_->load_cell(F.root_hash.as_slice()); + td::Ref cell; + if (r_cell.is_ok()) { + cell = r_cell.move_as_ok(); + boc_->dec(cell); + LOG(ERROR) << "GC of " << handle->id().to_str(); + } else { + LOG(ERROR) << "GC of UNKNOWN root: " << handle->id().to_str(); + } - boc_->dec(cell); db_busy_ = true; boc_->prepare_commit_async( async_executor, [this, SelfId = actor_id(this), timer_boc = std::move(timer_boc), F = std::move(F), key_hash, @@ -458,17 +619,19 @@ void CellDbIn::gc_cont2(BlockHandle handle) { td::PerfWarningTimer timer_write_batch{"gccell_write_batch", 0.05}; cell_db_->begin_write_batch().ensure(); - boc_->commit(stor).ensure(); - cell_db_->erase(get_key(key_hash)).ensure(); + 
boc_->meta_erase(get_key(key_hash)).ensure(); set_block(F.prev, std::move(P)); set_block(F.next, std::move(N)); if (handle->id().is_masterchain()) { last_deleted_mc_state_ = handle->id().seqno(); std::string key = "stats.last_deleted_mc_seqno", value = td::to_string(last_deleted_mc_state_); - cell_db_->set(td::as_slice(key), td::as_slice(value)); + boc_->meta_set(td::as_slice(key), td::as_slice(value)); } + + boc_->commit(stor).ensure(); cell_db_->commit_write_batch().ensure(); + alarm_timestamp() = td::Timestamp::now(); timer_write_batch.reset(); @@ -530,7 +693,7 @@ CellDbIn::KeyHash CellDbIn::get_empty_key_hash() { td::Result CellDbIn::get_block(KeyHash key_hash) { const auto key = get_key(key_hash); std::string value; - auto R = cell_db_->get(td::as_slice(key), value); + auto R = boc_->meta_get(td::as_slice(key), value); R.ensure(); auto S = R.move_as_ok(); if (S == td::KeyValue::GetStatus::NotFound) { @@ -543,7 +706,7 @@ td::Result CellDbIn::get_block(KeyHash key_hash) { void CellDbIn::set_block(KeyHash key_hash, DbEntry e) { const auto key = get_key(key_hash); - cell_db_->set(td::as_slice(key), e.release()).ensure(); + boc_->meta_set(td::as_slice(key), e.release()); } void CellDbIn::migrate_cell(td::Bits256 hash) { @@ -631,12 +794,14 @@ void CellDb::alarm() { } void CellDb::load_cell(RootHash hash, td::Promise> promise) { - if (in_memory_boc_) { - auto result = in_memory_boc_->load_root_thread_safe(hash.as_slice()); + if (thread_safe_boc_) { + auto result = thread_safe_boc_->load_root_thread_safe(hash.as_slice()); if (result.is_ok()) { return async_apply("load_cell_result", std::move(promise), std::move(result)); } else { LOG(ERROR) << "load_root_thread_safe failed - this is suspicious"; + send_closure(cell_db_, &CellDbIn::load_cell, hash, std::move(promise)); + return; } } if (!started_) { @@ -710,6 +875,13 @@ std::vector> CellDbIn::CellDbStatistics::pre for (auto& [key, value] : boc_stats_->custom_stats) { stats.emplace_back(key, value); } + + for (auto& 
[key, value] : boc_stats_->named_stats.stats_str) { + stats.emplace_back(key, value); + } + for (auto& [key, value] : boc_stats_->named_stats.stats_int) { + stats.emplace_back(key, td::to_string(value)); + } } return stats; } diff --git a/validator/db/celldb.hpp b/validator/db/celldb.hpp index 5639b9748..1e1ccddab 100644 --- a/validator/db/celldb.hpp +++ b/validator/db/celldb.hpp @@ -74,6 +74,7 @@ class CellDbIn : public CellDbBase { CellDbIn(td::actor::ActorId root_db, td::actor::ActorId parent, std::string path, td::Ref opts); + void validate_meta(); void start_up() override; void alarm() override; @@ -195,13 +196,13 @@ class CellDb : public CellDbBase { started_ = true; boc_->set_loader(std::make_unique(std::move(snapshot), on_load_callback_)).ensure(); } - void set_in_memory_boc(std::shared_ptr in_memory_boc) { - CHECK(opts_->get_celldb_in_memory()); + void set_thread_safe_boc(std::shared_ptr thread_safe_boc) { + CHECK(opts_->get_celldb_in_memory() || opts_->get_celldb_v2()); if (!started_) { alarm(); } started_ = true; - in_memory_boc_ = std::move(in_memory_boc); + thread_safe_boc_ = std::move(thread_safe_boc); } void get_cell_db_reader(td::Promise> promise); @@ -219,7 +220,7 @@ class CellDb : public CellDbBase { td::actor::ActorOwn cell_db_; std::unique_ptr boc_; - std::shared_ptr in_memory_boc_; + std::shared_ptr thread_safe_boc_; bool started_ = false; std::vector> prepared_stats_{{"started", "false"}}; diff --git a/validator/validator-options.hpp b/validator/validator-options.hpp index ace6b1066..45b8d7ec2 100644 --- a/validator/validator-options.hpp +++ b/validator/validator-options.hpp @@ -139,6 +139,9 @@ struct ValidatorManagerOptionsImpl : public ValidatorManagerOptions { bool get_celldb_in_memory() const override { return celldb_in_memory_; } + bool get_celldb_v2() const override { + return celldb_v2_; + } td::optional get_catchain_max_block_delay() const override { return catchain_max_block_delay_; } @@ -237,6 +240,9 @@ struct 
ValidatorManagerOptionsImpl : public ValidatorManagerOptions { void set_celldb_in_memory(bool value) override { celldb_in_memory_ = value; } + void set_celldb_v2(bool value) override { + celldb_v2_ = value; + } void set_catchain_max_block_delay(double value) override { catchain_max_block_delay_ = value; } @@ -304,6 +310,7 @@ struct ValidatorManagerOptionsImpl : public ValidatorManagerOptions { bool celldb_direct_io_ = false; bool celldb_preload_all_ = false; bool celldb_in_memory_ = false; + bool celldb_v2_ = false; td::optional catchain_max_block_delay_, catchain_max_block_delay_slow_; bool state_serializer_enabled_ = true; td::Ref collator_options_{true}; diff --git a/validator/validator.h b/validator/validator.h index 5d6c0173c..66795cece 100644 --- a/validator/validator.h +++ b/validator/validator.h @@ -104,6 +104,7 @@ struct ValidatorManagerOptions : public td::CntObject { virtual std::string get_session_logs_file() const = 0; virtual td::uint32 get_celldb_compress_depth() const = 0; virtual bool get_celldb_in_memory() const = 0; + virtual bool get_celldb_v2() const = 0; virtual size_t get_max_open_archive_files() const = 0; virtual double get_archive_preload_period() const = 0; virtual bool get_disable_rocksdb_stats() const = 0; @@ -144,6 +145,7 @@ struct ValidatorManagerOptions : public td::CntObject { virtual void set_celldb_direct_io(bool value) = 0; virtual void set_celldb_preload_all(bool value) = 0; virtual void set_celldb_in_memory(bool value) = 0; + virtual void set_celldb_v2(bool value) = 0; virtual void set_catchain_max_block_delay(double value) = 0; virtual void set_catchain_max_block_delay_slow(double value) = 0; virtual void set_state_serializer_enabled(bool value) = 0;