From a439bb3c91976ae0a1bd460783c6415194e1a6c5 Mon Sep 17 00:00:00 2001 From: Vladimir Petrzhikovskii Date: Wed, 11 Dec 2024 19:46:01 +0100 Subject: [PATCH] test(storage): add fast gc test --- Cargo.lock | 6 +- Cargo.toml | 2 +- .../src/store/shard_state/store_state_raw.rs | 124 +++++++++++++++++- 3 files changed, 127 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e380c7a92..1c32e19f0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -306,7 +306,7 @@ dependencies = [ "bitflags", "cexpr", "clang-sys", - "itertools 0.10.5", + "itertools 0.12.1", "lazy_static", "lazycell", "log", @@ -1507,7 +1507,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.52.6", ] [[package]] @@ -4026,7 +4026,7 @@ dependencies = [ [[package]] name = "weedb" version = "0.3.8" -source = "git+https://github.com/broxus/weedb.git?branch=next-rocksdb#be76187ed31348144bdab3e113ad7de114d99ac6" +source = "git+https://github.com/broxus/weedb.git?rev=be76187ed31348144bdab3e113ad7de114d99ac6#be76187ed31348144bdab3e113ad7de114d99ac6" dependencies = [ "librocksdb-sys", "metrics", diff --git a/Cargo.toml b/Cargo.toml index 6f4d6aa62..805bc2698 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -135,7 +135,7 @@ tycho-storage = { path = "./storage", version = "0.1.4" } tycho-util = { path = "./util", version = "0.1.4" } [patch.crates-io] -weedb = { version = "0.3.8", git = "https://github.com/broxus/weedb.git", branch = "next-rocksdb" } +weedb = { version = "0.3.8", git = "https://github.com/broxus/weedb.git", rev = "be76187ed31348144bdab3e113ad7de114d99ac6" } [workspace.lints.rust] future_incompatible = "warn" diff --git a/storage/src/store/shard_state/store_state_raw.rs b/storage/src/store/shard_state/store_state_raw.rs index e3299cdef..5eb859486 100644 --- a/storage/src/store/shard_state/store_state_raw.rs +++ b/storage/src/store/shard_state/store_state_raw.rs @@ -544,10 +544,15 @@ enum StoreStateError { #[cfg(test)] mod test { + use std::collections::BTreeSet; + use bytesize::ByteSize; use everscale_types::models::ShardIdent; + use everscale_types::prelude::Dict; + use rand::prelude::SliceRandom; + use rand::{Rng, SeedableRng}; use tycho_util::project_root; - use weedb::rocksdb::IteratorMode; + use weedb::rocksdb::{IteratorMode, WriteBatch}; use super::*; use crate::{Storage, StorageConfig}; @@ -645,6 +650,123 @@ mod test { Ok(()) } + use rand::rngs::StdRng; + + #[tokio::test] + async fn rand_cells_storage() -> Result<()> { + tycho_util::test::init_logger("rand_cells_storage", "debug"); + + let (storage, _tempdir) = Storage::new_temp().await?; + let base_db = storage.base_db(); + let cell_storage = &storage.shard_state_storage().cell_storage; + + let mut rng = StdRng::seed_from_u64(1337); + + let mut cell_keys = Vec::new(); + + const INITIAL_SIZE: usize = 100_000; + + let mut keys: BTreeSet = + (0..INITIAL_SIZE).map(|_| HashBytes(rng.gen())).collect(); + + let value = new_cell(4); // 4 is a random number, trust me + + let keys_inner = keys.iter().map(|k| (*k, value.clone())).collect::>(); + let mut dict: Dict = Dict::try_from_sorted_slice(&keys_inner)?; + + // 2. Modification Loop + + const MODIFY_COUNT: usize = INITIAL_SIZE / 50; + + for i in 0..20 { + let keys_inner: Vec<_> = keys.iter().copied().collect(); + + let keys_to_remove: Vec<_> = + keys_inner.choose_multiple(&mut rng, MODIFY_COUNT).collect(); + + // Remove + for key in keys_to_remove { + dict.remove(key)?; + keys.remove(key); + } + + let keys_inner: Vec<_> = keys.iter().copied().collect(); + let keys_to_update = keys_inner + .choose_multiple(&mut rng, MODIFY_COUNT) + .collect::>(); + + // Update + for key in keys_to_update { + let value = new_cell(rng.gen()); + dict.set(key, value)?; + } + + // Insert + for val in 0..MODIFY_COUNT { + let key = HashBytes(rng.gen()); + let value = new_cell(val as u32); + keys.insert(key); + dict.set(key, value.clone())?; + } + + // Store + let new_dict_cell = CellBuilder::build_from(dict.clone())?; + + let cell_hash = new_dict_cell.repr_hash(); + let mut batch = WriteBatch::new(); + let traversed = + cell_storage.store_cell(&mut batch, new_dict_cell.as_ref(), MODIFY_COUNT * 3)?; + + cell_keys.push(*cell_hash); + + base_db + .rocksdb() + .write_opt(batch, base_db.cells.write_config())?; + + tracing::info!("Iteration {i} Finished. traversed: {traversed}",); + } + + let mut bump = bumpalo::Bump::new(); + + tracing::info!("Starting GC"); + let total = cell_keys.len(); + for (id, key) in cell_keys.into_iter().enumerate() { + let cell = cell_storage.load_cell(key)?; + + traverse_cell((cell as Arc).as_ref()); + + let (res, batch) = cell_storage.remove_cell(&bump, &key)?; + base_db + .rocksdb() + .write_opt(batch, base_db.cells.write_config())?; + tracing::info!("Gc {id} of {total} done. Traversed: {res}",); + bump.reset(); + } + + // two compactions in row. First one run merge operators, second one will remove all tombstones + base_db.trigger_compaction().await; + base_db.trigger_compaction().await; + + let cells_left = base_db.cells.iterator(IteratorMode::Start).count(); + tracing::info!("States GC finished. Cells left: {cells_left}"); + assert_eq!(cells_left, 0, "Gc is broken. Press F to pay respect"); + Ok(()) + } + + fn traverse_cell(cell: &DynCell) { + for cell in cell.references() { + traverse_cell(cell); + } + } + + fn new_cell(value: u32) -> Cell { + let mut cell = CellBuilder::new(); + cell.store_u32(value).unwrap(); + cell.store_u64(1).unwrap(); + cell.store_reference(cell.clone().build().unwrap()).unwrap(); + cell.build().unwrap() + } + fn parse_filename(name: &str) -> BlockId { // Split the remaining string by commas into components let parts: Vec<&str> = name.split(',').collect();