Skip to content

Commit

Permalink
chain: new optimized chain store
Browse files Browse the repository at this point in the history
The current chainstore is based on `kv`, but it has a few problems:
  - When we flush, we get a huge heap spike
  - We are getting a 2 or 3 times overhead on headers
  - It gets kinda slow to retrieve headers during IBD if we flush early

This commit introduces a bare-bones, ad-hoc store that consists of three
parts:
  - An open-addressing, file-backed and memory-mapped hash map to keep
    the relation block_hash -> block_height
  - A flat file that contains block headers serialized, in ascending
    order
  - An LRU cache to avoid going through the map every time

To recover a header, given the block height, we simply use pointer
arithmetic inside the flat file. If we need to get from the block hash,
use the map first, then find it inside the flat file. This has the
advantage of not needing explicit flushes (the os will flush it in fixed
intervals), flushes are async (the os will do it), we get caching for
free (mmap-ed pages will stay in memory if we need) and our cache can
react to system constraints, because the kernel will always know how
much memory we still have
  • Loading branch information
Davidson-Souza committed Feb 10, 2025
1 parent 879e263 commit 9f33428
Show file tree
Hide file tree
Showing 13 changed files with 1,230 additions and 38 deletions.
31 changes: 31 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions crates/floresta-chain/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ secp256k1 = { version = "*", features = ["alloc"], optional = true }
floresta-common = { path = "../floresta-common", default-features = false, features = ["std"] }
bitcoinconsensus = { version = "0.106.0", optional = true, default-features = false }
metrics = { path = "../../metrics", optional = true }
memmap2 = { version = "0.9.5", optional = true }
lru = { version = "0.12.5", optional = true }

[dev-dependencies]
criterion = "0.5.1"
Expand All @@ -45,6 +47,7 @@ hex = "0.4.3"
default = []
bitcoinconsensus = ["bitcoin/bitcoinconsensus", "dep:bitcoinconsensus"]
metrics = ["dep:metrics"]
experimental-db = ["memmap2", "lru"]

[[bench]]
name = "chain_state_bench"
Expand Down
2 changes: 0 additions & 2 deletions crates/floresta-chain/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
//! All data is stored in a `ChainStore` implementation, which is generic over the
//! underlying database. See the ChainStore trait for more information. For a
//! ready-to-use implementation, see the [KvChainStore] struct.
#![cfg_attr(not(test), no_std)]

pub mod pruned_utreexo;
pub(crate) use floresta_common::prelude;
pub use pruned_utreexo::chain_state::*;
Expand Down
39 changes: 26 additions & 13 deletions crates/floresta-chain/src/pruned_utreexo/chain_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ use bitcoin::Transaction;
use bitcoin::TxOut;
use bitcoin::Work;
use floresta_common::Channel;
use log::debug;
use log::info;
use log::trace;
use log::warn;
#[cfg(feature = "metrics")]
use metrics;
Expand All @@ -38,7 +38,6 @@ use spin::RwLock;
use super::chain_state_builder::ChainStateBuilder;
use super::chainparams::ChainParams;
use super::chainstore::DiskBlockHeader;
use super::chainstore::KvChainStore;
use super::consensus::Consensus;
use super::error::BlockValidationErrors;
use super::error::BlockchainError;
Expand Down Expand Up @@ -92,9 +91,18 @@ pub struct ChainStateInner<PersistedState: ChainStore> {
/// is still validated.
assume_valid: Option<BlockHash>,
}

/// Handle to the chain state: the whole [`ChainStateInner`] is kept behind a single
/// `RwLock`, stored in the `inner` field.
pub struct ChainState<PersistedState: ChainStore> {
inner: RwLock<ChainStateInner<PersistedState>>,
}

/// Force chainstate to be Sync, even if the underlying chainstore isn't
///
/// This is safe because we only access the chainstore through the inner lock, and we don't
/// expose the chainstore to the outside world. We could use a lock for the chainstore, but
/// that would be overkill and would cause a big performance hit.
///
/// NOTE(review): the argument above covers exclusive (write-lock) access. If the chainstore
/// is also reached while only a read lock is held, several threads may use it through a
/// shared reference at the same time, so the store itself would have to tolerate that --
/// confirm against every `ChainStore` implementation used with this type.
// SAFETY: relies on every access to the chainstore going through the `inner` lock, as
// described in the doc comment above.
unsafe impl<T: ChainStore + Send + 'static> Sync for ChainState<T> {}

#[derive(Debug, Copy, Clone)]
pub enum AssumeValidArg {
Disabled,
Expand Down Expand Up @@ -559,10 +567,10 @@ impl<PersistedState: ChainStore> ChainState<PersistedState> {
}
}
pub fn load_chain_state(
chainstore: KvChainStore,
chainstore: PersistedState,
network: Network,
assume_valid: AssumeValidArg,
) -> Result<ChainState<KvChainStore>, BlockchainError> {
) -> Result<ChainState<PersistedState>, BlockchainError> {
let acc = Self::load_acc(&chainstore);

let best_chain = chainstore.load_height()?;
Expand Down Expand Up @@ -794,6 +802,10 @@ impl<PersistedState: ChainStore> BlockchainInterface for ChainState<PersistedSta
self.chain_params().params
}

/// Returns an owned copy of the accumulator currently held in the inner state.
///
/// The inner state is read under the read lock; the caller gets a clone, not a reference.
fn acc(&self) -> Stump {
    let state = read_lock!(self);
    state.acc.clone()
}

fn get_fork_point(&self, block: BlockHash) -> Result<BlockHash, Self::Error> {
let fork_point = self.find_fork_point(&self.get_block_header(&block)?)?;
Ok(fork_point.block_hash())
Expand Down Expand Up @@ -1125,7 +1137,7 @@ impl<PersistedState: ChainStore> UpdatableChainstate for ChainState<PersistedSta
}

fn accept_header(&self, header: BlockHeader) -> Result<(), BlockchainError> {
trace!("Accepting header {header:?}");
debug!("Accepting header {header:?}");
let disk_header = self.get_disk_block_header(&header.block_hash());

match disk_header {
Expand All @@ -1151,7 +1163,7 @@ impl<PersistedState: ChainStore> UpdatableChainstate for ChainState<PersistedSta
// Update our current tip
if header.prev_blockhash == best_block.1 {
let height = best_block.0 + 1;
trace!("Header builds on top of our best chain");
debug!("Header builds on top of our best chain");

let mut inner = write_lock!(self);
inner.best_block.new_block(block_hash, height);
Expand All @@ -1163,7 +1175,7 @@ impl<PersistedState: ChainStore> UpdatableChainstate for ChainState<PersistedSta

inner.chainstore.update_block_index(height, block_hash)?;
} else {
trace!("Header not in the best chain");
debug!("Header not in the best chain");
self.maybe_reorg(header)?;
}

Expand Down Expand Up @@ -1247,23 +1259,24 @@ macro_rules! write_lock {
};
}

#[derive(Clone, Debug)]
#[derive(Clone, Debug, PartialEq, Eq)]
/// Internal representation of the chain we are in
pub struct BestChain {
/// Hash of the last block in the chain we believe has more work on
best_block: BlockHash,
pub best_block: BlockHash,
/// How many blocks are piled on this chain?
depth: u32,
pub depth: u32,
/// We actually validated blocks up to this point
validation_index: BlockHash,
pub validation_index: BlockHash,
/// Blockchains are not fast-forward only, they might have "forks", sometimes it's useful
/// to keep track of them, in case they become the best one. This keeps track of some
/// tips we know about, but are not the best one. We don't keep tips that are too deep
/// or has too little work if compared to our best one
alternative_tips: Vec<BlockHash>,
pub alternative_tips: Vec<BlockHash>,
/// Saves the height occupied by the assume valid block
assume_valid_index: u32,
pub assume_valid_index: u32,
}

impl BestChain {
fn new_block(&mut self, block_hash: BlockHash, height: u32) {
self.best_block = block_hash;
Expand Down
Loading

0 comments on commit 9f33428

Please sign in to comment.