From 13a78ff6f8c6df7ddfa65de315687141e4c3fdc1 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Fri, 16 Sep 2022 21:05:11 +0200 Subject: [PATCH] Expose the LMDB encrypt/decrypt and checksum features --- heed/Cargo.toml | 4 +- heed/examples/encrypt.rs | 83 ++++++++++++++++ heed/src/env.rs | 198 +++++++++++++++++++++++++++++++++++++-- heed/src/lib.rs | 5 +- heed/src/mdb/lmdb_ffi.rs | 11 ++- 5 files changed, 286 insertions(+), 15 deletions(-) create mode 100644 heed/examples/encrypt.rs diff --git a/heed/Cargo.toml b/heed/Cargo.toml index 5fa5fc32..9bd45f9f 100644 --- a/heed/Cargo.toml +++ b/heed/Cargo.toml @@ -23,8 +23,10 @@ serde = { version = "1.0.144", features = ["derive"], optional = true } synchronoise = "1.0.1" [dev-dependencies] -serde = { version = "1.0.144", features = ["derive"] } bytemuck = { version = "1.12.1", features = ["derive"] } +chacha20 = "0.9.0" +crc32fast = "1.3.2" +serde = { version = "1.0.144", features = ["derive"] } tempfile = "3.3.0" [target.'cfg(windows)'.dependencies] diff --git a/heed/examples/encrypt.rs b/heed/examples/encrypt.rs new file mode 100644 index 00000000..65fe0fd3 --- /dev/null +++ b/heed/examples/encrypt.rs @@ -0,0 +1,83 @@ +use std::error::Error; +use std::fs; +use std::path::Path; + +use chacha20::cipher::{KeyIvInit, StreamCipher}; +use chacha20::ChaCha20; +use heed::types::*; +use heed::{Checksum, Database, Encrypt, EncryptDecrypt, EnvOpenOptions}; + +enum Crc32Checksum {} + +impl Checksum for Crc32Checksum { + const SIZE: u32 = 32 / 8; + + fn checksum(input: &[u8], output: &mut [u8], _key: Option<&[u8]>) { + let checksum = crc32fast::hash(input); + output.copy_from_slice(&checksum.to_le_bytes()); + } +} + +enum Chacha20Encrypt {} + +impl Encrypt for Chacha20Encrypt { + fn encrypt_decrypt( + _action: EncryptDecrypt, + input: &[u8], + output: &mut [u8], + key: &[u8], + iv: &[u8], + _auth: &[u8], + ) -> Result<(), ()> { + Ok(ChaCha20::new_from_slices(key, &iv[..12]) + .map_err(drop)? + .apply_keystream_b2b(input, output) + .map_err(drop)?) + } +} + +fn main() -> Result<(), Box> { + let env_path = Path::new("target").join("encrypt.mdb"); + let password: &[_; 32] = b"I told you this is my password!!"; + let mac_size = 0; + + let _ = fs::remove_dir_all(&env_path); + fs::create_dir_all(&env_path)?; + + // We open the environment + let mut options = EnvOpenOptions::new() + .encrypt_with::(password.to_vec(), mac_size) + // By setting the checksum function we will have checksum errors if the decryption + // fail instead of random LMDB errors due to invalid data in the decrypted pages + .checksum_with::(); + let env = options + .map_size(10 * 1024 * 1024) // 10MB + .max_dbs(3) + .open(&env_path)?; + + let key1 = "first-key"; + let val1 = "this is a secret info"; + let key2 = "second-key"; + let val2 = "this is another secret info"; + + // We create database and write secret values in it + let mut wtxn = env.write_txn()?; + let db: Database = env.create_database(&mut wtxn, Some("first"))?; + db.put(&mut wtxn, key1, val1)?; + db.put(&mut wtxn, key2, val2)?; + wtxn.commit()?; + env.prepare_for_closing().wait(); + + // We reopen the environment now + let env = options.open(&env_path)?; + + // We check that the secret entries are correctly decrypted + let mut rtxn = env.write_txn()?; + let db: Database = env.open_database(&mut rtxn, Some("first"))?.unwrap(); + let mut iter = db.iter(&rtxn)?; + assert_eq!(iter.next().transpose()?, Some((key1, val1))); + assert_eq!(iter.next().transpose()?, Some((key2, val2))); + assert_eq!(iter.next().transpose()?, None); + + Ok(()) +} diff --git a/heed/src/env.rs b/heed/src/env.rs index e3fe5859..651a2ad3 100644 --- a/heed/src/env.rs +++ b/heed/src/env.rs @@ -2,12 +2,14 @@ use std::any::TypeId; use std::collections::hash_map::{Entry, HashMap}; use std::ffi::{c_void, CString}; use std::fs::{File, Metadata}; +use std::marker::PhantomData; #[cfg(unix)] use std::os::unix::{ ffi::OsStrExt, io::{AsRawFd, BorrowedFd, RawFd}, }; use std::path::{Path, PathBuf}; +use std::result::Result as StdResult; use std::sync::{Arc, RwLock}; use std::time::Duration; #[cfg(windows)] @@ -15,8 +17,9 @@ use std::{ ffi::OsStr, os::windows::io::{AsRawHandle, BorrowedHandle, RawHandle}, }; -use std::{io, mem, ptr, sync}; +use std::{fmt, io, mem, ptr, sync}; +use lmdb_master3_sys::MDB_val; use once_cell::sync::Lazy; use synchronoise::event::SignalEvent; @@ -34,7 +37,31 @@ static OPENED_ENV: Lazy>> = Lazy::new(RwLock:: struct EnvEntry { env: Option, signal_event: Arc, - options: EnvOpenOptions, + options: InternalOpenOptions, +} + +#[derive(PartialEq)] +struct InternalOpenOptions { + is_checksumming: bool, + is_encrypted: bool, + map_size: Option, + max_readers: Option, + max_dbs: Option, + flags: u32, +} + +impl From<&EnvOpenOptions> for InternalOpenOptions { + fn from(eoo: &EnvOpenOptions) -> InternalOpenOptions { + let EnvOpenOptions { checksum, encrypt, map_size, max_readers, max_dbs, flags } = eoo; + InternalOpenOptions { + is_checksumming: checksum.is_some(), + is_encrypted: encrypt.is_some(), + map_size: *map_size, + max_readers: *max_readers, + max_dbs: *max_dbs, + flags: *flags, + } + } } // Thanks to the mozilla/rkv project @@ -92,22 +119,93 @@ unsafe fn metadata_from_fd(raw_fd: RawHandle) -> io::Result { File::from(owned).metadata() } +// TODO should it be called like this? +pub trait Checksum { + const SIZE: u32; + fn checksum(input: &[u8], output: &mut [u8], key: Option<&[u8]>); +} + +pub enum DummyChecksum {} + +impl Checksum for DummyChecksum { + const SIZE: u32 = 32 / 8; + fn checksum(_input: &[u8], _output: &mut [u8], _key: Option<&[u8]>) {} +} + +pub trait Encrypt { + fn encrypt_decrypt( + action: EncryptDecrypt, + input: &[u8], + output: &mut [u8], + key: &[u8], + iv: &[u8], + auth: &[u8], + ) -> StdResult<(), ()>; +} + +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum EncryptDecrypt { + Encrypt, + Decrypt, +} + +/// This type must not be used and is, therefore, not exposed at the library root. +pub enum DummyEncrypt {} + +impl Encrypt for DummyEncrypt { + fn encrypt_decrypt( + _action: EncryptDecrypt, + _input: &[u8], + _output: &mut [u8], + _key: &[u8], + _iv: &[u8], + _auth: &[u8], + ) -> StdResult<(), ()> { + Err(()) + } +} + /// Options and flags which can be used to configure how an environment is opened. -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, PartialEq, Eq)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] -pub struct EnvOpenOptions { +pub struct EnvOpenOptions { + checksum: Option>, + encrypt: Option<(PhantomData, Vec, u32)>, map_size: Option, max_readers: Option, max_dbs: Option, - flags: u32, // LMDB flags + flags: u32, +} + +impl fmt::Debug for EnvOpenOptions { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let EnvOpenOptions { checksum, encrypt, map_size, max_readers, max_dbs, flags } = self; + f.debug_struct("EnvOpenOptions") + .field("checksum", &checksum.is_some()) + .field("encrypt", &encrypt.is_some()) + .field("map_size", &map_size) + .field("max_readers", &max_readers) + .field("max_dbs", &max_dbs) + .field("flags", &flags) + .finish() + } } impl EnvOpenOptions { /// Creates a blank new set of options ready for configuration. pub fn new() -> EnvOpenOptions { - EnvOpenOptions { map_size: None, max_readers: None, max_dbs: None, flags: 0 } + EnvOpenOptions { + checksum: None, + encrypt: None, + map_size: None, + max_readers: None, + max_dbs: None, + flags: 0, + } } +} +impl EnvOpenOptions { /// Set the size of the memory map to use for this environment. pub fn map_size(&mut self, size: usize) -> &mut Self { self.map_size = Some(size); @@ -126,6 +224,30 @@ impl EnvOpenOptions { self } + pub fn encrypt_with(self, key: Vec, auth_size: u32) -> EnvOpenOptions { + let EnvOpenOptions { checksum, encrypt: _, map_size, max_readers, max_dbs, flags } = self; + EnvOpenOptions { + checksum, + encrypt: Some((PhantomData, key, auth_size)), + map_size, + max_readers, + max_dbs, + flags, + } + } + + pub fn checksum_with(self) -> EnvOpenOptions { + let EnvOpenOptions { checksum: _, encrypt, map_size, max_readers, max_dbs, flags } = self; + EnvOpenOptions { + checksum: Some(PhantomData), + encrypt, + map_size, + max_readers, + max_dbs, + flags, + } + } + /// Set one or [more LMDB flags](http://www.lmdb.tech/doc/group__mdb__env.html). /// ``` /// use std::fs; @@ -179,9 +301,10 @@ impl EnvOpenOptions { let mut lock = OPENED_ENV.write().unwrap(); + let internal_open_options = InternalOpenOptions::from(self); match lock.entry(path) { Entry::Occupied(entry) => { - if &entry.get().options != self { + if entry.get().options != internal_open_options { return Err(Error::BadOpenOptions); } entry.get().env.clone().ok_or(Error::DatabaseClosing) @@ -194,6 +317,24 @@ impl EnvOpenOptions { let mut env: *mut ffi::MDB_env = ptr::null_mut(); mdb_result(ffi::mdb_env_create(&mut env))?; + if let Some(_marker) = &self.checksum { + mdb_result(ffi::mdb_env_set_checksum( + env, + Some(checksum_func_wrapper::), + C::SIZE, + ))?; + } + + if let Some((_marker, key, auth_size)) = &self.encrypt { + let key = crate::into_val(key); + mdb_result(ffi::mdb_env_set_encrypt( + env, + Some(encrypt_func_wrapper::), + &key, + *auth_size, + ))?; + } + if let Some(size) = self.map_size { if size % page_size::get() != 0 { let msg = format!( @@ -240,7 +381,7 @@ impl EnvOpenOptions { let env = Env(Arc::new(inner)); let cache_entry = EnvEntry { env: Some(env.clone()), - options: self.clone(), + options: internal_open_options, signal_event, }; entry.insert(cache_entry); @@ -257,6 +398,47 @@ impl EnvOpenOptions { } } +/// The wrapper function that is called by LMDB that directly calls +/// the Rust idiomatic function internally. +unsafe extern "C" fn encrypt_func_wrapper( + src: *const MDB_val, + dst: *mut MDB_val, + key_ptr: *const MDB_val, + encdec: i32, +) -> i32 { + let input = std::slice::from_raw_parts((*src).mv_data as *const u8, (*src).mv_size); + let output = std::slice::from_raw_parts_mut((*dst).mv_data as *mut u8, (*dst).mv_size); + let key = std::slice::from_raw_parts((*key_ptr).mv_data as *const u8, (*key_ptr).mv_size); + let iv = std::slice::from_raw_parts( + (*key_ptr.offset(1)).mv_data as *const u8, + (*key_ptr.offset(1)).mv_size, + ); + let auth = std::slice::from_raw_parts( + (*key_ptr.offset(2)).mv_data as *const u8, + (*key_ptr.offset(2)).mv_size, + ); + + let action = if encdec == 1 { EncryptDecrypt::Encrypt } else { EncryptDecrypt::Decrypt }; + E::encrypt_decrypt(action, input, output, key, iv, auth).is_err() as i32 +} + +/// The wrapper function that is called by LMDB that directly calls +/// the Rust idiomatic function internally. +unsafe extern "C" fn checksum_func_wrapper( + src: *const MDB_val, + dst: *mut MDB_val, + key_ptr: *const MDB_val, +) { + let input = std::slice::from_raw_parts((*src).mv_data as *const u8, (*src).mv_size); + let output = std::slice::from_raw_parts_mut((*dst).mv_data as *mut u8, (*dst).mv_size); + let key = if key_ptr.is_null() { + None + } else { + Some(std::slice::from_raw_parts((*key_ptr).mv_data as *const u8, (*key_ptr).mv_size)) + }; + C::checksum(input, output, key) +} + /// Returns a struct that allows to wait for the effective closing of an environment. pub fn env_closing_event>(path: P) -> Option { let lock = OPENED_ENV.read().unwrap(); diff --git a/heed/src/lib.rs b/heed/src/lib.rs index dca0fca3..4ee88ab6 100644 --- a/heed/src/lib.rs +++ b/heed/src/lib.rs @@ -63,7 +63,10 @@ pub use {bytemuck, byteorder, heed_types as types}; use self::cursor::{RoCursor, RwCursor}; pub use self::db::{Database, PolyDatabase}; -pub use self::env::{env_closing_event, CompactionOption, Env, EnvClosingEvent, EnvOpenOptions}; +pub use self::env::{ + env_closing_event, Checksum, CompactionOption, Encrypt, EncryptDecrypt, Env, EnvClosingEvent, + EnvOpenOptions, +}; pub use self::iter::{ RoIter, RoPrefix, RoRange, RoRevIter, RoRevPrefix, RoRevRange, RwIter, RwPrefix, RwRange, RwRevIter, RwRevPrefix, RwRevRange, diff --git a/heed/src/mdb/lmdb_ffi.rs b/heed/src/mdb/lmdb_ffi.rs index d955f1d2..4733f84f 100644 --- a/heed/src/mdb/lmdb_ffi.rs +++ b/heed/src/mdb/lmdb_ffi.rs @@ -3,11 +3,12 @@ use std::ptr; pub use ffi::{ mdb_cursor_close, mdb_cursor_del, mdb_cursor_get, mdb_cursor_open, mdb_cursor_put, mdb_dbi_close, mdb_dbi_open, mdb_del, mdb_drop, mdb_env_close, mdb_env_copyfd2, mdb_env_create, - mdb_env_get_fd, mdb_env_get_flags, mdb_env_info, mdb_env_open, mdb_env_set_mapsize, - mdb_env_set_maxdbs, mdb_env_set_maxreaders, mdb_env_stat, mdb_env_sync, mdb_filehandle_t, - mdb_get, mdb_put, mdb_reader_check, mdb_stat, mdb_txn_abort, mdb_txn_begin, mdb_txn_commit, - MDB_cursor, MDB_dbi, MDB_env, MDB_envinfo, MDB_stat, MDB_txn, MDB_val, MDB_APPEND, - MDB_CP_COMPACT, MDB_CREATE, MDB_CURRENT, MDB_RDONLY, MDB_RESERVE, + mdb_env_get_fd, mdb_env_get_flags, mdb_env_info, mdb_env_open, mdb_env_set_checksum, + mdb_env_set_encrypt, mdb_env_set_mapsize, mdb_env_set_maxdbs, mdb_env_set_maxreaders, + mdb_env_stat, mdb_env_sync, mdb_filehandle_t, mdb_get, mdb_put, mdb_reader_check, mdb_stat, + mdb_txn_abort, mdb_txn_begin, mdb_txn_commit, MDB_cursor, MDB_dbi, MDB_enc_func, MDB_env, + MDB_envinfo, MDB_stat, MDB_txn, MDB_val, MDB_APPEND, MDB_CP_COMPACT, MDB_CREATE, MDB_CURRENT, + MDB_RDONLY, MDB_RESERVE, }; use lmdb_master3_sys as ffi;