Skip to content

Commit

Permalink
Expose the LMDB encrypt/decrypt and checksum features
Browse files Browse the repository at this point in the history
  • Loading branch information
Kerollmops committed Sep 23, 2022
1 parent 6bbd0b8 commit 13a78ff
Show file tree
Hide file tree
Showing 5 changed files with 286 additions and 15 deletions.
4 changes: 3 additions & 1 deletion heed/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ serde = { version = "1.0.144", features = ["derive"], optional = true }
synchronoise = "1.0.1"

[dev-dependencies]
serde = { version = "1.0.144", features = ["derive"] }
bytemuck = { version = "1.12.1", features = ["derive"] }
chacha20 = "0.9.0"
crc32fast = "1.3.2"
serde = { version = "1.0.144", features = ["derive"] }
tempfile = "3.3.0"

[target.'cfg(windows)'.dependencies]
Expand Down
83 changes: 83 additions & 0 deletions heed/examples/encrypt.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
use std::error::Error;
use std::fs;
use std::path::Path;

use chacha20::cipher::{KeyIvInit, StreamCipher};
use chacha20::ChaCha20;
use heed::types::*;
use heed::{Checksum, Database, Encrypt, EncryptDecrypt, EnvOpenOptions};

/// CRC-32 checksum backed by the `crc32fast` crate.
///
/// The type is uninhabited: it is only used as a type-level marker given to
/// `EnvOpenOptions::checksum_with`.
enum Crc32Checksum {}

impl Checksum for Crc32Checksum {
    /// A CRC-32 digest is 32 bits wide, i.e. four bytes.
    const SIZE: u32 = u32::BITS / 8;

    /// Hashes `input` and stores the digest in `output`, in little-endian byte order.
    ///
    /// The key is ignored. `copy_from_slice` panics unless `output` is exactly
    /// four bytes long.
    fn checksum(input: &[u8], output: &mut [u8], _key: Option<&[u8]>) {
        let digest: [u8; 4] = crc32fast::hash(input).to_le_bytes();
        output.copy_from_slice(&digest);
    }
}

/// ChaCha20 encryption backed by the `chacha20` crate.
///
/// The type is uninhabited: it is only used as a type-level marker given to
/// `EnvOpenOptions::encrypt_with`.
enum Chacha20Encrypt {}

impl Encrypt for Chacha20Encrypt {
    /// Applies the ChaCha20 keystream derived from `key` and `iv` to `input`,
    /// writing the result into `output`.
    ///
    /// The same keystream application is performed for both directions, which is
    /// why `_action` is ignored. Only the first 12 bytes of `iv` are used as the
    /// nonce and `_auth` is unused.
    ///
    /// # Errors
    ///
    /// Returns `Err(())` when `iv` is shorter than 12 bytes, when the cipher
    /// rejects the key/nonce lengths, or when the keystream cannot be applied
    /// from `input` to `output`.
    fn encrypt_decrypt(
        _action: EncryptDecrypt,
        input: &[u8],
        output: &mut [u8],
        key: &[u8],
        iv: &[u8],
        _auth: &[u8],
    ) -> Result<(), ()> {
        // Checked slicing: a short `iv` is reported as an error instead of
        // panicking in the middle of the encryption callback.
        let nonce = iv.get(..12).ok_or(())?;
        let mut cipher = ChaCha20::new_from_slices(key, nonce).map_err(drop)?;
        cipher.apply_keystream_b2b(input, output).map_err(drop)
    }
}

/// Writes two entries into an encrypted and checksummed environment, reopens the
/// environment with the same options and checks that both entries are read back.
fn main() -> Result<(), Box<dyn Error>> {
let env_path = Path::new("target").join("encrypt.mdb");
let password: &[_; 32] = b"I told you this is my password!!";
// Passed below as the `auth_size` argument of `encrypt_with`.
let mac_size = 0;

// Start from a fresh directory; the result of the removal is deliberately ignored.
let _ = fs::remove_dir_all(&env_path);
fs::create_dir_all(&env_path)?;

// We open the environment
let mut options = EnvOpenOptions::new()
.encrypt_with::<Chacha20Encrypt>(password.to_vec(), mac_size)
// By setting the checksum function we will have checksum errors if the decryption
// fails instead of random LMDB errors due to invalid data in the decrypted pages
.checksum_with::<Crc32Checksum>();
let env = options
.map_size(10 * 1024 * 1024) // 10MB
.max_dbs(3)
.open(&env_path)?;

let key1 = "first-key";
let val1 = "this is a secret info";
let key2 = "second-key";
let val2 = "this is another secret info";

// We create database and write secret values in it
let mut wtxn = env.write_txn()?;
let db: Database<Str, Str> = env.create_database(&mut wtxn, Some("first"))?;
db.put(&mut wtxn, key1, val1)?;
db.put(&mut wtxn, key2, val2)?;
wtxn.commit()?;
// Wait for the closing of the environment before opening it again.
env.prepare_for_closing().wait();

// We reopen the environment now
let env = options.open(&env_path)?;

// We check that the secret entries are correctly decrypted
// NOTE(review): `rtxn` is obtained from `write_txn` although it is only read from
// below and never committed -- confirm whether a write transaction is required here.
let mut rtxn = env.write_txn()?;
let db: Database<Str, Str> = env.open_database(&mut rtxn, Some("first"))?.unwrap();
let mut iter = db.iter(&rtxn)?;
// The entries are expected in key order: "first-key" and then "second-key".
assert_eq!(iter.next().transpose()?, Some((key1, val1)));
assert_eq!(iter.next().transpose()?, Some((key2, val2)));
assert_eq!(iter.next().transpose()?, None);

Ok(())
}
198 changes: 190 additions & 8 deletions heed/src/env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,24 @@ use std::any::TypeId;
use std::collections::hash_map::{Entry, HashMap};
use std::ffi::{c_void, CString};
use std::fs::{File, Metadata};
use std::marker::PhantomData;
#[cfg(unix)]
use std::os::unix::{
ffi::OsStrExt,
io::{AsRawFd, BorrowedFd, RawFd},
};
use std::path::{Path, PathBuf};
use std::result::Result as StdResult;
use std::sync::{Arc, RwLock};
use std::time::Duration;
#[cfg(windows)]
use std::{
ffi::OsStr,
os::windows::io::{AsRawHandle, BorrowedHandle, RawHandle},
};
use std::{io, mem, ptr, sync};
use std::{fmt, io, mem, ptr, sync};

use lmdb_master3_sys::MDB_val;
use once_cell::sync::Lazy;
use synchronoise::event::SignalEvent;

Expand All @@ -34,7 +37,31 @@ static OPENED_ENV: Lazy<RwLock<HashMap<PathBuf, EnvEntry>>> = Lazy::new(RwLock::
/// Value stored in the `OPENED_ENV` map, which is keyed by environment path.
struct EnvEntry {
// When this is `None`, opening the same path fails with `Error::DatabaseClosing`.
env: Option<Env>,
signal_event: Arc<SignalEvent>,
// Compared with the options of later `open` calls on the same path; a mismatch
// is reported as `Error::BadOpenOptions`.
options: EnvOpenOptions,
options: InternalOpenOptions,
}

/// Type-erased summary of an `EnvOpenOptions`, kept next to an opened environment.
///
/// It records whether checksumming and encryption were requested (but neither the
/// key nor the generic parameters) together with the plain numeric settings, so
/// that options built with different type parameters can be compared.
///
/// `Debug` and `Eq` are derived in addition to `PartialEq`: every field is a plain
/// integer, boolean or `Option` of one, so the comparison is a total equivalence
/// and the value can be printed in diagnostics and assertions.
#[derive(Debug, PartialEq, Eq)]
struct InternalOpenOptions {
    /// `true` when a checksum function was configured.
    is_checksumming: bool,
    /// `true` when an encryption function was configured.
    is_encrypted: bool,
    /// Requested memory-map size, if any.
    map_size: Option<usize>,
    /// Requested maximum number of readers, if any.
    max_readers: Option<u32>,
    /// Requested maximum number of named databases, if any.
    max_dbs: Option<u32>,
    /// Raw LMDB flags.
    flags: u32,
}

impl<E: Encrypt, C: Checksum> From<&EnvOpenOptions<E, C>> for InternalOpenOptions {
    /// Summarises `eoo`: the checksum and encryption settings are reduced to
    /// "set or not set" booleans and the remaining fields are copied as they are.
    fn from(eoo: &EnvOpenOptions<E, C>) -> InternalOpenOptions {
        // Exhaustive pattern on purpose: adding a field to `EnvOpenOptions`
        // forces this conversion to be revisited.
        let &EnvOpenOptions {
            ref checksum,
            ref encrypt,
            map_size,
            max_readers,
            max_dbs,
            flags,
        } = eoo;

        InternalOpenOptions {
            is_checksumming: checksum.is_some(),
            is_encrypted: encrypt.is_some(),
            map_size,
            max_readers,
            max_dbs,
            flags,
        }
    }
}

// Thanks to the mozilla/rkv project
Expand Down Expand Up @@ -92,22 +119,93 @@ unsafe fn metadata_from_fd(raw_fd: RawHandle) -> io::Result<Metadata> {
File::from(owned).metadata()
}

// TODO should it be called like this?
/// A checksum function that can be installed on an environment with
/// `EnvOpenOptions::checksum_with`.
pub trait Checksum {
/// Size given to `mdb_env_set_checksum` when the environment is opened
/// (presumably the length in bytes of the checksum -- confirm against LMDB).
const SIZE: u32;
/// Computes the checksum of `input` and writes it into `output`.
///
/// `key` is `None` when LMDB calls the checksum callback with a null key pointer.
fn checksum(input: &[u8], output: &mut [u8], key: Option<&[u8]>);
}

/// Placeholder used as the default `C` parameter of `EnvOpenOptions`.
///
/// Like `DummyEncrypt`, it is not re-exported at the library root.
pub enum DummyChecksum {}

impl Checksum for DummyChecksum {
const SIZE: u32 = 32 / 8;
// Does nothing: `_output` is left untouched.
fn checksum(_input: &[u8], _output: &mut [u8], _key: Option<&[u8]>) {}
}

/// An encryption function that can be installed on an environment with
/// `EnvOpenOptions::encrypt_with`.
pub trait Encrypt {
/// Transforms `input` into `output` in the direction given by `action`.
///
/// `key`, `iv` and `auth` are the three consecutive values that LMDB hands to the
/// encryption callback (see `encrypt_func_wrapper`). Returning `Err(())` is
/// reported to LMDB as a non-zero status.
fn encrypt_decrypt(
action: EncryptDecrypt,
input: &[u8],
output: &mut [u8],
key: &[u8],
iv: &[u8],
auth: &[u8],
) -> StdResult<(), ()>;
}

/// Direction requested from an `Encrypt` implementation.
///
/// `Debug` is derived along with the other common traits so that this public
/// type can be logged and used in assertions.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum EncryptDecrypt {
    /// The input is plain data that must be encrypted.
    Encrypt,
    /// The input is encrypted data that must be decrypted.
    Decrypt,
}

/// This type must not be used and is, therefore, not exposed at the library root.
///
/// It is the default `E` parameter of `EnvOpenOptions`.
pub enum DummyEncrypt {}

impl Encrypt for DummyEncrypt {
// Always fails: every argument is ignored and `Err(())` is returned.
fn encrypt_decrypt(
_action: EncryptDecrypt,
_input: &[u8],
_output: &mut [u8],
_key: &[u8],
_iv: &[u8],
_auth: &[u8],
) -> StdResult<(), ()> {
Err(())
}
}

/// Options and flags which can be used to configure how an environment is opened.
///
/// The `E` and `C` type parameters name the encryption and checksum functions;
/// they default to `DummyEncrypt` and `DummyChecksum`.
#[derive(Clone, Debug, PartialEq)]
#[derive(Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct EnvOpenOptions {
pub struct EnvOpenOptions<E: Encrypt = DummyEncrypt, C: Checksum = DummyChecksum> {
// `Some` once `checksum_with` has been called; the marker only carries the type `C`.
checksum: Option<PhantomData<C>>,
// `Some((marker, key, auth_size))` once `encrypt_with` has been called.
encrypt: Option<(PhantomData<E>, Vec<u8>, u32)>,
map_size: Option<usize>,
max_readers: Option<u32>,
max_dbs: Option<u32>,
flags: u32, // LMDB flags
flags: u32,
}

impl<E: Encrypt, C: Checksum> fmt::Debug for EnvOpenOptions<E, C> {
    /// Formats the options as a struct named `EnvOpenOptions`.
    ///
    /// The `checksum` and `encrypt` entries are printed as booleans telling whether
    /// they are set, so the encryption key is never written out.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let &EnvOpenOptions {
            ref checksum,
            ref encrypt,
            map_size,
            max_readers,
            max_dbs,
            flags,
        } = self;

        let mut out = f.debug_struct("EnvOpenOptions");
        out.field("checksum", &checksum.is_some());
        out.field("encrypt", &encrypt.is_some());
        out.field("map_size", &map_size);
        out.field("max_readers", &max_readers);
        out.field("max_dbs", &max_dbs);
        out.field("flags", &flags);
        out.finish()
    }
}

impl EnvOpenOptions {
/// Creates a blank new set of options ready for configuration.
///
/// No checksum and no encryption function is set (both fields start as `None`);
/// see `checksum_with` and `encrypt_with` to set them.
pub fn new() -> EnvOpenOptions {
EnvOpenOptions { map_size: None, max_readers: None, max_dbs: None, flags: 0 }
EnvOpenOptions {
checksum: None,
encrypt: None,
map_size: None,
max_readers: None,
max_dbs: None,
flags: 0,
}
}
}

impl<E: Encrypt, C: Checksum> EnvOpenOptions<E, C> {
/// Set the size of the memory map to use for this environment.
pub fn map_size(&mut self, size: usize) -> &mut Self {
self.map_size = Some(size);
Expand All @@ -126,6 +224,30 @@ impl EnvOpenOptions {
self
}

/// Selects `F` as the encryption function of the environment.
///
/// `key` and `auth_size` are stored and given to `mdb_env_set_encrypt` when the
/// environment is opened. Any encryption setting made before is replaced and all
/// the other options are carried over unchanged.
pub fn encrypt_with<F: Encrypt>(self, key: Vec<u8>, auth_size: u32) -> EnvOpenOptions<F, C> {
    // The encryption type parameter changes from `E` to `F`, so the value is
    // rebuilt field by field.
    EnvOpenOptions {
        checksum: self.checksum,
        encrypt: Some((PhantomData, key, auth_size)),
        map_size: self.map_size,
        max_readers: self.max_readers,
        max_dbs: self.max_dbs,
        flags: self.flags,
    }
}

/// Selects `D` as the checksum function of the environment.
///
/// `D::SIZE` and the checksum callback are given to `mdb_env_set_checksum` when
/// the environment is opened. Any checksum setting made before is replaced and
/// all the other options are carried over unchanged.
pub fn checksum_with<D: Checksum>(self) -> EnvOpenOptions<E, D> {
    // The checksum type parameter changes from `C` to `D`, so the value is
    // rebuilt field by field.
    EnvOpenOptions {
        checksum: Some(PhantomData),
        encrypt: self.encrypt,
        map_size: self.map_size,
        max_readers: self.max_readers,
        max_dbs: self.max_dbs,
        flags: self.flags,
    }
}

/// Set one or [more LMDB flags](http://www.lmdb.tech/doc/group__mdb__env.html).
/// ```
/// use std::fs;
Expand Down Expand Up @@ -179,9 +301,10 @@ impl EnvOpenOptions {

let mut lock = OPENED_ENV.write().unwrap();

let internal_open_options = InternalOpenOptions::from(self);
match lock.entry(path) {
Entry::Occupied(entry) => {
if &entry.get().options != self {
if entry.get().options != internal_open_options {
return Err(Error::BadOpenOptions);
}
entry.get().env.clone().ok_or(Error::DatabaseClosing)
Expand All @@ -194,6 +317,24 @@ impl EnvOpenOptions {
let mut env: *mut ffi::MDB_env = ptr::null_mut();
mdb_result(ffi::mdb_env_create(&mut env))?;

if let Some(_marker) = &self.checksum {
mdb_result(ffi::mdb_env_set_checksum(
env,
Some(checksum_func_wrapper::<C>),
C::SIZE,
))?;
}

if let Some((_marker, key, auth_size)) = &self.encrypt {
let key = crate::into_val(key);
mdb_result(ffi::mdb_env_set_encrypt(
env,
Some(encrypt_func_wrapper::<E>),
&key,
*auth_size,
))?;
}

if let Some(size) = self.map_size {
if size % page_size::get() != 0 {
let msg = format!(
Expand Down Expand Up @@ -240,7 +381,7 @@ impl EnvOpenOptions {
let env = Env(Arc::new(inner));
let cache_entry = EnvEntry {
env: Some(env.clone()),
options: self.clone(),
options: internal_open_options,
signal_event,
};
entry.insert(cache_entry);
Expand All @@ -257,6 +398,47 @@ impl EnvOpenOptions {
}
}

/// The wrapper function that is called by LMDB that directly calls
/// the Rust idiomatic function internally.
///
/// Returns `0` when `E::encrypt_decrypt` succeeds and `1` when it returns an
/// error or panics.
///
/// # Safety
///
/// `src` and `dst` must point to valid `MDB_val`s, and `key_ptr` must point to
/// three consecutive valid `MDB_val`s which are read as the key, the IV and the
/// authentication data, in this order. Every non-null `mv_data` must be valid for
/// `mv_size` bytes for the duration of the call.
unsafe extern "C" fn encrypt_func_wrapper<E: Encrypt>(
    src: *const MDB_val,
    dst: *mut MDB_val,
    key_ptr: *const MDB_val,
    encdec: i32,
) -> i32 {
    // Views the bytes described by `val`. A null or zero-length value is mapped to
    // an empty slice because `slice::from_raw_parts` requires a non-null pointer.
    unsafe fn bytes<'a>(val: *const MDB_val) -> &'a [u8] {
        let (data, len) = ((*val).mv_data as *const u8, (*val).mv_size);
        if data.is_null() || len == 0 {
            &[]
        } else {
            std::slice::from_raw_parts(data, len)
        }
    }

    let input = bytes(src);
    let output: &mut [u8] = {
        let (data, len) = ((*dst).mv_data as *mut u8, (*dst).mv_size);
        if data.is_null() || len == 0 {
            &mut []
        } else {
            std::slice::from_raw_parts_mut(data, len)
        }
    };
    let key = bytes(key_ptr);
    let iv = bytes(key_ptr.add(1));
    let auth = bytes(key_ptr.add(2));

    let action = if encdec == 1 { EncryptDecrypt::Encrypt } else { EncryptDecrypt::Decrypt };

    // A panic raised by user code must not unwind into the C frames of LMDB:
    // catch it here and report it as a failure instead.
    let outcome = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
        E::encrypt_decrypt(action, input, output, key, iv, auth)
    }));
    match outcome {
        Ok(Ok(())) => 0,
        Ok(Err(())) | Err(_) => 1,
    }
}

/// The wrapper function that is called by LMDB that directly calls
/// the Rust idiomatic function internally.
///
/// The callback has no way to report an error to LMDB, so a panic raised by
/// `C::checksum` aborts the process instead of unwinding into C code.
///
/// # Safety
///
/// `src` and `dst` must point to valid `MDB_val`s and `key_ptr` must either be
/// null or point to a valid `MDB_val`. Every non-null `mv_data` must be valid for
/// `mv_size` bytes for the duration of the call.
unsafe extern "C" fn checksum_func_wrapper<C: Checksum>(
    src: *const MDB_val,
    dst: *mut MDB_val,
    key_ptr: *const MDB_val,
) {
    // Views the bytes described by `val`. A null or zero-length value is mapped to
    // an empty slice because `slice::from_raw_parts` requires a non-null pointer.
    unsafe fn bytes<'a>(val: *const MDB_val) -> &'a [u8] {
        let (data, len) = ((*val).mv_data as *const u8, (*val).mv_size);
        if data.is_null() || len == 0 {
            &[]
        } else {
            std::slice::from_raw_parts(data, len)
        }
    }

    let input = bytes(src);
    let output: &mut [u8] = {
        let (data, len) = ((*dst).mv_data as *mut u8, (*dst).mv_size);
        if data.is_null() || len == 0 {
            &mut []
        } else {
            std::slice::from_raw_parts_mut(data, len)
        }
    };
    // A null key pointer means that no key is available for the checksum.
    let key = if key_ptr.is_null() { None } else { Some(bytes(key_ptr)) };

    let outcome = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
        C::checksum(input, output, key)
    }));
    if outcome.is_err() {
        // Continuing would leave an unspecified checksum in `output`, and unwinding
        // through the C caller is not allowed: stop the process.
        std::process::abort();
    }
}

/// Returns a struct that allows to wait for the effective closing of an environment.
pub fn env_closing_event<P: AsRef<Path>>(path: P) -> Option<EnvClosingEvent> {
let lock = OPENED_ENV.read().unwrap();
Expand Down
5 changes: 4 additions & 1 deletion heed/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,10 @@ pub use {bytemuck, byteorder, heed_types as types};

use self::cursor::{RoCursor, RwCursor};
pub use self::db::{Database, PolyDatabase};
pub use self::env::{env_closing_event, CompactionOption, Env, EnvClosingEvent, EnvOpenOptions};
pub use self::env::{
env_closing_event, Checksum, CompactionOption, Encrypt, EncryptDecrypt, Env, EnvClosingEvent,
EnvOpenOptions,
};
pub use self::iter::{
RoIter, RoPrefix, RoRange, RoRevIter, RoRevPrefix, RoRevRange, RwIter, RwPrefix, RwRange,
RwRevIter, RwRevPrefix, RwRevRange,
Expand Down
Loading

0 comments on commit 13a78ff

Please sign in to comment.