Skip to content

Commit

Permalink
Merge pull request #278 from meilisearch/combined-lmdb-support
Browse files Browse the repository at this point in the history
Combined version of LMDB mdb.master and mdb.master3
  • Loading branch information
Kerollmops authored Dec 3, 2024
2 parents 8b01852 + b47b72f commit 6e3835d
Show file tree
Hide file tree
Showing 43 changed files with 4,932 additions and 674 deletions.
91 changes: 88 additions & 3 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,33 @@ jobs:
cargo clean
cargo test
check_all_features:
# Job: checks that the heed3 crate compiles on Linux, Windows and macOS.
check-heed3:
name: Check the heed3 project
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
# NOTE(review): these include entries only repeat the `os` values already
# listed above; they look redundant — confirm before removing.
include:
- os: ubuntu-latest
- os: windows-latest
- os: macos-latest

steps:
# The LMDB sources are git submodules, hence the recursive checkout.
- uses: actions/checkout@v2
with:
submodules: recursive
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
# convert-to-heed3.sh rewrites the checkout in place (copies heed3/Cargo.toml
# into heed/ and replaces `heed::` with `heed3::`) before heed3 is checked.
- name: Run cargo check
run: |
cargo clean
bash convert-to-heed3.sh
cargo check -p heed3
check-all-features:
name: Check all the features of the heed project
runs-on: ${{ matrix.os }}
env:
Expand All @@ -51,9 +77,35 @@ jobs:
override: true
- name: Run cargo test
run: |
cd heed
cargo clean
cargo check --all-features
cargo check --all-features -p heed
# Job: checks that the heed3 crate compiles with every feature enabled.
# Warnings are promoted to errors through RUSTFLAGS.
check-all-features-heed3:
  name: Check all the features of the heed3 project
  runs-on: ${{ matrix.os }}
  env:
    RUSTFLAGS: -D warnings
  strategy:
    matrix:
      os: [ubuntu-latest, macos-latest]
      include:
        - os: ubuntu-latest
        - os: macos-latest

  steps:
    # The LMDB sources are git submodules, hence the recursive checkout.
    - uses: actions/checkout@v2
      with:
        submodules: recursive
    - uses: actions-rs/toolchain@v1
      with:
        profile: minimal
        toolchain: stable
        override: true
    # This step only type-checks (cargo check); it does not run the tests,
    # so it is labelled accordingly.
    - name: Run cargo check
      run: |
        cargo clean
        bash convert-to-heed3.sh
        cargo check --all-features -p heed3
examples:
name: Run the heed examples
Expand Down Expand Up @@ -81,6 +133,31 @@ jobs:
cargo run --example 2>&1 | grep -E '^ ' | awk '!/rmp-serde/' | xargs -n1 cargo run --example
cargo run --example rmp-serde --features serde-rmp
# Job: runs the examples on Linux and macOS after converting the checkout to heed3.
heed3-examples:
name: Run the heed3 examples
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest]
include:
- os: ubuntu-latest
- os: macos-latest

steps:
# The LMDB sources are git submodules, hence the recursive checkout.
- uses: actions/checkout@v2
with:
submodules: recursive
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
# `cargo run --example` without a name prints the available examples; the
# lines kept by grep are fed one at a time (xargs -n1) to `cargo run --example`.
- name: Run the examples
run: |
cargo clean
bash convert-to-heed3.sh
cargo run --example 2>&1 | grep -E '^ '| xargs -n1 cargo run --example
fmt:
name: Ensure the heed project is formatted
runs-on: ubuntu-latest
Expand All @@ -94,3 +171,11 @@ jobs:
components: rustfmt
- name: Run cargo fmt
run: cargo fmt --check

# Job: guards against committing the output of convert-to-heed3.sh, which
# overwrites heed/Cargo.toml with the heed3 manifest.
no-heed3-in-heed-folder:
name: Ensure heed3 is not erasing heed
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
# grep -q exits non-zero (failing the job) when heed/Cargo.toml does not
# contain the exact text `name = "heed"`; `name = "heed3"` does not match.
- name: Check name is heed with grep
run: grep -q 'name = "heed"' heed/Cargo.toml
4 changes: 4 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,7 @@
path = lmdb-master-sys/lmdb
url = https://github.com/LMDB/lmdb
branch = mdb.master
[submodule "lmdb-master3-sys/lmdb"]
path = lmdb-master3-sys/lmdb
url = https://github.com/LMDB/lmdb
branch = mdb.master3
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[workspace]
members = ["lmdb-master-sys", "heed", "heed-traits", "heed-types"]
members = ["lmdb-master-sys", "lmdb-master3-sys", "heed", "heed-traits", "heed-types"]
resolver = "2"
13 changes: 10 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
<p align="center"><img width="280px" src="https://raw.githubusercontent.com/meilisearch/heed/main/assets/heed-pigeon-logo.png"></a></p>
<h1 align="center" >heed</h1>
<h1 align="center" >heed & heed3</h1>

[![License](https://img.shields.io/badge/license-MIT-green)](#LICENSE)
[![Crates.io](https://img.shields.io/crates/v/heed)](https://crates.io/crates/heed)
[![Docs](https://docs.rs/heed/badge.svg)](https://docs.rs/heed)
[![dependency status](https://deps.rs/repo/github/meilisearch/heed/status.svg)](https://deps.rs/repo/github/meilisearch/heed)
[![Build](https://github.com/meilisearch/heed/actions/workflows/rust.yml/badge.svg)](https://github.com/meilisearch/heed/actions/workflows/rust.yml)

A Rust-centric [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database) abstraction with minimal overhead. This library enables the storage of various Rust types within LMDB, extending support to include Serde-compatible types.
A Rust-centric [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database) abstraction with minimal overhead. This library enables the storage of various Rust types within LMDB, extending support to include Serde-compatible types. It not only supports the LMDB `mdb.master` branch but also the `mdb.master3` branch which features encryption-at-rest and checksumming.

## Simple Example Usage

Here is an example on how to store and read entries into LMDB in a safe and ACID way. For usage examples, see [heed/examples/](heed/examples/). To see more advanced usage techniques go check our [Cookbook](https://docs.rs/heed/latest/heed/cookbook/index.html).
Here is an example on how to store and read entries into LMDB in a safe and ACID way. For usage examples, see [examples/](examples/). To see more advanced usage techniques go check our [Cookbook](https://docs.rs/heed/latest/heed/cookbook/index.html).

```rust
use std::fs;
Expand Down Expand Up @@ -46,6 +46,13 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
}
```

## Working with two Crates: heed and heed3

The heed and heed3 crates share a single codebase. The `heed3` folder contains the `Cargo.toml` specific to the heed3 crate.
To work on heed3, run the `convert-to-heed3.sh` script from the root of the repository.

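This script copies the `heed3/Cargo.toml` file into the `heed/` folder, replaces the `heed::` references with `heed3::`, and creates a commit so the changes are easy to roll back. That commit (its message starts with `remove-me`) must be removed before merging into the main branch.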

## Building from Source

You can use this command to clone the repository:
Expand Down
36 changes: 36 additions & 0 deletions convert-to-heed3.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/usr/bin/env bash

# Sets up the heed3 crate on top of the shared heed sources:
#   1. copies heed3/Cargo.toml over heed/Cargo.toml,
#   2. replaces every `heed::` with `heed3::` in the Rust files under heed/src,
#   3. records the result in a throw-away "remove-me" commit for easy rollback.
#
# Run it from the root of the repository, on a clean working tree.

# Refuse to run on a dirty tree: the final `git commit -a` would otherwise
# sweep unrelated local changes into the throw-away commit.
if [[ -n $(git status -s) ]]; then
    echo "Error: Repository is git dirty, please commit or stash changes before running this script." >&2
    exit 1
fi

set -e

# Copy the heed3/Cargo.toml file into the heed folder...
if [[ "$OSTYPE" == "cygwin" || "$OSTYPE" == "msys" ]]; then
    cp heed3\\Cargo.toml heed\\Cargo.toml
else
    cp heed3/Cargo.toml heed/Cargo.toml
fi

# ...and replace the `heed::` string by the `heed3::` one.
# BSD sed (macOS) requires an explicit, possibly empty, backup suffix after -i.
if [[ "$OSTYPE" == "darwin"* ]]; then
    sed_in_place=(sed -i '')
else
    sed_in_place=(sed -i)
fi

# `find -exec` hands the paths straight to sed, so file names containing
# whitespace or glob characters are not split or expanded by the shell.
find heed/src -type f -name "*.rs" -exec "${sed_in_place[@]}" 's/heed::/heed3::/g' {} +

# Make it easier to rollback by doing a commit.
# The identity is passed with `-c` so that it applies to this single commit
# only and never overwrites the caller's own git configuration.
git -c user.email="[email protected]" -c user.name="The CI" \
    commit -am 'remove-me: heed3 changes generate by the convert-to-heed3.sh script'

echo "Heed3 crate setup completed successfully. Configurations for the heed crate have been copied and modified."
echo "A commit (starting with remove-me) has been generated and must be deleted before merging into the main branch."
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
111 changes: 111 additions & 0 deletions examples/heed3-all-types.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
use std::error::Error;
use std::fs;
use std::path::Path;

use heed3::byteorder::BE;
use heed3::types::*;
use heed3::{Database, EnvOpenOptions};
use serde::{Deserialize, Serialize};

/// Tour of the codecs usable with a heed3 `Database`.
///
/// Opens an environment under `target/heed3.mdb`, then creates one named
/// database per codec combination (`Str`/`Bytes`, `Str`/`SerdeBincode`,
/// `Str`/`SerdeJson`, `Str`/`Unit`, big-endian `I64`/`Unit`), writing and
/// reading back a value in each. Any error is propagated to the caller.
fn main() -> Result<(), Box<dyn Error>> {
let path = Path::new("target").join("heed3.mdb");

// The environment path is a directory: create it before opening.
fs::create_dir_all(&path)?;

let env = unsafe {
EnvOpenOptions::new()
.map_size(10 * 1024 * 1024) // 10MB
.max_dbs(3000)
.open(path)?
};

// here the key will be an str and the data will be a slice of u8
let mut wtxn = env.write_txn()?;
let db: Database<Str, Bytes> = env.create_database(&mut wtxn, Some("kiki"))?;

db.put(&mut wtxn, "hello", &[2, 3][..])?;
// The write transaction is also used for reading, before it is committed.
let ret: Option<&[u8]> = db.get(&wtxn, "hello")?;

println!("{:?}", ret);
wtxn.commit()?;

// serde types are also supported!!!
#[derive(Debug, Serialize, Deserialize)]
struct Hello<'a> {
string: &'a str,
}

// Same `Hello` value stored through the bincode codec...
let mut wtxn = env.write_txn()?;
let db: Database<Str, SerdeBincode<Hello>> =
env.create_database(&mut wtxn, Some("serde-bincode"))?;

let hello = Hello { string: "hi" };
db.put(&mut wtxn, "hello", &hello)?;

let ret: Option<Hello> = db.get(&wtxn, "hello")?;
println!("serde-bincode:\t{:?}", ret);

wtxn.commit()?;

// ...and through the JSON codec, in a separate named database.
let mut wtxn = env.write_txn()?;
let db: Database<Str, SerdeJson<Hello>> = env.create_database(&mut wtxn, Some("serde-json"))?;

let hello = Hello { string: "hi" };
db.put(&mut wtxn, "hello", &hello)?;

let ret: Option<Hello> = db.get(&wtxn, "hello")?;
println!("serde-json:\t{:?}", ret);

wtxn.commit()?;

// you can ignore the data
let mut wtxn = env.write_txn()?;
let db: Database<Str, Unit> = env.create_database(&mut wtxn, Some("ignored-data"))?;

db.put(&mut wtxn, "hello", &())?;
let ret: Option<()> = db.get(&wtxn, "hello")?;

println!("{:?}", ret);

// Looking up a key that was never inserted.
let ret: Option<()> = db.get(&wtxn, "non-existant")?;

println!("{:?}", ret);
wtxn.commit()?;

// database opening and types are tested in a safe way
//
// we try to open a database twice with the same types
let mut wtxn = env.write_txn()?;
let _db: Database<Str, Unit> = env.create_database(&mut wtxn, Some("ignored-data"))?;

// you can iterate over keys in order
type BEI64 = I64<BE>;

let db: Database<BEI64, Unit> = env.create_database(&mut wtxn, Some("big-endian-iter"))?;

// Keys are deliberately inserted out of numeric order.
db.put(&mut wtxn, &0, &())?;
db.put(&mut wtxn, &68, &())?;
db.put(&mut wtxn, &35, &())?;
db.put(&mut wtxn, &42, &())?;

let rets: Result<Vec<(i64, _)>, _> = db.iter(&wtxn)?.collect();

println!("{:?}", rets);

// or iterate over ranges too!!!
let range = 35..=42;
let rets: Result<Vec<(i64, _)>, _> = db.range(&wtxn, &range)?.collect();

println!("{:?}", rets);

// delete a range of keys; `deleted` receives the value returned by `delete_range`
let range = 35..=42;
let deleted: usize = db.delete_range(&mut wtxn, &range)?;

// Iterate once more to show what is left after the deletion.
let rets: Result<Vec<(i64, _)>, _> = db.iter(&wtxn)?.collect();

println!("deleted: {:?}, {:?}", deleted, rets);
wtxn.commit()?;

Ok(())
}
59 changes: 59 additions & 0 deletions examples/heed3-encrypted.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
use std::error::Error;
use std::fs;
use std::path::Path;

use argon2::Argon2;
use chacha20poly1305::{ChaCha20Poly1305, Key};
use heed3::types::*;
use heed3::EnvOpenOptions;

/// Round-trip test of an encrypted heed3 environment.
///
/// Derives a key from a password and a salt with Argon2, opens
/// `target/encrypt.mdb` with `open_encrypted` using ChaCha20-Poly1305, writes two
/// string entries, closes the environment, reopens it with the same key and
/// asserts that both entries are read back unchanged.
fn main() -> Result<(), Box<dyn Error>> {
let env_path = Path::new("target").join("encrypt.mdb");
let password = "This is the password that will be hashed by the argon2 algorithm";
let salt = "The salt added to the password hashes to add more security when stored";

// Start from a fresh directory. The removal result is deliberately discarded
// (`let _ =`) — presumably because the directory may not exist yet.
let _ = fs::remove_dir_all(&env_path);
fs::create_dir_all(&env_path)?;

// We choose to use argon2 as our Key Derivation Function, but you can choose whatever you want.
// <https://github.com/RustCrypto/traits/tree/master/password-hash#supported-crates>
// `hash_password_into` writes the derived bytes into the `key` buffer.
let mut key = Key::default();
Argon2::default().hash_password_into(password.as_bytes(), salt.as_bytes(), &mut key)?;

// We open the environment
// `options` is kept in a variable so the same settings are reused when reopening below.
let mut options = EnvOpenOptions::new();
let env = unsafe {
options
.map_size(10 * 1024 * 1024) // 10MB
.max_dbs(3)
.open_encrypted::<ChaCha20Poly1305, _>(key, &env_path)?
};

let key1 = "first-key";
let val1 = "this is a secret info";
let key2 = "second-key";
let val2 = "this is another secret info";

// We create database and write secret values in it
let mut wtxn = env.write_txn()?;
let db = env.create_database::<Str, Str>(&mut wtxn, Some("first"))?;
db.put(&mut wtxn, key1, val1)?;
db.put(&mut wtxn, key2, val2)?;
wtxn.commit()?;
// NOTE(review): presumably blocks until the environment is fully closed so
// that it can be opened again below — confirm against the heed3 docs.
env.prepare_for_closing().wait();

// We reopen the environment now
let env = unsafe { options.open_encrypted::<ChaCha20Poly1305, _>(key, &env_path)? };

// We check that the secret entries are correctly decrypted
let mut rtxn = env.read_txn()?;
let db = env.open_database::<Str, Str>(&rtxn, Some("first"))?.unwrap();
// Note that `iter` is given a mutable borrow of the read transaction here.
let mut iter = db.iter(&mut rtxn)?;
// The entries are expected in this order: `key1`, then `key2`, then nothing else.
assert_eq!(iter.next().transpose()?, Some((key1, val1)));
assert_eq!(iter.next().transpose()?, Some((key2, val2)));
assert_eq!(iter.next().transpose()?, None);

eprintln!("Successful test!");

Ok(())
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion heed-types/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "heed-types"
version = "0.20.1"
version = "0.21.0"
authors = ["Kerollmops <[email protected]>"]
description = "The types used with the fully typed LMDB wrapper, heed"
license = "MIT"
Expand Down
2 changes: 1 addition & 1 deletion heed-types/src/bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ pub enum Bytes {}
impl<'a> BytesEncode<'a> for Bytes {
type EItem = [u8];

fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
Ok(Cow::Borrowed(item))
}
}
Expand Down
2 changes: 1 addition & 1 deletion heed-types/src/serde_bincode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ where
{
type EItem = T;

fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
bincode::serialize(item).map(Cow::Owned).map_err(Into::into)
}
}
Expand Down
Loading

0 comments on commit 6e3835d

Please sign in to comment.