Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Break and improve the Env::copy_to_file method #312

Merged
merged 2 commits into from
Feb 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions heed/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ serde = { version = "1.0.217", features = ["derive"], optional = true }
synchronoise = "1.0.1"

[dev-dependencies]
memchr = "2.7.4"
serde = { version = "1.0.217", features = ["derive"] }
tempfile = "3.15.0"

Expand Down
46 changes: 44 additions & 2 deletions heed/src/envs/encrypted_env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,50 @@ impl<T> EncryptedEnv<T> {
///
/// This function may be used to make a backup of an existing environment.
/// No lockfile is created, since it gets recreated at need.
pub fn copy_to_file<P: AsRef<Path>>(&self, path: P, option: CompactionOption) -> Result<File> {
self.inner.copy_to_file(path, option)
///
/// Note that the file cursor is not at the beginning once the copy is complete:
/// seek back to the start of the file before reading its content.
///
/// ```
/// use std::fs;
/// use std::io::{Read, Seek, SeekFrom};
/// use std::path::Path;
/// use heed3::{EnvOpenOptions, Database, EnvFlags, FlagSetMode, CompactionOption};
/// use heed3::types::*;
/// use memchr::memmem::find_iter;
///
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// # let dir = tempfile::tempdir()?;
/// # let env = unsafe { EnvOpenOptions::new()
/// # .map_size(10 * 1024 * 1024) // 10MB
/// # .max_dbs(3000)
/// # .open(dir.path())?
/// # };
///
/// let mut wtxn = env.write_txn()?;
/// let db: Database<Str, Str> = env.create_database(&mut wtxn, None)?;
///
/// db.put(&mut wtxn, &"hello0", &"world0")?;
/// db.put(&mut wtxn, &"hello1", &"world1")?;
/// db.put(&mut wtxn, &"hello2", &"world2")?;
/// db.put(&mut wtxn, &"hello3", &"world3")?;
///
/// wtxn.commit()?;
///
/// let mut tmp_file = tempfile::tempfile()?;
/// env.copy_to_file(&mut tmp_file, CompactionOption::Enabled)?;
/// let offset = tmp_file.seek(SeekFrom::Current(0))?;
/// assert_ne!(offset, 0);
///
/// let offset = tmp_file.seek(SeekFrom::Start(0))?;
/// assert_eq!(offset, 0);
///
/// let mut content = Vec::new();
/// tmp_file.read_to_end(&mut content)?;
/// assert!(content.len() > 8 * 6); // more than 8 times hellox + worldx
/// # Ok(()) }
/// ```
pub fn copy_to_file(&self, file: &mut File, option: CompactionOption) -> Result<()> {
// Thin wrapper: forwards the caller's file handle and compaction option,
// unchanged, to the `copy_to_file` method of the wrapped `inner` value and
// returns its result as-is.
self.inner.copy_to_file(file, option)
}

/// Copy an LMDB environment to the specified file descriptor, with compaction option.
Expand Down
57 changes: 46 additions & 11 deletions heed/src/envs/env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -403,17 +403,52 @@ impl<T> Env<T> {
///
/// This function may be used to make a backup of an existing environment.
/// No lockfile is created, since it gets recreated at need.
pub fn copy_to_file<P: AsRef<Path>>(&self, path: P, option: CompactionOption) -> Result<File> {
let file = File::options().create_new(true).write(true).open(&path)?;
let fd = get_file_fd(&file);

unsafe { self.copy_to_fd(fd, option)? };

// We reopen the file to make sure the cursor is at the start,
// even a seek to start doesn't work properly.
let file = File::open(path)?;

Ok(file)
///
/// Note that the file cursor is not at the beginning once the copy is complete:
/// seek back to the start of the file before reading its content.
///
/// ```
/// use std::fs;
/// use std::io::{Read, Seek, SeekFrom};
/// use std::path::Path;
/// use heed::{EnvOpenOptions, Database, EnvFlags, FlagSetMode, CompactionOption};
/// use heed::types::*;
/// use memchr::memmem::find_iter;
///
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// # let dir = tempfile::tempdir()?;
/// # let env = unsafe { EnvOpenOptions::new()
/// # .map_size(10 * 1024 * 1024) // 10MB
/// # .max_dbs(3000)
/// # .open(dir.path())?
/// # };
///
/// let mut wtxn = env.write_txn()?;
/// let db: Database<Str, Str> = env.create_database(&mut wtxn, None)?;
///
/// db.put(&mut wtxn, &"hello0", &"world0")?;
/// db.put(&mut wtxn, &"hello1", &"world1")?;
/// db.put(&mut wtxn, &"hello2", &"world2")?;
/// db.put(&mut wtxn, &"hello3", &"world3")?;
///
/// wtxn.commit()?;
///
/// let mut tmp_file = tempfile::tempfile()?;
/// env.copy_to_file(&mut tmp_file, CompactionOption::Enabled)?;
/// let offset = tmp_file.seek(SeekFrom::Current(0))?;
/// assert_ne!(offset, 0);
///
/// let offset = tmp_file.seek(SeekFrom::Start(0))?;
/// assert_eq!(offset, 0);
///
/// let mut content = Vec::new();
/// tmp_file.read_to_end(&mut content)?;
/// assert_eq!(find_iter(&content, b"hello").count(), 4);
/// assert_eq!(find_iter(&content, b"world").count(), 4);
/// # Ok(()) }
/// ```
pub fn copy_to_file(&self, file: &mut File, option: CompactionOption) -> Result<()> {
    // Ask `get_file_fd` for the descriptor of the caller-supplied file. The file
    // is borrowed for the whole call, so it cannot be dropped while in use here.
    let raw_fd = get_file_fd(file);

    // SAFETY: NOTE(review) - `copy_to_fd`'s exact contract is not visible here;
    // presumably it needs a descriptor that is open for writing. `raw_fd` was
    // just taken from the live `file` borrow above. Confirm against `copy_to_fd`.
    //
    // No seek is performed in this method: the result of the copy is handed
    // back to the caller unchanged.
    unsafe { self.copy_to_fd(raw_fd, option) }
}

/// Copy an LMDB environment to the specified file descriptor, with compaction option.
Expand Down
1 change: 1 addition & 0 deletions heed3/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ synchronoise = "1.0.1"

[dev-dependencies]
argon2 = { version = "0.5.3", features = ["std"] }
memchr = "2.7.4"
serde = { version = "1.0.217", features = ["derive"] }
chacha20poly1305 = "0.10.1"
tempfile = "3.15.0"
Expand Down
Loading