
Commit 05da0fe

Sped up importing by TONS. Also made it use zstd instead of bzip2 for chunk compression.
1 parent da4d986 commit 05da0fe

File tree

12 files changed: +362 −59 lines changed


Cargo.lock

+66

Cargo.toml

+2

@@ -47,6 +47,8 @@ uuid = { version = "1.9.1", features = ["v4", "v3", "v5"] }
 include-flate = "0.3.0"
 flate2 = "1.0.31"
 bzip2 = "0.4.4"
+lz4_flex = "0.11.3"
+zstd = "0.13.2"

 # OS
 which = "6.0.1"

src/database/chunks.rs

+78 −17

@@ -1,4 +1,6 @@
 use byteorder::LE;
+use zstd::bulk::compress as zstd_compress;
+use zstd::bulk::decompress as zstd_decompress;
 use heed::types::{Bytes, U64};
 use heed::Env;
 use tokio::task::spawn_blocking;
@@ -7,27 +9,27 @@ use tracing::{trace, warn};
 use crate::database::Database;
 use crate::utils::error::Error;
 use crate::utils::hash::hash;
-use crate::world::chunkformat::Chunk;
+use crate::world::chunk_format::Chunk;

-use crate::utils::binary_utils::{bzip_compress, bzip_decompress};
+// use crate::utils::binary_utils::{bzip_compress, bzip_decompress};
 use bincode::config::standard;
 use bincode::{decode_from_slice, encode_to_vec};

 impl Database {
     /// Fetch chunk from database
     fn get_chunk_from_database(db: &Env, key: &u64) -> Result<Option<Chunk>, Error> {
         // Initialize read transaction and open chunks table
-        let ro_tx = db.read_txn().unwrap();
+        let ro_tx = db.read_txn()?;
         let database = db
-            .open_database::<U64<LE>, Bytes>(&ro_tx, Some("chunks"))
-            .unwrap()
+            .open_database::<U64<LE>, Bytes>(&ro_tx, Some("chunks"))?
             .expect("No table \"chunks\" found. The database should have been initialized");

         // Attempt to fetch chunk from table
         if let Ok(data) = database.get(&ro_tx, key) {
             Ok(data.map(|encoded_chunk| {
-                let decompressed =
-                    bzip_decompress(&encoded_chunk).expect("Failed to decompress chunk");
+                // let decompressed =
+                //     bzip_decompress(&encoded_chunk).expect("Failed to decompress chunk");
+                let decompressed = zstd_decompress(&encoded_chunk, 1024 * 1024 * 64).expect("Failed to decompress chunk");
                 let chunk: (Chunk, usize) = decode_from_slice(&*decompressed, standard())
                     .expect("Failed to decode chunk from database");
                 chunk.0
@@ -40,15 +42,15 @@ impl Database {
     /// Insert a single chunk into database
     fn insert_chunk_into_database(db: &Env, chunk: &Chunk) -> Result<(), Error> {
         // Initialize write transaction and open chunks table
-        let mut rw_tx = db.write_txn().unwrap();
+        let mut rw_tx = db.write_txn()?;
         let database = db
-            .open_database::<U64<LE>, Bytes>(&rw_tx, Some("chunks"))
-            .unwrap()
+            .open_database::<U64<LE>, Bytes>(&rw_tx, Some("chunks"))?
             .expect("No table \"chunks\" found. The database should have been initialized");

         // Encode chunk
         let encoded_chunk = encode_to_vec(chunk, standard()).expect("Failed to encode chunk");
-        let compressed = bzip_compress(&encoded_chunk).expect("Failed to compress chunk");
+        // let compressed = bzip_compress(&encoded_chunk).expect("Failed to compress chunk");
+        let compressed = zstd_compress(&encoded_chunk, 3).expect("Failed to compress chunk");
         let key = hash((chunk.dimension.as_ref().unwrap(), chunk.x_pos, chunk.z_pos));

         // Insert chunk
@@ -70,18 +72,18 @@ impl Database {
     /// TODO: Find better name/disambiguation
     fn insert_chunks_into_database(db: &Env, chunks: &[Chunk]) -> Result<(), Error> {
         // Initialize write transaction and open chunks table
-        let mut rw_tx = db.write_txn().unwrap();
+        let mut rw_tx = db.write_txn()?;
         let database = db
-            .open_database::<U64<LE>, Bytes>(&rw_tx, Some("chunks"))
-            .unwrap()
+            .open_database::<U64<LE>, Bytes>(&rw_tx, Some("chunks"))?
             .expect("No table \"chunks\" found. The database should have been initialized");

         // Update page
         for chunk in chunks {
             // Encode chunk
             let encoded_chunk = encode_to_vec(chunk, standard()).expect("Failed to encode chunk");

-            let compressed = bzip_compress(&encoded_chunk).expect("Failed to compress chunk");
+            // let compressed = bzip_compress(&encoded_chunk).expect("Failed to compress chunk");
+            let compressed = zstd_compress(&encoded_chunk, 3).expect("Failed to compress chunk");
             let key = hash((chunk.dimension.as_ref().unwrap(), chunk.x_pos, chunk.z_pos));

             // Insert chunk
@@ -126,7 +128,7 @@ impl Database {
                 warn!("Error getting chunk: {:X}", key,);
             }
         })
-        .await?;
+            .await?;
         Ok(())
     }

@@ -310,13 +312,72 @@ impl Database {
     ///
     /// ```
     pub async fn batch_insert(&self, values: Vec<Chunk>) -> Result<(), Error> {
+        /*
+
+        trace!("processing chunks (compressing and encoding)");
+        // Process chunks in parallel
+        let processed_chunks: Vec<(u64, Vec<u8>)> = values
+            .par_iter()
+            .map(|chunk| {
+                let key = hash((
+                    chunk.dimension.as_ref().expect(&format!("Invalid chunk @ ({},{})", chunk.x_pos, chunk.z_pos)),
+                    chunk.x_pos,
+                    chunk.z_pos,
+                ));
+
+                let encoded_chunk = encode_to_vec(chunk, standard())
+                    .expect("Failed to encode chunk");
+                let compressed = zstd_compress(&encoded_chunk, 3)
+                    .expect("Failed to compress chunk.");
+
+                (key, compressed)
+            })
+            .collect();
+        trace!("processed chunks");*/
+
+        // Insert into cache in parallel
+        // TODO: re-enable this?
+        /*values.par_iter().for_each(|chunk| {
+            let key = hash((
+                chunk.dimension.as_ref().expect(&format!("Invalid chunk @ ({},{})", chunk.x_pos, chunk.z_pos)),
+                chunk.x_pos,
+                chunk.z_pos,
+            ));
+
+            // tokio::spawn(self.load_into_cache(key));
+            // if let Err(e) = self.cache.insert(key, chunk.clone()) {
+            //     warn!("Failed to insert chunk into cache: {:?}", e);
+            // }
+        });
+        */
+
+        /*trace!("Inserting chunks into database");
+        // Perform batch insert into LMDB
+        spawn_blocking(move || {
+            let mut rw_tx = db.write_txn()?;
+            let database = db
+                .open_database::<U64<LE>, Bytes>(&rw_tx, Some("chunks"))?
+                .expect("No table \"chunks\" found. The database should have been initialized");
+
+            for (key, compressed) in processed_chunks {
+                database.put(&mut rw_tx, &key, &compressed)?;
+            }
+
+            rw_tx.commit()?;
+            Ok::<_, Error>(())
+        })
+        .await??;
+
+        Ok(())*/
+
         // Clone database pointer
         let db = self.db.clone();

         // Calculate all keys
         let keys = values
             .iter()
-            .map(|v| hash((v.dimension.as_ref().unwrap(), v.x_pos, v.z_pos)))
+            .map(|v| hash((v.dimension.as_ref().expect(format!("Invalid chunk @ ({},{})", v.x_pos, v.z_pos).as_str()), v.x_pos, v.z_pos)))
             .collect::<Vec<u64>>();

         // WARNING: The previous logic was to first insert in database and then insert in cache using load_into_cache fn.
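Aside from the large commented-out experiments, the only live change in batch_insert is the key computation: .unwrap() becomes an .expect() that names the offending chunk's coordinates. The crate's hash helper is not part of this diff; a hypothetical stand-in using std's default hasher shows the shape of the (dimension, x_pos, z_pos) → u64 keying (chunk_key and its signature are assumptions for illustration, not FerrumC's actual implementation):

use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

/// Hypothetical stand-in for crate::utils::hash::hash.
fn chunk_key(dimension: &str, x_pos: i32, z_pos: i32) -> u64 {
    let mut hasher = DefaultHasher::new();
    (dimension, x_pos, z_pos).hash(&mut hasher);
    hasher.finish()
}

fn main() {
    // The key is deterministic per (dimension, x, z), so re-importing
    // a region overwrites existing chunks instead of duplicating them.
    let a = chunk_key("overworld", 0, 0);
    assert_eq!(a, chunk_key("overworld", 0, 0));
    assert_ne!(a, chunk_key("overworld", 1, 0));
    println!("key: {a:#018x}");
}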

src/database/mod.rs

+3 −4

@@ -14,11 +14,10 @@ use tracing::{debug, info, trace};
 use crate::utils::config::get_global_config;
 use crate::utils::error::Error;

-use crate::world::chunkformat::Chunk;
+use crate::world::chunk_format::Chunk;
 pub mod chunks;

 // MDBX constants
-const LMDB_MAX_TABLE: u32 = 16;
 const LMDB_PAGE_SIZE: usize = 50 * 1024usize.pow(3); // 50GiB

 /// Global database structure
@@ -61,7 +60,7 @@ pub async fn start_database() -> Result<Database, Error> {
     let world = get_global_config().world.clone();
     let world_path = root.join("data").join(world);

-    debug!("Opening database at {:?}", world_path);
+    debug!("Opening database at {}", world_path.display());

     if !fs::try_exists(&world_path).await? {
         fs::create_dir_all(&world_path).await?;
@@ -71,7 +70,7 @@ pub async fn start_database() -> Result<Database, Error> {
     let mut opts = EnvOpenOptions::new();
     opts.max_readers(num_cpus::get() as u32)
         .map_size(LMDB_PAGE_SIZE)
-        .max_dbs(LMDB_MAX_TABLE);
+        .max_dbs(num_cpus::get() as u32 * 2);

     // Open database (This operation is safe as we assume no other process touched the database)
     let lmdb = unsafe {
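Two functional changes here: the debug line now prints the path via .display() instead of the Debug formatter, and max_dbs is derived from CPU count rather than the removed LMDB_MAX_TABLE constant. Note that max_dbs caps the number of named databases in the environment (concurrent readers are already capped by max_readers), so the CPU-scaled value is simply a roomier bound. A minimal sketch of the same heed setup, with an illustrative path and map size rather than the project's values:

use heed::EnvOpenOptions;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let path = std::env::temp_dir().join("heed-example");
    std::fs::create_dir_all(&path)?;

    let mut opts = EnvOpenOptions::new();
    opts.max_readers(num_cpus::get() as u32)
        // map_size is a memory-map ceiling, not an eager allocation,
        // which is why FerrumC can set 50 GiB without reserving it.
        .map_size(1024 * 1024 * 1024) // 1 GiB for this sketch
        .max_dbs(num_cpus::get() as u32 * 2);

    // SAFETY: assumes no other process has this environment open,
    // mirroring the assumption stated in the code above.
    let env = unsafe { opts.open(&path)? };
    println!("LMDB env open, max readers: {}", num_cpus::get());
    drop(env);
    Ok(())
}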

src/main.rs

+2

@@ -87,6 +87,8 @@ async fn start_server() -> Result<()> {
     let state = create_state(listener).await?;

     if env::args().any(|arg| arg == "--import") {
+        // world::importing::import_regions(state.clone()).await?;
+        rayon::ThreadPoolBuilder::new().num_threads(num_cpus::get() * 2).build_global().expect("Failed to build rayon thread pool");
         world::importing::import_regions(state.clone()).await?;
         exit(0);
     }
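rayon's global pool defaults to one thread per logical CPU and can only be configured once, before its first use; building it explicitly ahead of import_regions is what lets the importer's parallel iterators run on an oversubscribed pool. A minimal sketch of the same idiom (the summation stands in for real import work):

use rayon::prelude::*;

fn main() {
    // Twice the logical CPU count, matching the commit; oversubscription
    // can help when workers block on disk I/O between CPU bursts.
    rayon::ThreadPoolBuilder::new()
        .num_threads(num_cpus::get() * 2)
        .build_global()
        .expect("Failed to build rayon thread pool");

    // Every par_iter() from here on runs on the pool built above.
    let total: u64 = (0..1_000u64).into_par_iter().map(|x| x * x).sum();
    println!("total = {total}");
}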

src/net/packets/outgoing/chunk_and_light_data.rs

+1 −1

@@ -1,7 +1,7 @@
 use crate::state::GlobalState;
 use crate::utils::encoding::bitset::BitSet;
 use crate::utils::error::Error;
-use crate::world::chunkformat::{Heightmaps};
+use crate::world::chunk_format::{Heightmaps};
 use crate::Result;
 use ferrumc_codec::enc::NetEncode;
 use ferrumc_codec::network_types::varint::VarInt;

src/utils/error.rs

+12 −2

@@ -1,5 +1,6 @@
 use std::convert::Infallible;

+use crate::world::importing_v2::ImportingError;
 use config::ConfigError;

 #[derive(thiserror::Error, Debug)]
@@ -78,16 +79,23 @@ pub enum Error {
     BitOutputOverflow(usize, usize),
     #[error("Attempted to read more bits than are available: {0} attempted, {1} available")]
     BitReadOverflow(usize, usize),
-    #[error("Attemped to read more bits than are available in the input type: {0} attempted, {1} available")]
+    #[error("Attemped to read more bits than are available in the input type: {0} attempted, {1} available"
+    )]
     BitReadOverflowInput(usize, usize),
-    #[error("Attemped to write more bits than are available in the output type: {0} attempted, {1} available")]
+    #[error("Attemped to write more bits than are available in the output type: {0} attempted, {1} available"
+    )]
     BitWriteOverflow(usize, usize),
     #[error("Codec error")]
     CodecError(#[from] ferrumc_codec::error::CodecError),
     #[error("Conversion error")]
     ConversionError,
     #[error(transparent)]
     CompressionError(std::io::Error),
+
+    #[error(transparent)]
+    ImportingError(#[from] ImportingError),
+    #[error("Database error: {0}")]
+    LmdbError(#[from] heed::Error),
 }

 impl From<Infallible> for Error {
@@ -107,3 +115,5 @@ impl From<Error> for std::io::ErrorKind {
         std::io::ErrorKind::Other
     }
 }
+
+
src/world/chunkformat.rs → src/world/chunk_format.rs (file renamed without changes)
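The two #[from] variants just added in src/utils/error.rs are what let chunks.rs trade .unwrap() for ?: thiserror derives the From impls that the ? operator relies on. A condensed sketch of the mechanism, with the variant set trimmed to the new database error:

use thiserror::Error;

#[derive(Error, Debug)]
pub enum Error {
    // #[from] derives `impl From<heed::Error> for Error`, which is
    // exactly the conversion the `?` operator performs on failure.
    #[error("Database error: {0}")]
    LmdbError(#[from] heed::Error),
}

// With the From impl in place, heed calls bubble up with `?`:
fn open_txn(env: &heed::Env) -> Result<heed::RoTxn<'_>, Error> {
    Ok(env.read_txn()?)
}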

src/world/conversions.rs

+1 −1

@@ -1,5 +1,5 @@
 use crate::utils::error::Error;
-use crate::world::chunkformat::{BlockStates, Chunk, Palette, Section};
+use crate::world::chunk_format::{BlockStates, Chunk, Palette, Section};
 use ferrumc_codec::enc::NetEncode;
 use ferrumc_codec::network_types::varint::VarInt;
 use hashbrown::HashMap;
