Skip to content

Commit 43ad71c

Browse files
committed
update: streaming + performance uplift
1 parent 4d708e2 commit 43ad71c

File tree

8 files changed

+999
-393
lines changed

8 files changed

+999
-393
lines changed

Cargo.lock

Lines changed: 413 additions & 25 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,21 @@
11
[package]
22
edition="2021"
33
name ="json-gen-actix"
4-
version="0.1.0"
4+
version="0.1.1"
55

66
[dependencies]
7-
actix-web ={ version="4.9.0", default-features=false, features=["macros"] }
8-
fake ={ version="3.1.0", default-features=false, features=["derive"] }
9-
num_cpus ="1.16.0"
10-
parking_lot="0.12.3"
11-
rand ="0.8.5"
12-
rand_chacha="0.3.1"
13-
rayon ={ version="1.10.0", default-features=false }
14-
serde ={ version="1.0.217", features=["derive"] }
7+
actix-web ={ version="4.9.0", default-features=false, features=["macros"] }
8+
dtoa ="1.0.9"
9+
fake ={ version="3.1.0", default-features=false, features=["derive"] }
10+
itoa ="1.0.14"
11+
num_cpus ="1.16.0"
12+
parking_lot ="0.12.3"
13+
rand ="0.8.5"
14+
rand_chacha ="0.3.1"
15+
rayon ={ version="1.10.0", default-features=false }
16+
serde ={ version="1.0.217", features=["derive"] }
17+
tokio ="1.43.0"
18+
tokio-stream="0.1.17"
1519

1620
[profile.release]
1721
codegen-units=1
@@ -20,3 +24,10 @@ lto ="thin"
2024
opt-level ="z"
2125
panic ="abort"
2226
strip =true
27+
28+
[dev-dependencies]
29+
criterion="0.5"
30+
31+
[[bench]]
32+
harness=false
33+
name ="data_generation"

benches/data_generation.rs

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
use actix_web::web;
2+
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
3+
use rand::{Rng, SeedableRng};
4+
use rand_chacha::ChaCha8Rng;
5+
use std::sync::Arc;
6+
7+
use json_gen_actix::{
8+
processing::{
9+
write_location_csv_simd, write_location_json_simd, BusinessLocation, DataPools,
10+
JsonPatterns, OutputFormat, StreamGenerator,
11+
},
12+
util::ProgressInfo,
13+
};
14+
15+
fn bench_formats(c: &mut Criterion) {
16+
let mut group = c.benchmark_group("data_generation");
17+
let data_pools = web::Data::new(Arc::new(DataPools::new()));
18+
let rng = ChaCha8Rng::seed_from_u64(42);
19+
20+
for size in [64, 256, 1024].iter() {
21+
group.bench_with_input(BenchmarkId::new("json_pretty", size), size, |b, &size| {
22+
b.iter(|| {
23+
let mut generator = StreamGenerator::new(
24+
0,
25+
rng.clone(),
26+
data_pools.clone(),
27+
true,
28+
OutputFormat::JSON,
29+
true,
30+
Arc::new(ProgressInfo::new(1.0)),
31+
size * 1024,
32+
);
33+
generator.generate_chunk()
34+
})
35+
});
36+
37+
group.bench_with_input(BenchmarkId::new("json_compact", size), size, |b, &size| {
38+
b.iter(|| {
39+
let mut generator = StreamGenerator::new(
40+
0,
41+
rng.clone(),
42+
data_pools.clone(),
43+
false,
44+
OutputFormat::JSON,
45+
true,
46+
Arc::new(ProgressInfo::new(1.0)),
47+
size * 1024,
48+
);
49+
generator.generate_chunk()
50+
})
51+
});
52+
53+
group.bench_with_input(BenchmarkId::new("csv", size), size, |b, &size| {
54+
b.iter(|| {
55+
let mut generator = StreamGenerator::new(
56+
0,
57+
rng.clone(),
58+
data_pools.clone(),
59+
false,
60+
OutputFormat::CSV,
61+
true,
62+
Arc::new(ProgressInfo::new(1.0)),
63+
size * 1024,
64+
);
65+
generator.generate_chunk()
66+
})
67+
});
68+
}
69+
group.finish();
70+
}
71+
72+
fn bench_simd_operations(c: &mut Criterion) {
73+
let mut group = c.benchmark_group("simd_ops");
74+
let location = BusinessLocation {
75+
id: 1,
76+
name: "Test Company".into(),
77+
industry: "Technology".into(),
78+
revenue: 1000000.0,
79+
employees: 100,
80+
city: "Test City".into(),
81+
state: "Test State".into(),
82+
country: "Test Country".into(),
83+
};
84+
85+
group.bench_function("json_simd_write", |b| {
86+
b.iter(|| {
87+
let mut output = Vec::with_capacity(1024);
88+
write_location_json_simd(&location, &mut output, true, &JsonPatterns::new(), true);
89+
})
90+
});
91+
92+
group.bench_function("csv_simd_write", |b| {
93+
b.iter(|| {
94+
let mut output = Vec::with_capacity(1024);
95+
write_location_csv_simd(&location, &mut output);
96+
})
97+
});
98+
group.finish();
99+
}
100+
101+
fn bench_data_generation(c: &mut Criterion) {
102+
let mut group = c.benchmark_group("data_gen");
103+
let data_pools = web::Data::new(Arc::new(DataPools::new()));
104+
let rng = ChaCha8Rng::seed_from_u64(42);
105+
106+
group.bench_function("location_gen", |b| {
107+
b.iter(|| BusinessLocation {
108+
id: 1,
109+
name: data_pools.names[0].clone(),
110+
industry: data_pools.industries[0].clone(),
111+
revenue: rng.clone().gen_range(100000.0..100000000.0),
112+
employees: rng.clone().gen_range(10..10000),
113+
city: data_pools.cities[0].clone(),
114+
state: data_pools.states[0].clone(),
115+
country: data_pools.countries[0].clone(),
116+
})
117+
});
118+
group.finish();
119+
}
120+
121+
criterion_group!(
122+
benches,
123+
bench_formats,
124+
bench_simd_operations,
125+
bench_data_generation
126+
);
127+
criterion_main!(benches);

readme.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,7 @@ curl "http://127.0.0.1:8080/generate?size=100mb&format=csv"
1818
- The `size` parameter specifies the target size of the generated content.
1919
- The `format` parameter supports either JSON or CSV.
2020
- default: `json`
21+
22+
### Bugs
23+
24+
- Progress indicator may end up falling short.

src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#![feature(portable_simd)]
2+
pub mod processing;
3+
pub mod util;

0 commit comments

Comments
 (0)