Skip to content

Commit 4d708e2

Browse files
committed
clean up
1 parent 65e60cc commit 4d708e2

File tree

4 files changed

+110
-113
lines changed

4 files changed

+110
-113
lines changed

Cargo.lock

Lines changed: 10 additions & 10 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

readme.md

Lines changed: 2 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -14,5 +14,7 @@ curl "http://127.0.0.1:8080/generate?size=100mb&format=csv"
1414
```
1515

1616
- The `pretty` parameter enables pretty-printed output (optional).
17+
- default: `false`
1718
- The `size` parameter specifies the target size of the generated content.
1819
- The `format` parameter supports either JSON or CSV.
20+
- default: `json`

src/main.rs

Lines changed: 11 additions & 17 deletions
Original file line number · Diff line number · Diff line change
@@ -1,15 +1,12 @@
11
#![feature(get_mut_unchecked, portable_simd)]
22
use actix_web::{web, App, Error, HttpResponse, HttpServer};
3-
43
use parking_lot::Mutex;
54
use processing::*;
65
use rand::{Rng, SeedableRng};
76
use rand_chacha::ChaCha8Rng;
87
use rayon::prelude::*;
9-
108
use std::collections::HashMap;
119
use std::io::{stdout, Write};
12-
1310
use std::sync::Arc;
1411
mod processing;
1512

@@ -35,34 +32,30 @@ fn parse_size(size_str: &str) -> Result<usize, String> {
3532
Ok(number * multiplier)
3633
}
3734

35+
const BYTE_SIZE: usize = 1024 * 1024;
36+
3837
async fn generate_data(
3938
web::Query(params): web::Query<HashMap<String, String>>,
4039
data_pools: web::Data<Arc<DataPools>>,
4140
) -> Result<HttpResponse, Error> {
42-
const BYTE_SIZE: usize = 1024 * 1024;
43-
let target_size = if let Some(size_str) = params.get("size") {
44-
parse_size(size_str).unwrap_or(BYTE_SIZE)
45-
} else {
46-
BYTE_SIZE
41+
let target_size = match params.get("size") {
42+
Some(size) => match parse_size(size) {
43+
Ok(size) => size,
44+
Err(_) => BYTE_SIZE,
45+
},
46+
None => BYTE_SIZE,
4747
};
4848
let pretty = params.get("pretty").map_or(false, |v| v == "true");
4949
let format = OutputFormat::from_str(params.get("format").map_or("json", |s| s));
5050
let seed: u64 = rand::thread_rng().gen();
5151
let num_threads = num_cpus::get();
5252
let chunk_size = target_size / num_threads;
53-
54-
// Calculate approximate records per chunk based on average record size
5553
let avg_record_size = if format == OutputFormat::JSON {
56-
if pretty {
57-
250
58-
} else {
59-
180
60-
} // Approximate sizes
54+
250
6155
} else {
62-
120 // CSV approximate record size
56+
120
6357
};
6458
let records_per_chunk = chunk_size / avg_record_size;
65-
6659
let progress = Arc::new(Mutex::new(ProgressInfo::new(
6760
(target_size / BYTE_SIZE) as f64,
6861
)));
@@ -79,6 +72,7 @@ async fn generate_data(
7972
"CSV"
8073
}
8174
);
75+
8276
if format == OutputFormat::JSON {
8377
println!(
8478
"Pretty print: {}",

src/processing.rs

Lines changed: 87 additions & 86 deletions
Original file line number · Diff line number · Diff line change
@@ -1,13 +1,87 @@
1-
use fake::faker::address::en::*;
2-
use fake::faker::company::en::*;
3-
use fake::Fake;
1+
use crate::ProgressInfo;
2+
use fake::{
3+
faker::{address::en::*, company::en::*},
4+
Fake,
5+
};
46
use parking_lot::Mutex;
57
use rand::Rng;
68
use rand_chacha::ChaCha8Rng;
79
use serde::Serialize;
810
use std::{simd::u8x32, sync::Arc};
911

10-
use crate::ProgressInfo;
12+
const BYTE_COUNT: usize = 32;
13+
const REFRESH_COUNT: u32 = 1500;
14+
const POOL_SIZE: i32 = 1000;
15+
16+
// Pre-computed patterns for both pretty and compact modes
17+
struct JsonPatterns {
18+
separator_pretty: [u8; 32], // ",\n "
19+
separator_compact: [u8; 32], // ","
20+
ending_pretty: [u8; 32], // "\n }"
21+
ending_compact: [u8; 32], // "}"
22+
quoted_field_patterns: [QuotedFieldPattern; 5],
23+
unquoted_field_patterns: [UnquotedFieldPattern; 3],
24+
}
25+
26+
struct QuotedFieldPattern {
27+
prefix: [u8; 32], // "\"field\": \""
28+
suffix: [u8; 32], // "\""
29+
prefix_len: usize,
30+
suffix_len: usize,
31+
}
32+
33+
// Pre-computed pattern for each unquoted field
34+
struct UnquotedFieldPattern {
35+
prefix: [u8; 32],
36+
prefix_len: usize,
37+
}
38+
39+
impl JsonPatterns {
40+
fn new() -> Self {
41+
let mut field_start_pretty = [0u8; 32];
42+
field_start_pretty[..6].copy_from_slice(b"\n \"");
43+
44+
let mut field_start_compact = [0u8; 32];
45+
field_start_compact[0] = b'"';
46+
47+
let mut separator_pretty = [0u8; 32];
48+
separator_pretty[..6].copy_from_slice(b",\n ");
49+
50+
let mut separator_compact = [0u8; 32];
51+
separator_compact[..1].copy_from_slice(b",");
52+
53+
let mut ending_pretty = [0u8; 32];
54+
ending_pretty[..4].copy_from_slice(b"\n }");
55+
56+
let mut ending_compact = [0u8; 32];
57+
ending_compact[0] = b'}';
58+
59+
let quoted_fields = [
60+
("name", create_quoted_pattern(b"name")),
61+
("industry", create_quoted_pattern(b"industry")),
62+
("city", create_quoted_pattern(b"city")),
63+
("state", create_quoted_pattern(b"state")),
64+
("country", create_quoted_pattern(b"country")),
65+
]
66+
.map(|(_, pattern)| pattern);
67+
68+
let unquoted_fields = [
69+
("id", create_unquoted_pattern(b"id")),
70+
("revenue", create_unquoted_pattern(b"revenue")),
71+
("employees", create_unquoted_pattern(b"employees")),
72+
]
73+
.map(|(_, pattern)| pattern);
74+
75+
Self {
76+
separator_pretty,
77+
separator_compact,
78+
ending_pretty,
79+
ending_compact,
80+
quoted_field_patterns: quoted_fields,
81+
unquoted_field_patterns: unquoted_fields,
82+
}
83+
}
84+
}
1185

1286
#[derive(Serialize)]
1387
struct BusinessLocation {
@@ -21,7 +95,6 @@ struct BusinessLocation {
2195
country: String,
2296
}
2397

24-
// Add an enum to represent output formats
2598
#[derive(PartialEq)]
2699
pub enum OutputFormat {
27100
JSON,
@@ -58,12 +131,11 @@ pub struct ChunkResult {
58131

59132
impl DataPools {
60133
pub fn new() -> Self {
61-
let pool_size = 1000;
62134
DataPools {
63-
names: (0..pool_size).map(|_| CompanyName().fake()).collect(),
64-
industries: (0..pool_size).map(|_| Industry().fake()).collect(),
65-
cities: (0..pool_size).map(|_| CityName().fake()).collect(),
66-
states: (0..pool_size).map(|_| StateName().fake()).collect(),
135+
names: (0..POOL_SIZE).map(|_| CompanyName().fake()).collect(),
136+
industries: (0..POOL_SIZE).map(|_| Industry().fake()).collect(),
137+
cities: (0..POOL_SIZE).map(|_| CityName().fake()).collect(),
138+
states: (0..POOL_SIZE).map(|_| StateName().fake()).collect(),
67139
countries: (0..50).map(|_| CountryName().fake()).collect(),
68140
}
69141
}
@@ -122,7 +194,7 @@ pub fn generate_chunk(
122194
progress_locked.get_mut().update(output.len());
123195
}
124196

125-
if current_id % 1500 == 0 {
197+
if current_id % REFRESH_COUNT == 0 {
126198
progress.lock().print_progress();
127199
}
128200
}
@@ -174,92 +246,21 @@ fn write_location_csv_simd(location: &BusinessLocation, output: &mut Vec<u8>) {
174246
fn copy_str_simd(output: &mut Vec<u8>, s: &str) {
175247
let bytes = s.as_bytes();
176248
let len = bytes.len();
177-
let chunks = len / 32;
249+
let chunks = len / BYTE_COUNT;
178250

179251
// Process 32 bytes at a time using SIMD
180252
for chunk in 0..chunks {
181-
let offset = chunk * 32;
182-
let simd_chunk = u8x32::from_slice(&bytes[offset..offset + 32]);
253+
let offset = chunk * BYTE_COUNT;
254+
let simd_chunk = u8x32::from_slice(&bytes[offset..offset + BYTE_COUNT]);
183255
output.extend_from_slice(&simd_chunk.to_array());
184256
}
185257

186-
// Handle remaining bytes
187-
let remaining_start = chunks * 32;
258+
let remaining_start = chunks * BYTE_COUNT;
188259
if remaining_start < len {
189260
output.extend_from_slice(&bytes[remaining_start..]);
190261
}
191262
}
192263

193-
// Pre-computed patterns for both pretty and compact modes
194-
struct JsonPatterns {
195-
separator_pretty: [u8; 32], // ",\n "
196-
separator_compact: [u8; 32], // ","
197-
ending_pretty: [u8; 32], // "\n }"
198-
ending_compact: [u8; 32], // "}"
199-
quoted_field_patterns: [QuotedFieldPattern; 5],
200-
unquoted_field_patterns: [UnquotedFieldPattern; 3],
201-
}
202-
203-
struct QuotedFieldPattern {
204-
prefix: [u8; 32], // "\"field\": \""
205-
suffix: [u8; 32], // "\""
206-
prefix_len: usize,
207-
suffix_len: usize,
208-
}
209-
210-
// Pre-computed pattern for each unquoted field
211-
struct UnquotedFieldPattern {
212-
prefix: [u8; 32],
213-
prefix_len: usize,
214-
}
215-
216-
impl JsonPatterns {
217-
fn new() -> Self {
218-
let mut field_start_pretty = [0u8; 32];
219-
field_start_pretty[..6].copy_from_slice(b"\n \"");
220-
221-
let mut field_start_compact = [0u8; 32];
222-
field_start_compact[0] = b'"';
223-
224-
let mut separator_pretty = [0u8; 32];
225-
separator_pretty[..6].copy_from_slice(b",\n ");
226-
227-
let mut separator_compact = [0u8; 32];
228-
separator_compact[..1].copy_from_slice(b",");
229-
230-
let mut ending_pretty = [0u8; 32];
231-
ending_pretty[..4].copy_from_slice(b"\n }");
232-
233-
let mut ending_compact = [0u8; 32];
234-
ending_compact[0] = b'}';
235-
236-
let quoted_fields = [
237-
("name", create_quoted_pattern(b"name")),
238-
("industry", create_quoted_pattern(b"industry")),
239-
("city", create_quoted_pattern(b"city")),
240-
("state", create_quoted_pattern(b"state")),
241-
("country", create_quoted_pattern(b"country")),
242-
]
243-
.map(|(_, pattern)| pattern);
244-
245-
let unquoted_fields = [
246-
("id", create_unquoted_pattern(b"id")),
247-
("revenue", create_unquoted_pattern(b"revenue")),
248-
("employees", create_unquoted_pattern(b"employees")),
249-
]
250-
.map(|(_, pattern)| pattern);
251-
252-
Self {
253-
separator_pretty,
254-
separator_compact,
255-
ending_pretty,
256-
ending_compact,
257-
quoted_field_patterns: quoted_fields,
258-
unquoted_field_patterns: unquoted_fields,
259-
}
260-
}
261-
}
262-
263264
#[inline]
264265
fn create_quoted_pattern(field_name: &[u8]) -> QuotedFieldPattern {
265266
let mut prefix = [0u8; 32];

0 commit comments

Comments
 (0)