1
- use fake:: faker:: address:: en:: * ;
2
- use fake:: faker:: company:: en:: * ;
3
- use fake:: Fake ;
1
+ use crate :: ProgressInfo ;
2
+ use fake:: {
3
+ faker:: { address:: en:: * , company:: en:: * } ,
4
+ Fake ,
5
+ } ;
4
6
use parking_lot:: Mutex ;
5
7
use rand:: Rng ;
6
8
use rand_chacha:: ChaCha8Rng ;
7
9
use serde:: Serialize ;
8
10
use std:: { simd:: u8x32, sync:: Arc } ;
9
11
10
- use crate :: ProgressInfo ;
12
+ const BYTE_COUNT : usize = 32 ;
13
+ const REFRESH_COUNT : u32 = 1500 ;
14
+ const POOL_SIZE : i32 = 1000 ;
15
+
16
+ // Pre-computed patterns for both pretty and compact modes
17
+ struct JsonPatterns {
18
+ separator_pretty : [ u8 ; 32 ] , // ",\n "
19
+ separator_compact : [ u8 ; 32 ] , // ","
20
+ ending_pretty : [ u8 ; 32 ] , // "\n }"
21
+ ending_compact : [ u8 ; 32 ] , // "}"
22
+ quoted_field_patterns : [ QuotedFieldPattern ; 5 ] ,
23
+ unquoted_field_patterns : [ UnquotedFieldPattern ; 3 ] ,
24
+ }
25
+
26
/// Pre-computed byte pattern for a JSON field whose value is quoted.
///
/// Instances are produced by `create_quoted_pattern`.
#[derive(Debug, Clone)]
struct QuotedFieldPattern {
    /// Bytes emitted before the value, i.e. `"field": "`.
    prefix: [u8; 32],
    /// Bytes emitted after the value, i.e. the closing `"`.
    suffix: [u8; 32],
    /// Presumably the number of meaningful leading bytes in `prefix`
    /// (NOTE(review): confirm against `create_quoted_pattern`).
    prefix_len: usize,
    /// Presumably the number of meaningful leading bytes in `suffix`
    /// (NOTE(review): confirm against `create_quoted_pattern`).
    suffix_len: usize,
}
32
+
33
/// Pre-computed byte pattern for a JSON field whose value is not quoted.
///
/// Instances are produced by `create_unquoted_pattern`, which defines the
/// exact byte layout of `prefix`.
#[derive(Debug, Clone)]
struct UnquotedFieldPattern {
    /// Bytes emitted before the value.
    prefix: [u8; 32],
    /// Presumably the number of meaningful leading bytes in `prefix`
    /// (NOTE(review): confirm against `create_unquoted_pattern`).
    prefix_len: usize,
}
38
+
39
+ impl JsonPatterns {
40
+ fn new ( ) -> Self {
41
+ let mut field_start_pretty = [ 0u8 ; 32 ] ;
42
+ field_start_pretty[ ..6 ] . copy_from_slice ( b"\n \" " ) ;
43
+
44
+ let mut field_start_compact = [ 0u8 ; 32 ] ;
45
+ field_start_compact[ 0 ] = b'"' ;
46
+
47
+ let mut separator_pretty = [ 0u8 ; 32 ] ;
48
+ separator_pretty[ ..6 ] . copy_from_slice ( b",\n " ) ;
49
+
50
+ let mut separator_compact = [ 0u8 ; 32 ] ;
51
+ separator_compact[ ..1 ] . copy_from_slice ( b"," ) ;
52
+
53
+ let mut ending_pretty = [ 0u8 ; 32 ] ;
54
+ ending_pretty[ ..4 ] . copy_from_slice ( b"\n }" ) ;
55
+
56
+ let mut ending_compact = [ 0u8 ; 32 ] ;
57
+ ending_compact[ 0 ] = b'}' ;
58
+
59
+ let quoted_fields = [
60
+ ( "name" , create_quoted_pattern ( b"name" ) ) ,
61
+ ( "industry" , create_quoted_pattern ( b"industry" ) ) ,
62
+ ( "city" , create_quoted_pattern ( b"city" ) ) ,
63
+ ( "state" , create_quoted_pattern ( b"state" ) ) ,
64
+ ( "country" , create_quoted_pattern ( b"country" ) ) ,
65
+ ]
66
+ . map ( |( _, pattern) | pattern) ;
67
+
68
+ let unquoted_fields = [
69
+ ( "id" , create_unquoted_pattern ( b"id" ) ) ,
70
+ ( "revenue" , create_unquoted_pattern ( b"revenue" ) ) ,
71
+ ( "employees" , create_unquoted_pattern ( b"employees" ) ) ,
72
+ ]
73
+ . map ( |( _, pattern) | pattern) ;
74
+
75
+ Self {
76
+ separator_pretty,
77
+ separator_compact,
78
+ ending_pretty,
79
+ ending_compact,
80
+ quoted_field_patterns : quoted_fields,
81
+ unquoted_field_patterns : unquoted_fields,
82
+ }
83
+ }
84
+ }
11
85
12
86
#[ derive( Serialize ) ]
13
87
struct BusinessLocation {
@@ -21,7 +95,6 @@ struct BusinessLocation {
21
95
country : String ,
22
96
}
23
97
24
- // Add an enum to represent output formats
25
98
#[ derive( PartialEq ) ]
26
99
pub enum OutputFormat {
27
100
JSON ,
@@ -58,12 +131,11 @@ pub struct ChunkResult {
58
131
59
132
impl DataPools {
60
133
pub fn new ( ) -> Self {
61
- let pool_size = 1000 ;
62
134
DataPools {
63
- names : ( 0 ..pool_size ) . map ( |_| CompanyName ( ) . fake ( ) ) . collect ( ) ,
64
- industries : ( 0 ..pool_size ) . map ( |_| Industry ( ) . fake ( ) ) . collect ( ) ,
65
- cities : ( 0 ..pool_size ) . map ( |_| CityName ( ) . fake ( ) ) . collect ( ) ,
66
- states : ( 0 ..pool_size ) . map ( |_| StateName ( ) . fake ( ) ) . collect ( ) ,
135
+ names : ( 0 ..POOL_SIZE ) . map ( |_| CompanyName ( ) . fake ( ) ) . collect ( ) ,
136
+ industries : ( 0 ..POOL_SIZE ) . map ( |_| Industry ( ) . fake ( ) ) . collect ( ) ,
137
+ cities : ( 0 ..POOL_SIZE ) . map ( |_| CityName ( ) . fake ( ) ) . collect ( ) ,
138
+ states : ( 0 ..POOL_SIZE ) . map ( |_| StateName ( ) . fake ( ) ) . collect ( ) ,
67
139
countries : ( 0 ..50 ) . map ( |_| CountryName ( ) . fake ( ) ) . collect ( ) ,
68
140
}
69
141
}
@@ -122,7 +194,7 @@ pub fn generate_chunk(
122
194
progress_locked. get_mut ( ) . update ( output. len ( ) ) ;
123
195
}
124
196
125
- if current_id % 1500 == 0 {
197
+ if current_id % REFRESH_COUNT == 0 {
126
198
progress. lock ( ) . print_progress ( ) ;
127
199
}
128
200
}
@@ -174,92 +246,21 @@ fn write_location_csv_simd(location: &BusinessLocation, output: &mut Vec<u8>) {
174
246
/// Appends the UTF-8 bytes of `s` to the end of `output`.
///
/// The previous implementation split the input into 32-byte chunks, loaded
/// each chunk into a `u8x32` and immediately stored it back before appending.
/// That round trip is a plain byte copy, so a single `extend_from_slice`
/// yields exactly the same bytes while growing the buffer at most once and
/// needing no nightly-only SIMD API.
///
/// NOTE(review): if this was the only user of `BYTE_COUNT`, the constant may
/// now be reported as unused.
fn copy_str_simd(output: &mut Vec<u8>, s: &str) {
    output.extend_from_slice(s.as_bytes());
}
192
263
193
- // Pre-computed patterns for both pretty and compact modes
194
- struct JsonPatterns {
195
- separator_pretty : [ u8 ; 32 ] , // ",\n "
196
- separator_compact : [ u8 ; 32 ] , // ","
197
- ending_pretty : [ u8 ; 32 ] , // "\n }"
198
- ending_compact : [ u8 ; 32 ] , // "}"
199
- quoted_field_patterns : [ QuotedFieldPattern ; 5 ] ,
200
- unquoted_field_patterns : [ UnquotedFieldPattern ; 3 ] ,
201
- }
202
-
203
- struct QuotedFieldPattern {
204
- prefix : [ u8 ; 32 ] , // "\"field\": \""
205
- suffix : [ u8 ; 32 ] , // "\""
206
- prefix_len : usize ,
207
- suffix_len : usize ,
208
- }
209
-
210
- // Pre-computed pattern for each unquoted field
211
- struct UnquotedFieldPattern {
212
- prefix : [ u8 ; 32 ] ,
213
- prefix_len : usize ,
214
- }
215
-
216
- impl JsonPatterns {
217
- fn new ( ) -> Self {
218
- let mut field_start_pretty = [ 0u8 ; 32 ] ;
219
- field_start_pretty[ ..6 ] . copy_from_slice ( b"\n \" " ) ;
220
-
221
- let mut field_start_compact = [ 0u8 ; 32 ] ;
222
- field_start_compact[ 0 ] = b'"' ;
223
-
224
- let mut separator_pretty = [ 0u8 ; 32 ] ;
225
- separator_pretty[ ..6 ] . copy_from_slice ( b",\n " ) ;
226
-
227
- let mut separator_compact = [ 0u8 ; 32 ] ;
228
- separator_compact[ ..1 ] . copy_from_slice ( b"," ) ;
229
-
230
- let mut ending_pretty = [ 0u8 ; 32 ] ;
231
- ending_pretty[ ..4 ] . copy_from_slice ( b"\n }" ) ;
232
-
233
- let mut ending_compact = [ 0u8 ; 32 ] ;
234
- ending_compact[ 0 ] = b'}' ;
235
-
236
- let quoted_fields = [
237
- ( "name" , create_quoted_pattern ( b"name" ) ) ,
238
- ( "industry" , create_quoted_pattern ( b"industry" ) ) ,
239
- ( "city" , create_quoted_pattern ( b"city" ) ) ,
240
- ( "state" , create_quoted_pattern ( b"state" ) ) ,
241
- ( "country" , create_quoted_pattern ( b"country" ) ) ,
242
- ]
243
- . map ( |( _, pattern) | pattern) ;
244
-
245
- let unquoted_fields = [
246
- ( "id" , create_unquoted_pattern ( b"id" ) ) ,
247
- ( "revenue" , create_unquoted_pattern ( b"revenue" ) ) ,
248
- ( "employees" , create_unquoted_pattern ( b"employees" ) ) ,
249
- ]
250
- . map ( |( _, pattern) | pattern) ;
251
-
252
- Self {
253
- separator_pretty,
254
- separator_compact,
255
- ending_pretty,
256
- ending_compact,
257
- quoted_field_patterns : quoted_fields,
258
- unquoted_field_patterns : unquoted_fields,
259
- }
260
- }
261
- }
262
-
263
264
#[ inline]
264
265
fn create_quoted_pattern ( field_name : & [ u8 ] ) -> QuotedFieldPattern {
265
266
let mut prefix = [ 0u8 ; 32 ] ;
0 commit comments