Skip to content

Commit 8572903

Browse files
authored
extend json parse bench (#5516)
* extend json parse bench
* remove Some(())
1 parent 02a5b6a commit 8572903

File tree

5 files changed

+219
-71
lines changed

5 files changed

+219
-71
lines changed

quickwit/Cargo.lock

Lines changed: 99 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

quickwit/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ async-compression = { version = "0.4", features = ["tokio", "gzip"] }
8686
async-speed-limit = "0.4"
8787
async-trait = "0.1"
8888
base64 = "0.22"
89+
binggan = { version = "0.14" }
8990
bytes = { version = "1", features = ["serde"] }
9091
bytesize = { version = "1.3.0", features = ["serde"] }
9192
bytestring = "1.3.0"

quickwit/quickwit-doc-mapper/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ quickwit-proto = { workspace = true }
3636
quickwit-query = { workspace = true }
3737

3838
[dev-dependencies]
39-
criterion = { workspace = true }
39+
binggan = { workspace = true }
4040
matches = { workspace = true }
4141
serde_yaml = { workspace = true }
4242
time = { workspace = true }

quickwit/quickwit-doc-mapper/benches/doc_to_json_bench.rs

Lines changed: 88 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,15 @@
1717
// You should have received a copy of the GNU Affero General Public License
1818
// along with this program. If not, see <http://www.gnu.org/licenses/>.
1919

20-
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
20+
use binggan::plugins::*;
21+
use binggan::{black_box, BenchRunner, PeakMemAlloc, INSTRUMENTED_SYSTEM};
2122
use quickwit_doc_mapper::DocMapper;
2223
use tantivy::TantivyDocument;
2324

24-
const JSON_TEST_DATA: &str = include_str!("data/simple-parse-bench.json");
25+
const SIMPLE_JSON_TEST_DATA: &str = include_str!("data/simple-parse-bench.json");
26+
const ROUTING_TEST_DATA: &str = include_str!("data/simple-routing-expression-bench.json");
2527

26-
const DOC_MAPPER_CONF: &str = r#"{
28+
const DOC_MAPPER_CONF_SIMPLE_JSON: &str = r#"{
2729
"type": "default",
2830
"default_search_fields": [],
2931
"tag_fields": [],
@@ -35,28 +37,92 @@ const DOC_MAPPER_CONF: &str = r#"{
3537
]
3638
}"#;
3739

38-
pub fn simple_json_to_doc_benchmark(c: &mut Criterion) {
39-
let doc_mapper: Box<DocMapper> = serde_json::from_str(DOC_MAPPER_CONF).unwrap();
40-
let lines: Vec<&str> = JSON_TEST_DATA.lines().map(|line| line.trim()).collect();
40+
/// Note: the field mapping {"name": "date", "type": "datetime", "input_formats": ["%Y-%m-%d"], "output_format":
41+
/// "%Y-%m-%d"} was removed because tantivy parsing only supports RFC 3339 datetimes.
42+
const ROUTING_DOC_MAPPER_CONF: &str = r#"{
43+
"type": "default",
44+
"default_search_fields": [],
45+
"tag_fields": [],
46+
"field_mappings": [
47+
{"name": "timestamp", "type": "datetime", "input_formats": ["unix_timestamp"], "output_format": "%Y-%m-%d %H:%M:%S", "output_format": "%Y-%m-%d %H:%M:%S", "fast": true },
48+
{"name": "source", "type": "text" },
49+
{"name": "vin", "type": "text" },
50+
{"name": "vid", "type": "text" },
51+
{"name": "domain", "type": "text" },
52+
{"name": "seller", "type": "object", "field_mappings": [
53+
{"name": "id", "type": "text" },
54+
{"name": "name", "type": "text" },
55+
{"name": "address", "type": "text" },
56+
{"name": "zip", "type": "text" }
57+
]}
58+
],
59+
"partition_key": "seller.id"
60+
}"#;
61+
62+
#[global_allocator]
63+
pub static GLOBAL: &PeakMemAlloc<std::alloc::System> = &INSTRUMENTED_SYSTEM;
64+
65+
fn get_test_data(
66+
name: &'static str,
67+
raw: &'static str,
68+
doc_mapper: &'static str,
69+
) -> (&'static str, usize, Vec<&'static str>, Box<DocMapper>) {
70+
let lines: Vec<&str> = raw.lines().map(|line| line.trim()).collect();
71+
(
72+
name,
73+
raw.len(),
74+
lines,
75+
serde_json::from_str(doc_mapper).unwrap(),
76+
)
77+
}
4178

42-
let mut group = c.benchmark_group("simple-json-to-doc");
43-
group.throughput(Throughput::Bytes(JSON_TEST_DATA.len() as u64));
44-
group.bench_function("simple-json-to-doc", |b| {
45-
b.iter(|| {
46-
for line in &lines {
47-
doc_mapper.doc_from_json_str(line).unwrap();
79+
fn run_bench() {
80+
let inputs: Vec<(&str, usize, Vec<&str>, Box<DocMapper>)> = vec![
81+
(get_test_data(
82+
"flat_json",
83+
SIMPLE_JSON_TEST_DATA,
84+
DOC_MAPPER_CONF_SIMPLE_JSON,
85+
)),
86+
(get_test_data("routing_json", ROUTING_TEST_DATA, ROUTING_DOC_MAPPER_CONF)),
87+
];
88+
89+
let mut runner: BenchRunner = BenchRunner::new();
90+
91+
runner.config().set_num_iter_for_bench(1);
92+
runner.config().set_num_iter_for_group(100);
93+
runner
94+
.add_plugin(CacheTrasher::default())
95+
.add_plugin(BPUTrasher::default())
96+
.add_plugin(PeakMemAllocPlugin::new(GLOBAL));
97+
98+
for (input_name, size, data, doc_mapper) in inputs.iter() {
99+
let dynamic_doc_mapper: DocMapper =
100+
serde_json::from_str(r#"{ "mode": "dynamic" }"#).unwrap();
101+
let mut group = runner.new_group();
102+
group.set_name(input_name);
103+
group.set_input_size(*size);
104+
group.register_with_input("doc_mapper", data, |lines| {
105+
for line in lines {
106+
black_box(doc_mapper.doc_from_json_str(line).unwrap());
48107
}
49-
})
50-
});
51-
group.bench_function("simple-json-to-doc-tantivy", |b| {
52-
b.iter(|| {
108+
});
109+
110+
group.register_with_input("doc_mapper_dynamic", data, |lines| {
111+
for line in lines {
112+
black_box(dynamic_doc_mapper.doc_from_json_str(line).unwrap());
113+
}
114+
});
115+
116+
group.register_with_input("tantivy parse json", data, |lines| {
53117
let schema = doc_mapper.schema();
54-
for line in &lines {
55-
let _doc = TantivyDocument::parse_json(&schema, line).unwrap();
118+
for line in lines {
119+
let _doc = black_box(TantivyDocument::parse_json(&schema, line).unwrap());
56120
}
57-
})
58-
});
121+
});
122+
group.run();
123+
}
59124
}
60125

61-
criterion_group!(benches, simple_json_to_doc_benchmark);
62-
criterion_main!(benches);
126+
fn main() {
127+
run_bench();
128+
}

0 commit comments

Comments
 (0)