17
17
// You should have received a copy of the GNU Affero General Public License
18
18
// along with this program. If not, see <http://www.gnu.org/licenses/>.
19
19
20
- use criterion:: { criterion_group, criterion_main, Criterion , Throughput } ;
20
+ use binggan:: plugins:: * ;
21
+ use binggan:: { black_box, BenchRunner , PeakMemAlloc , INSTRUMENTED_SYSTEM } ;
21
22
use quickwit_doc_mapper:: DocMapper ;
22
23
use tantivy:: TantivyDocument ;
23
24
24
- const JSON_TEST_DATA : & str = include_str ! ( "data/simple-parse-bench.json" ) ;
25
+ const SIMPLE_JSON_TEST_DATA : & str = include_str ! ( "data/simple-parse-bench.json" ) ;
26
+ const ROUTING_TEST_DATA : & str = include_str ! ( "data/simple-routing-expression-bench.json" ) ;
25
27
26
- const DOC_MAPPER_CONF : & str = r#"{
28
+ const DOC_MAPPER_CONF_SIMPLE_JSON : & str = r#"{
27
29
"type": "default",
28
30
"default_search_fields": [],
29
31
"tag_fields": [],
@@ -35,28 +37,92 @@ const DOC_MAPPER_CONF: &str = r#"{
35
37
]
36
38
}"# ;
37
39
38
- pub fn simple_json_to_doc_benchmark ( c : & mut Criterion ) {
39
- let doc_mapper: Box < DocMapper > = serde_json:: from_str ( DOC_MAPPER_CONF ) . unwrap ( ) ;
40
- let lines: Vec < & str > = JSON_TEST_DATA . lines ( ) . map ( |line| line. trim ( ) ) . collect ( ) ;
40
/// Doc mapper configuration used with the routing data set; it sets `partition_key` to
/// `seller.id`.
///
/// Note that the field mapping
/// `{"name": "date", "type": "datetime", "input_formats": ["%Y-%m-%d"], "output_format": "%Y-%m-%d"}`
/// is removed since tantivy parsing only supports RFC3339.
const ROUTING_DOC_MAPPER_CONF: &str = r#"{
    "type": "default",
    "default_search_fields": [],
    "tag_fields": [],
    "field_mappings": [
        {"name": "timestamp", "type": "datetime", "input_formats": ["unix_timestamp"], "output_format": "%Y-%m-%d %H:%M:%S", "fast": true },
        {"name": "source", "type": "text" },
        {"name": "vin", "type": "text" },
        {"name": "vid", "type": "text" },
        {"name": "domain", "type": "text" },
        {"name": "seller", "type": "object", "field_mappings": [
            {"name": "id", "type": "text" },
            {"name": "name", "type": "text" },
            {"name": "address", "type": "text" },
            {"name": "zip", "type": "text" }
        ]}
    ],
    "partition_key": "seller.id"
}"#;
61
+
62
+ #[ global_allocator]
63
+ pub static GLOBAL : & PeakMemAlloc < std:: alloc:: System > = & INSTRUMENTED_SYSTEM ;
64
+
65
+ fn get_test_data (
66
+ name : & ' static str ,
67
+ raw : & ' static str ,
68
+ doc_mapper : & ' static str ,
69
+ ) -> ( & ' static str , usize , Vec < & ' static str > , Box < DocMapper > ) {
70
+ let lines: Vec < & str > = raw. lines ( ) . map ( |line| line. trim ( ) ) . collect ( ) ;
71
+ (
72
+ name,
73
+ raw. len ( ) ,
74
+ lines,
75
+ serde_json:: from_str ( doc_mapper) . unwrap ( ) ,
76
+ )
77
+ }
41
78
42
- let mut group = c. benchmark_group ( "simple-json-to-doc" ) ;
43
- group. throughput ( Throughput :: Bytes ( JSON_TEST_DATA . len ( ) as u64 ) ) ;
44
- group. bench_function ( "simple-json-to-doc" , |b| {
45
- b. iter ( || {
46
- for line in & lines {
47
- doc_mapper. doc_from_json_str ( line) . unwrap ( ) ;
79
+ fn run_bench ( ) {
80
+ let inputs: Vec < ( & str , usize , Vec < & str > , Box < DocMapper > ) > = vec ! [
81
+ ( get_test_data(
82
+ "flat_json" ,
83
+ SIMPLE_JSON_TEST_DATA ,
84
+ DOC_MAPPER_CONF_SIMPLE_JSON ,
85
+ ) ) ,
86
+ ( get_test_data( "routing_json" , ROUTING_TEST_DATA , ROUTING_DOC_MAPPER_CONF ) ) ,
87
+ ] ;
88
+
89
+ let mut runner: BenchRunner = BenchRunner :: new ( ) ;
90
+
91
+ runner. config ( ) . set_num_iter_for_bench ( 1 ) ;
92
+ runner. config ( ) . set_num_iter_for_group ( 100 ) ;
93
+ runner
94
+ . add_plugin ( CacheTrasher :: default ( ) )
95
+ . add_plugin ( BPUTrasher :: default ( ) )
96
+ . add_plugin ( PeakMemAllocPlugin :: new ( GLOBAL ) ) ;
97
+
98
+ for ( input_name, size, data, doc_mapper) in inputs. iter ( ) {
99
+ let dynamic_doc_mapper: DocMapper =
100
+ serde_json:: from_str ( r#"{ "mode": "dynamic" }"# ) . unwrap ( ) ;
101
+ let mut group = runner. new_group ( ) ;
102
+ group. set_name ( input_name) ;
103
+ group. set_input_size ( * size) ;
104
+ group. register_with_input ( "doc_mapper" , data, |lines| {
105
+ for line in lines {
106
+ black_box ( doc_mapper. doc_from_json_str ( line) . unwrap ( ) ) ;
48
107
}
49
- } )
50
- } ) ;
51
- group. bench_function ( "simple-json-to-doc-tantivy" , |b| {
52
- b. iter ( || {
108
+ } ) ;
109
+
110
+ group. register_with_input ( "doc_mapper_dynamic" , data, |lines| {
111
+ for line in lines {
112
+ black_box ( dynamic_doc_mapper. doc_from_json_str ( line) . unwrap ( ) ) ;
113
+ }
114
+ } ) ;
115
+
116
+ group. register_with_input ( "tantivy parse json" , data, |lines| {
53
117
let schema = doc_mapper. schema ( ) ;
54
- for line in & lines {
55
- let _doc = TantivyDocument :: parse_json ( & schema, line) . unwrap ( ) ;
118
+ for line in lines {
119
+ let _doc = black_box ( TantivyDocument :: parse_json ( & schema, line) . unwrap ( ) ) ;
56
120
}
57
- } )
58
- } ) ;
121
+ } ) ;
122
+ group. run ( ) ;
123
+ }
59
124
}
60
125
61
- criterion_group ! ( benches, simple_json_to_doc_benchmark) ;
62
- criterion_main ! ( benches) ;
126
+ fn main ( ) {
127
+ run_bench ( ) ;
128
+ }
0 commit comments