@@ -31,8 +31,8 @@ use prost::Message;
31
31
use prost_types:: { Duration as WellKnownDuration , Timestamp as WellKnownTimestamp } ;
32
32
use quickwit_config:: JaegerConfig ;
33
33
use quickwit_opentelemetry:: otlp:: {
34
- Event as QwEvent , Link as QwLink , Span as QwSpan , SpanFingerprint , SpanKind as QwSpanKind ,
35
- SpanStatus as QwSpanStatus , OTEL_TRACE_INDEX_ID ,
34
+ B64TraceId , Event as QwEvent , Link as QwLink , Span as QwSpan , SpanFingerprint ,
35
+ SpanKind as QwSpanKind , SpanStatus as QwSpanStatus , TraceId , OTEL_TRACE_INDEX_ID ,
36
36
} ;
37
37
use quickwit_proto:: jaeger:: api_v2:: {
38
38
KeyValue as JaegerKeyValue , Log as JaegerLog , Process as JaegerProcess , Span as JaegerSpan ,
@@ -45,9 +45,10 @@ use quickwit_proto::jaeger::storage::v1::{
45
45
SpansResponseChunk , TraceQueryParameters ,
46
46
} ;
47
47
use quickwit_proto:: { ListTermsRequest , SearchRequest } ;
48
- use quickwit_search:: SearchService ;
48
+ use quickwit_search:: { FindTraceIdsCollector , SearchService } ;
49
49
use serde:: Deserialize ;
50
50
use serde_json:: Value as JsonValue ;
51
+ use tantivy:: collector:: Collector ;
51
52
use time:: format_description:: well_known:: Rfc3339 ;
52
53
use time:: OffsetDateTime ;
53
54
use tokio:: sync:: mpsc;
@@ -63,9 +64,6 @@ mod metrics;
63
64
// OpenTelemetry to Jaeger Transformation
64
65
// <https://opentelemetry.io/docs/reference/specification/trace/sdk_exporters/jaeger/>
65
66
66
- /// A base64-encoded 16-byte array.
67
- type TraceId = String ;
68
-
69
67
type TimeIntervalSecs = RangeInclusive < i64 > ;
70
68
71
69
type JaegerResult < T > = Result < T , Status > ;
@@ -171,13 +169,12 @@ impl JaegerService {
171
169
. query
172
170
. ok_or_else ( || Status :: invalid_argument ( "Query is empty." ) ) ?;
173
171
174
- let ( trace_ids_b64, _) = self . find_trace_ids ( trace_query) . await ?;
175
- debug ! ( trace_ids=?trace_ids_b64, "`find_trace_ids` response" ) ;
176
-
177
- let trace_ids = trace_ids_b64
172
+ let ( trace_ids, _) = self . find_trace_ids ( trace_query) . await ?;
173
+ let trace_ids = trace_ids
178
174
. into_iter ( )
179
- . map ( |trace_id_b64| base64_decode ( trace_id_b64. as_bytes ( ) , "trace ID" ) )
180
- . collect :: < Result < _ , _ > > ( ) ?;
175
+ . map ( |trace_id| trace_id. b64_decode ( ) . to_vec ( ) )
176
+ . collect ( ) ;
177
+ debug ! ( trace_ids=?trace_ids, "`find_trace_ids` response" ) ;
181
178
let response = FindTraceIDsResponse { trace_ids } ;
182
179
Ok ( response)
183
180
}
@@ -212,7 +209,10 @@ impl JaegerService {
212
209
request_start : Instant ,
213
210
) -> JaegerResult < SpanStream > {
214
211
debug ! ( request=?request, "`get_trace` request" ) ;
215
- let trace_id = BASE64_STANDARD . encode ( request. trace_id ) ;
212
+ debug_assert_eq ! ( request. trace_id. len( ) , 16 ) ;
213
+ let trace_id = TraceId :: try_from ( request. trace_id )
214
+ . map_err ( |error| Status :: invalid_argument ( error. to_string ( ) ) ) ?
215
+ . b64_encode ( ) ;
216
216
let end = OffsetDateTime :: now_utc ( ) . unix_timestamp ( ) ;
217
217
let start = end - self . lookback_period_secs ;
218
218
let search_window = start..=end;
@@ -226,7 +226,7 @@ impl JaegerService {
226
226
async fn find_trace_ids (
227
227
& self ,
228
228
trace_query : TraceQueryParameters ,
229
- ) -> Result < ( Vec < TraceId > , TimeIntervalSecs ) , Status > {
229
+ ) -> Result < ( Vec < B64TraceId > , TimeIntervalSecs ) , Status > {
230
230
let index_id = OTEL_TRACE_INDEX_ID . to_string ( ) ;
231
231
let span_kind_opt = None ;
232
232
let min_span_start_timestamp_secs_opt = trace_query. start_time_min . map ( |ts| ts. seconds ) ;
@@ -269,15 +269,14 @@ impl JaegerService {
269
269
return Ok ( ( Vec :: new ( ) , 0 ..=0 ) ) ;
270
270
} ;
271
271
let trace_ids = collect_trace_ids ( & agg_result_json) ?;
272
- debug ! ( trace_ids=?trace_ids. 0 , "The query matched {} traces." , trace_ids. 0 . len( ) ) ;
273
-
272
+ debug ! ( "The query matched {} traces." , trace_ids. 0 . len( ) ) ;
274
273
Ok ( trace_ids)
275
274
}
276
275
277
276
#[ instrument( "stream_spans" , skip_all, fields( num_traces=%trace_ids. len( ) , num_spans=Empty , num_bytes=Empty ) ) ]
278
277
async fn stream_spans (
279
278
& self ,
280
- trace_ids : & [ TraceId ] ,
279
+ trace_ids : & [ B64TraceId ] ,
281
280
search_window : TimeIntervalSecs ,
282
281
operation_name : & ' static str ,
283
282
request_start : Instant ,
@@ -294,8 +293,10 @@ impl JaegerService {
294
293
query. push_str ( " OR " ) ;
295
294
}
296
295
query. push_str ( "trace_id:" ) ;
297
- query. push_str ( trace_id) ;
296
+ query. push_str ( trace_id. as_str ( ) )
298
297
}
298
+ debug ! ( query=%query, "Fetch spans query" ) ;
299
+
299
300
let search_request = SearchRequest {
300
301
index_id : OTEL_TRACE_INDEX_ID . to_string ( ) ,
301
302
query,
@@ -650,28 +651,12 @@ fn build_search_query(
650
651
}
651
652
652
653
fn build_aggregations_query ( num_traces : usize ) -> String {
653
- // DANGER: The fast field is truncated to seconds but the aggregation returns timestamps in
654
- // microseconds by appending a bunch of zeros.
655
- let query = format ! (
656
- r#"{{
657
- "trace_ids": {{
658
- "terms": {{
659
- "field": "trace_id",
660
- "size": {num_traces},
661
- "order": {{
662
- "max_span_start_timestamp_micros": "desc"
663
- }}
664
- }},
665
- "aggs": {{
666
- "max_span_start_timestamp_micros": {{
667
- "max": {{
668
- "field": "span_start_timestamp_secs"
669
- }}
670
- }}
671
- }}
672
- }}
673
- }}"# ,
674
- ) ;
654
+ let query = serde_json:: to_string ( & FindTraceIdsCollector {
655
+ num_traces,
656
+ trace_id_field_name : "trace_id" . to_string ( ) ,
657
+ span_timestamp_field_name : "span_start_timestamp_secs" . to_string ( ) ,
658
+ } )
659
+ . expect ( "The collector should be JSON serializable." ) ;
675
660
debug ! ( query=%query, "Aggregations query" ) ;
676
661
query
677
662
}
@@ -953,44 +938,21 @@ fn qw_event_to_jaeger_log(event: QwEvent) -> Result<JaegerLog, Status> {
953
938
Ok ( log)
954
939
}
955
940
956
- #[ derive( Deserialize ) ]
957
- struct TraceIdsAggResult {
958
- trace_ids : TraceIdBuckets ,
959
- }
960
-
961
- #[ derive( Deserialize ) ]
962
- struct TraceIdBuckets {
963
- #[ serde( default ) ]
964
- buckets : Vec < TraceIdBucket > ,
965
- }
966
-
967
- #[ derive( Deserialize ) ]
968
- struct TraceIdBucket {
969
- key : String ,
970
- max_span_start_timestamp_micros : MetricValue ,
971
- }
972
-
973
- #[ derive( Deserialize ) ]
974
- struct MetricValue {
975
- value : f64 ,
976
- }
977
-
978
- fn collect_trace_ids ( agg_result_json : & str ) -> Result < ( Vec < TraceId > , TimeIntervalSecs ) , Status > {
979
- let agg_result: TraceIdsAggResult = json_deserialize ( agg_result_json, "trace IDs aggregation" ) ?;
980
- if agg_result. trace_ids . buckets . is_empty ( ) {
941
+ fn collect_trace_ids ( trace_ids_json : & str ) -> Result < ( Vec < B64TraceId > , TimeIntervalSecs ) , Status > {
942
+ let collector_fruit: <FindTraceIdsCollector as Collector >:: Fruit =
943
+ json_deserialize ( trace_ids_json, "trace IDs aggregation" ) ?;
944
+ if collector_fruit. is_empty ( ) {
981
945
return Ok ( ( Vec :: new ( ) , 0 ..=0 ) ) ;
982
946
}
983
- let mut trace_ids = Vec :: with_capacity ( agg_result . trace_ids . buckets . len ( ) ) ;
947
+ let mut trace_ids = Vec :: with_capacity ( collector_fruit . len ( ) ) ;
984
948
let mut start = i64:: MAX ;
985
949
let mut end = i64:: MIN ;
986
950
987
- for bucket in agg_result . trace_ids . buckets {
988
- trace_ids. push ( bucket . key ) ;
989
- start = start. min ( bucket . max_span_start_timestamp_micros . value as i64 ) ;
990
- end = end. max ( bucket . max_span_start_timestamp_micros . value as i64 ) ;
951
+ for trace_id in collector_fruit {
952
+ trace_ids. push ( trace_id . trace_id ) ;
953
+ start = start. min ( trace_id . span_timestamp . into_timestamp_secs ( ) ) ;
954
+ end = end. max ( trace_id . span_timestamp . into_timestamp_secs ( ) ) ;
991
955
}
992
- let start = start / 1_000_000 ;
993
- let end = end / 1_000_000 ;
994
956
Ok ( ( trace_ids, start..=end) )
995
957
}
996
958
@@ -1011,7 +973,7 @@ where T: Deserialize<'a> {
1011
973
match serde_json:: from_str ( json) {
1012
974
Ok ( deserialized) => Ok ( deserialized) ,
1013
975
Err ( error) => {
1014
- error ! ( "Failed to deserialize {label}: {error:?}" , ) ;
976
+ error ! ( "Failed to deserialize {label}: {error:?}" ) ;
1015
977
Err ( Status :: internal ( format ! ( "Failed to deserialize {json}." ) ) )
1016
978
}
1017
979
}
@@ -1020,11 +982,8 @@ where T: Deserialize<'a> {
1020
982
#[ cfg( test) ]
1021
983
mod tests {
1022
984
use quickwit_proto:: jaeger:: api_v2:: ValueType ;
1023
- use quickwit_search:: { encode_term_for_test, MockSearchService } ;
985
+ use quickwit_search:: { encode_term_for_test, MockSearchService , QuickwitAggregations } ;
1024
986
use serde_json:: json;
1025
- use tantivy:: aggregation:: agg_req:: {
1026
- Aggregation , Aggregations , BucketAggregationType , MetricAggregation ,
1027
- } ;
1028
987
1029
988
use super :: * ;
1030
989
@@ -1478,21 +1437,16 @@ mod tests {
1478
1437
#[ test]
1479
1438
fn test_build_aggregations_query ( ) {
1480
1439
let aggregations_query = build_aggregations_query ( 77 ) ;
1481
- let aggregations: Aggregations = serde_json:: from_str ( & aggregations_query) . unwrap ( ) ;
1482
- let aggregation = aggregations. get ( "trace_ids" ) . unwrap ( ) ;
1483
- let Aggregation :: Bucket ( ref bucket_aggregation) = aggregation else {
1484
- panic ! ( "Expected a bucket aggregation!" ) ;
1440
+ let aggregations: QuickwitAggregations = serde_json:: from_str ( & aggregations_query) . unwrap ( ) ;
1441
+ let QuickwitAggregations :: FindTraceIdsAggregation ( collector) = aggregations else {
1442
+ panic ! ( "Expected find trace IDs aggregation!" ) ;
1485
1443
} ;
1486
- let BucketAggregationType :: Terms ( ref terms_aggregation) = bucket_aggregation. bucket_agg else {
1487
- panic ! ( "Expected a terms aggregation!" ) ;
1488
- } ;
1489
- assert_eq ! ( terms_aggregation. field, "trace_id" ) ;
1490
- assert_eq ! ( terms_aggregation. size. unwrap( ) , 77 ) ;
1491
-
1492
- let Aggregation :: Metric ( MetricAggregation :: Max ( max_aggregation) ) = bucket_aggregation. sub_aggregation . get ( "max_span_start_timestamp_micros" ) . unwrap ( ) else {
1493
- panic ! ( "Expected a max metric aggregation!" ) ;
1494
- } ;
1495
- assert_eq ! ( max_aggregation. field, "span_start_timestamp_secs" ) ;
1444
+ assert_eq ! ( collector. num_traces, 77 ) ;
1445
+ assert_eq ! ( collector. trace_id_field_name, "trace_id" ) ;
1446
+ assert_eq ! (
1447
+ collector. span_timestamp_field_name,
1448
+ "span_start_timestamp_secs"
1449
+ ) ;
1496
1450
}
1497
1451
1498
1452
#[ test]
@@ -1738,31 +1692,35 @@ mod tests {
1738
1692
#[ test]
1739
1693
fn test_collect_trace_ids ( ) {
1740
1694
{
1741
- let agg_result_json = r#"{"trace_ids": {}} "# ;
1695
+ let agg_result_json = r#"[] "# ;
1742
1696
let ( trace_ids, _span_timestamps_range) = collect_trace_ids ( agg_result_json) . unwrap ( ) ;
1743
1697
assert ! ( trace_ids. is_empty( ) ) ;
1744
1698
}
1745
1699
{
1746
- let agg_result_json = r#"{
1747
- "trace_ids": {
1748
- "buckets": [
1749
- {"key": "jIr1E97+2DJBcBnOb/wjQg==", "doc_count": 3, "max_span_start_timestamp_micros": {"value": 1674611393000000.0 }}]}}"# ;
1700
+ let agg_result_json = r#"[
1701
+ {
1702
+ "trace_id": "AQEBAQEBAQEBAQEBAQEBAQ==",
1703
+ "span_timestamp": 1736522020000000
1704
+ }
1705
+ ]"# ;
1750
1706
let ( trace_ids, span_timestamps_range) = collect_trace_ids ( agg_result_json) . unwrap ( ) ;
1751
- assert_eq ! ( trace_ids, & [ "jIr1E97+2DJBcBnOb/wjQg==" ] ) ;
1752
- assert_eq ! ( span_timestamps_range, 1674611393 ..=1674611393 ) ;
1707
+ assert_eq ! ( trace_ids. len ( ) , 1 ) ;
1708
+ assert_eq ! ( span_timestamps_range, 1736522020 ..=1736522020 ) ;
1753
1709
}
1754
1710
{
1755
- let agg_result_json = r#"{
1756
- "trace_ids": {
1757
- "buckets": [
1758
- {"key": "FKvicG794620BNsewGCknA==", "doc_count": 7, "max_span_start_timestamp_micros": { "value": 1674611388000000.0 }},
1759
- {"key": "jIr1E97+2DJBcBnOb/wjQg==", "doc_count": 3, "max_span_start_timestamp_micros": { "value": 1674611393000000.0 }}]}}"# ;
1711
+ let agg_result_json = r#"[
1712
+ {
1713
+ "trace_id": "AQIDBAUGBwgJCgsMDQ4PEA==",
1714
+ "span_timestamp": 1736522020000000
1715
+ },
1716
+ {
1717
+ "trace_id": "AgICAgICAgICAgICAgICAg==",
1718
+ "span_timestamp": 1704899620000000
1719
+ }
1720
+ ]"# ;
1760
1721
let ( trace_ids, span_timestamps_range) = collect_trace_ids ( agg_result_json) . unwrap ( ) ;
1761
- assert_eq ! (
1762
- trace_ids,
1763
- & [ "FKvicG794620BNsewGCknA==" , "jIr1E97+2DJBcBnOb/wjQg==" ]
1764
- ) ;
1765
- assert_eq ! ( span_timestamps_range, 1674611388 ..=1674611393 ) ;
1722
+ assert_eq ! ( trace_ids. len( ) , 2 ) ;
1723
+ assert_eq ! ( span_timestamps_range, 1704899620 ..=1736522020 ) ;
1766
1724
}
1767
1725
}
1768
1726
0 commit comments