Skip to content

Commit 0348405

Browse files
committed
Get count from split metadata on simple query with timerange
1 parent de696d4 commit 0348405

File tree

2 files changed

+68
-44
lines changed

2 files changed

+68
-44
lines changed

quickwit/quickwit-search/src/leaf.rs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -463,7 +463,12 @@ async fn leaf_search_single_split(
463463
// This may be the case for AllQuery with a sort by date and time filter, where the current
464464
// split can't have better results.
465465
//
466-
if is_metadata_count_request_with_ast(&query_ast, &search_request) {
466+
if is_metadata_count_request_with_ast(
467+
&query_ast,
468+
&search_request,
469+
split.timestamp_start,
470+
split.timestamp_end,
471+
) {
467472
return Ok(get_leaf_resp_from_count(split.num_docs));
468473
}
469474

@@ -535,7 +540,12 @@ async fn leaf_search_single_split(
535540
check_optimize_search_request(&mut search_request, &split, &split_filter);
536541
collector.update_search_param(&search_request);
537542
let mut leaf_search_response: LeafSearchResponse =
538-
if is_metadata_count_request_with_ast(&query_ast, &search_request) {
543+
if is_metadata_count_request_with_ast(
544+
&query_ast,
545+
&search_request,
546+
split.timestamp_start,
547+
split.timestamp_end,
548+
) {
539549
get_leaf_resp_from_count(searcher.num_docs())
540550
} else if collector.is_count_only() {
541551
let count = query.count(&searcher)? as u64;

quickwit/quickwit-search/src/root.rs

Lines changed: 56 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -636,9 +636,13 @@ async fn search_partial_hits_phase_with_scroll(
636636
/// metadata count.
637637
///
638638
/// This is done by exclusion, so we will need to keep it up to date if fields are added.
639-
pub fn is_metadata_count_request(request: &SearchRequest) -> bool {
639+
pub fn is_metadata_count_request(request: &SearchRequest, split: &SplitMetadata) -> bool {
640640
let query_ast: QueryAst = serde_json::from_str(&request.query_ast).unwrap();
641-
is_metadata_count_request_with_ast(&query_ast, request)
641+
642+
let start_time = split.time_range.as_ref().map(|x| x.start()).copied();
643+
let end_time = split.time_range.as_ref().map(|x| x.end()).copied();
644+
645+
is_metadata_count_request_with_ast(&query_ast, request, start_time, end_time)
642646
}
643647

644648
/// Check if the request is a count request without any filters, so we can just return the split
@@ -647,42 +651,47 @@ pub fn is_metadata_count_request(request: &SearchRequest) -> bool {
647651
/// This is done by exclusion, so we will need to keep it up to date if fields are added.
648652
///
649653
/// The passed query_ast should match the serialized on in request.
650-
pub fn is_metadata_count_request_with_ast(query_ast: &QueryAst, request: &SearchRequest) -> bool {
654+
pub fn is_metadata_count_request_with_ast(
655+
query_ast: &QueryAst,
656+
request: &SearchRequest,
657+
split_start_timestamp: Option<i64>,
658+
split_end_timestamp: Option<i64>,
659+
) -> bool {
651660
if query_ast != &QueryAst::MatchAll {
652661
return false;
653662
}
654663
if request.max_hits != 0 {
655664
return false;
656665
}
657666

658-
// If the start and end timestamp encompass the whole split, it is still a count query.
659-
// We remove this currently on the leaf level, but not yet on the root level.
660-
// There's a small advantage when we would do this on the root level, since we have the
661-
// counts available on the split. On the leaf it is currently required to open the split
662-
// to get the count.
663-
if request.start_timestamp.is_some() || request.end_timestamp.is_some() {
664-
return false;
667+
match (request.start_timestamp, split_start_timestamp) {
668+
(Some(request_start), Some(split_start)) if split_start >= request_start => {}
669+
(Some(_), _) => return false,
670+
(None, _) => {}
665671
}
672+
match (request.end_timestamp, split_end_timestamp) {
673+
(Some(request_end), Some(split_end)) if split_end < request_end => {}
674+
(Some(_), _) => return false,
675+
(None, _) => {}
676+
}
677+
666678
if request.aggregation_request.is_some() || !request.snippet_fields.is_empty() {
667679
return false;
668680
}
669681
true
670682
}
671683

672684
/// Get a leaf search response that returns the num_docs of the split
673-
pub fn get_count_from_metadata(split_metadatas: &[SplitMetadata]) -> Vec<LeafSearchResponse> {
674-
split_metadatas
675-
.iter()
676-
.map(|metadata| LeafSearchResponse {
677-
num_hits: metadata.num_docs as u64,
678-
partial_hits: Vec::new(),
679-
failed_splits: Vec::new(),
680-
num_attempted_splits: 1,
681-
num_successful_splits: 1,
682-
intermediate_aggregation_result: None,
683-
resource_stats: None,
684-
})
685-
.collect()
685+
pub fn get_count_from_metadata(metadata: &SplitMetadata) -> LeafSearchResponse {
686+
LeafSearchResponse {
687+
num_hits: metadata.num_docs as u64,
688+
partial_hits: Vec::new(),
689+
failed_splits: Vec::new(),
690+
num_attempted_splits: 1,
691+
num_successful_splits: 1,
692+
intermediate_aggregation_result: None,
693+
resource_stats: None,
694+
}
686695
}
687696

688697
/// Returns true if the query is particularly memory intensive.
@@ -730,26 +739,31 @@ pub(crate) async fn search_partial_hits_phase(
730739
split_metadatas: &[SplitMetadata],
731740
cluster_client: &ClusterClient,
732741
) -> crate::Result<LeafSearchResponse> {
733-
let leaf_search_responses: Vec<LeafSearchResponse> =
734-
if is_metadata_count_request(search_request) {
735-
get_count_from_metadata(split_metadatas)
742+
let mut leaf_search_responses: Vec<LeafSearchResponse> =
743+
Vec::with_capacity(split_metadatas.len());
744+
let mut leaf_search_jobs = Vec::new();
745+
for split in split_metadatas {
746+
if is_metadata_count_request(search_request, split) {
747+
leaf_search_responses.push(get_count_from_metadata(split));
736748
} else {
737-
let jobs: Vec<SearchJob> = split_metadatas.iter().map(SearchJob::from).collect();
738-
let assigned_leaf_search_jobs = cluster_client
739-
.search_job_placer
740-
.assign_jobs(jobs, &HashSet::default())
741-
.await?;
742-
let mut leaf_request_tasks = Vec::new();
743-
for (client, client_jobs) in assigned_leaf_search_jobs {
744-
let leaf_request = jobs_to_leaf_request(
745-
search_request,
746-
indexes_metas_for_leaf_search,
747-
client_jobs,
748-
)?;
749-
leaf_request_tasks.push(cluster_client.leaf_search(leaf_request, client.clone()));
750-
}
751-
try_join_all(leaf_request_tasks).await?
752-
};
749+
leaf_search_jobs.push(SearchJob::from(split));
750+
}
751+
}
752+
753+
if !leaf_search_jobs.is_empty() {
754+
let assigned_leaf_search_jobs = cluster_client
755+
.search_job_placer
756+
.assign_jobs(leaf_search_jobs, &HashSet::default())
757+
.await?;
758+
let mut leaf_request_tasks = Vec::new();
759+
for (client, client_jobs) in assigned_leaf_search_jobs {
760+
let leaf_request =
761+
jobs_to_leaf_request(search_request, indexes_metas_for_leaf_search, client_jobs)?;
762+
leaf_request_tasks.push(cluster_client.leaf_search(leaf_request, client.clone()));
763+
}
764+
let executed_leaf_search_responses = try_join_all(leaf_request_tasks).await?;
765+
leaf_search_responses.extend(executed_leaf_search_responses);
766+
}
753767

754768
// Creates a collector which merges responses into one
755769
let merge_collector =

0 commit comments

Comments
 (0)