-
Notifications
You must be signed in to change notification settings - Fork 2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support sub agg in filter rewrite optimization #17447
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -94,7 +94,7 @@ | |
import java.util.stream.Collectors; | ||
|
||
import static org.opensearch.search.aggregations.MultiBucketConsumerService.MAX_BUCKET_SETTING; | ||
import static org.opensearch.search.aggregations.bucket.filterrewrite.DateHistogramAggregatorBridge.segmentMatchAll; | ||
import static org.opensearch.search.aggregations.bucket.filterrewrite.AggregatorBridge.segmentMatchAll; | ||
|
||
/** | ||
* Main aggregator that aggregates docs from multiple aggregations | ||
|
@@ -563,14 +563,23 @@ private void processLeafFromQuery(LeafReaderContext ctx, Sort indexSortPrefix) t | |
} | ||
} | ||
|
||
@Override | ||
protected boolean tryPrecomputeAggregationForLeaf(LeafReaderContext ctx) throws IOException { | ||
finishLeaf(); // May need to wrap up previous leaf if it could not be precomputed | ||
return filterRewriteOptimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, segmentMatchAll(context, ctx)); | ||
} | ||
// @Override | ||
// protected boolean tryPrecomputeAggregationForLeaf(LeafReaderContext ctx) throws IOException { | ||
// finishLeaf(); // May need to wrap up previous leaf if it could not be precomputed | ||
// return filterRewriteOptimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, segmentMatchAll(context, ctx)); | ||
// } | ||
|
||
@Override | ||
protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { | ||
boolean optimized = filterRewriteOptimizationContext.tryOptimize( | ||
ctx, | ||
this::incrementBucketDocCount, | ||
segmentMatchAll(context, ctx), | ||
collectableSubAggregators, | ||
sub | ||
); | ||
if (optimized) throw new CollectionTerminatedException(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why create a separate variable here instead of just calling |
||
|
||
finishLeaf(); | ||
|
||
boolean fillDocIdSet = deferredCollectors != NO_OP_COLLECTOR; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,12 +14,17 @@ | |
import org.apache.lucene.index.LeafReaderContext; | ||
import org.apache.lucene.index.NumericDocValues; | ||
import org.apache.lucene.index.PointValues; | ||
import org.apache.lucene.search.DocIdSetIterator; | ||
import org.apache.lucene.util.DocIdSetBuilder; | ||
import org.opensearch.index.mapper.DocCountFieldMapper; | ||
import org.opensearch.search.aggregations.BucketCollector; | ||
import org.opensearch.search.aggregations.LeafBucketCollector; | ||
import org.opensearch.search.internal.SearchContext; | ||
|
||
import java.io.IOException; | ||
import java.util.concurrent.atomic.AtomicInteger; | ||
import java.util.function.BiConsumer; | ||
import java.util.function.Supplier; | ||
|
||
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; | ||
|
||
|
@@ -42,6 +47,8 @@ public final class FilterRewriteOptimizationContext { | |
|
||
private Ranges ranges; // built at shard level | ||
|
||
private int subAggLength; | ||
|
||
// debug info related fields | ||
private final AtomicInteger leafNodeVisited = new AtomicInteger(); | ||
private final AtomicInteger innerNodeVisited = new AtomicInteger(); | ||
|
@@ -65,7 +72,8 @@ public FilterRewriteOptimizationContext( | |
private boolean canOptimize(final Object parent, final int subAggLength, SearchContext context) throws IOException { | ||
if (context.maxAggRewriteFilters() == 0) return false; | ||
|
||
if (parent != null || subAggLength != 0) return false; | ||
if (parent != null) return false; | ||
this.subAggLength = subAggLength; | ||
|
||
boolean canOptimize = aggregatorBridge.canOptimize(); | ||
if (canOptimize) { | ||
|
@@ -96,8 +104,13 @@ void setRanges(Ranges ranges) { | |
* @param incrementDocCount consume the doc_count results for certain ordinal | ||
* @param segmentMatchAll if your optimization can prepareFromSegment, you should pass in this flag to decide whether to prepareFromSegment | ||
*/ | ||
public boolean tryOptimize(final LeafReaderContext leafCtx, final BiConsumer<Long, Long> incrementDocCount, boolean segmentMatchAll) | ||
throws IOException { | ||
public boolean tryOptimize( | ||
final LeafReaderContext leafCtx, | ||
final BiConsumer<Long, Long> incrementDocCount, | ||
boolean segmentMatchAll, | ||
BucketCollector collectableSubAggregators, | ||
LeafBucketCollector sub | ||
) throws IOException { | ||
segments.incrementAndGet(); | ||
if (!canOptimize) { | ||
return false; | ||
|
@@ -123,12 +136,43 @@ public boolean tryOptimize(final LeafReaderContext leafCtx, final BiConsumer<Lon | |
Ranges ranges = getRanges(leafCtx, segmentMatchAll); | ||
if (ranges == null) return false; | ||
|
||
consumeDebugInfo(aggregatorBridge.tryOptimize(values, incrementDocCount, ranges)); | ||
Supplier<DocIdSetBuilder> disBuilderSupplier = null; | ||
if (subAggLength != 0) { | ||
disBuilderSupplier = () -> { | ||
try { | ||
return new DocIdSetBuilder(leafCtx.reader().maxDoc(), values, aggregatorBridge.fieldType.name()); | ||
} catch (IOException e) { | ||
throw new RuntimeException(e); | ||
} | ||
Comment on lines
+144
to
+146
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are we catching a checked exception and rethrowing an unchecked exception here when the method is documented to throw IOException? |
||
}; | ||
} | ||
OptimizeResult optimizeResult = aggregatorBridge.tryOptimize(values, incrementDocCount, ranges, disBuilderSupplier); | ||
consumeDebugInfo(optimizeResult); | ||
|
||
optimizedSegments.incrementAndGet(); | ||
logger.debug("Fast filter optimization applied to shard {} segment {}", shardId, leafCtx.ord); | ||
logger.debug("Crossed leaf nodes: {}, inner nodes: {}", leafNodeVisited, innerNodeVisited); | ||
|
||
if (subAggLength == 0) { | ||
return true; | ||
} | ||
|
||
// Handle sub aggregation | ||
for (int bucketOrd = 0; bucketOrd < optimizeResult.builders.length; bucketOrd++) { | ||
logger.debug("Collecting bucket {} for sub aggregation", bucketOrd); | ||
DocIdSetBuilder builder = optimizeResult.builders[bucketOrd]; | ||
if (builder == null) { | ||
continue; | ||
} | ||
DocIdSetIterator iterator = optimizeResult.builders[bucketOrd].build().iterator(); | ||
while (iterator.nextDoc() != NO_MORE_DOCS) { | ||
int currentDoc = iterator.docID(); | ||
sub.collect(currentDoc, bucketOrd); | ||
} | ||
// resetting the sub collector after processing each bucket | ||
sub = collectableSubAggregators.getLeafCollector(leafCtx); | ||
} | ||
|
||
return true; | ||
} | ||
|
||
|
@@ -160,10 +204,12 @@ private Ranges getRangesFromSegment(LeafReaderContext leafCtx, boolean segmentMa | |
/** | ||
* Contains debug info of BKD traversal to show in profile | ||
*/ | ||
static class DebugInfo { | ||
static class OptimizeResult { | ||
private final AtomicInteger leafNodeVisited = new AtomicInteger(); // leaf node visited | ||
private final AtomicInteger innerNodeVisited = new AtomicInteger(); // inner node visited | ||
|
||
public DocIdSetBuilder[] builders; | ||
|
||
void visitLeaf() { | ||
leafNodeVisited.incrementAndGet(); | ||
} | ||
|
@@ -173,7 +219,7 @@ void visitInner() { | |
} | ||
} | ||
|
||
void consumeDebugInfo(DebugInfo debug) { | ||
void consumeDebugInfo(OptimizeResult debug) { | ||
leafNodeVisited.addAndGet(debug.leafNodeVisited.get()); | ||
innerNodeVisited.addAndGet(debug.innerNodeVisited.get()); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I assume this will be removed when this is ready for review. :)