Skip to content

Commit f00978c

Browse files
Fix layout and enable PushDownJoinPastProject rule
1 parent 724564f commit f00978c

File tree

3 files changed

+191
-97
lines changed

3 files changed

+191
-97
lines changed

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/AbstractLookupService.java

Lines changed: 95 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,8 @@ private void doLookup(T request, CancellableTask task, ActionListener<List<Page>
368368
List<Operator> operators = new ArrayList<>();
369369

370370
// get the fields from the right side, as specified in extractFields
371-
extractRightFields(request, shardContext, driverContext, releasables, operators);
371+
// Also extract additional right-side fields that are referenced in post-join filters but not in extractFields
372+
extractRightFields(queryList, request, shardContext, driverContext, builder, releasables, operators);
372373

373374
// get the left side fields that are needed for filter application
374375
// we read them from the input page and populate in the output page
@@ -441,27 +442,36 @@ public void onFailure(Exception e) {
441442
}
442443
}
443444

445+
/**
446+
* Field for DocID in lookup operations. Contains a DocVector.
447+
*/
448+
private static final EsField LOOKUP_DOC_ID_FIELD = new EsField(
449+
"$$DocID$$",
450+
DataType.DOC_DATA_TYPE,
451+
Map.of(),
452+
false,
453+
EsField.TimeSeriesFieldType.NONE
454+
);
455+
456+
/**
457+
* Field for Positions in lookup operations. Contains an IntBlock of positions.
458+
*/
459+
private static final EsField LOOKUP_POSITIONS_FIELD = new EsField(
460+
"$$Positions$$",
461+
DataType.INTEGER,
462+
Map.of(),
463+
false,
464+
EsField.TimeSeriesFieldType.NONE
465+
);
466+
444467
/**
445468
* Creates a Layout.Builder for lookup operations with Docs and Positions fields.
446469
*/
447470
private static Layout.Builder createLookupLayoutBuilder() {
448471
Layout.Builder builder = new Layout.Builder();
449472
// append the docsIds and positions to the layout
450-
builder.append(
451-
// this looks wrong, what is the datatype for the Docs? It says DocVector but it is not a DataType
452-
new FieldAttribute(
453-
Source.EMPTY,
454-
"$$DocID$$",
455-
new EsField("$$DocID$$", DataType.DOC_DATA_TYPE, Collections.emptyMap(), false, EsField.TimeSeriesFieldType.NONE)
456-
)
457-
);
458-
builder.append(
459-
new FieldAttribute(
460-
Source.EMPTY,
461-
"$$Positions$$",
462-
new EsField("$$Positions$$", DataType.INTEGER, Collections.emptyMap(), false, EsField.TimeSeriesFieldType.NONE)
463-
)
464-
);
473+
builder.append(new FieldAttribute(Source.EMPTY, null, null, LOOKUP_DOC_ID_FIELD.getName(), LOOKUP_DOC_ID_FIELD));
474+
builder.append(new FieldAttribute(Source.EMPTY, null, null, LOOKUP_POSITIONS_FIELD.getName(), LOOKUP_POSITIONS_FIELD));
465475
return builder;
466476
}
467477

@@ -493,21 +503,88 @@ private EnrichQuerySourceOperator createQueryOperator(
493503

494504
/**
495505
* Extracts right-side fields from the lookup index and creates the extractFields operator.
506+
* Also extracts additional right-side fields that are referenced in post-join filters but not in extractFields.
496507
*/
497508
private void extractRightFields(
509+
LookupEnrichQueryGenerator queryList,
498510
T request,
499511
LookupShardContext shardContext,
500512
DriverContext driverContext,
513+
Layout.Builder builder,
501514
List<Releasable> releasables,
502515
List<Operator> operators
503516
) {
504-
if (request.extractFields.isEmpty() == false) {
505-
var extractFieldsOperator = extractFieldsOperator(shardContext.context, driverContext, request.extractFields);
517+
// Start with the original extractFields
518+
List<NamedExpression> allExtractFields = new ArrayList<>(request.extractFields);
519+
520+
// Collect additional right-side fields referenced in post-join filters but not in extractFields
521+
collectAdditionalRightFieldsForFilters(queryList, request, builder, allExtractFields);
522+
523+
// Create a single operator for all extract fields
524+
if (allExtractFields.isEmpty() == false) {
525+
var extractFieldsOperator = extractFieldsOperator(shardContext.context, driverContext, allExtractFields);
506526
releasables.add(extractFieldsOperator);
507527
operators.add(extractFieldsOperator);
508528
}
509529
}
510530

531+
/**
532+
* Collects additional right-side fields that are referenced in post-join filters but not in extractFields.
533+
* These fields are added to allExtractFields and the layout builder.
534+
*/
535+
private void collectAdditionalRightFieldsForFilters(
536+
LookupEnrichQueryGenerator queryList,
537+
T request,
538+
Layout.Builder builder,
539+
List<NamedExpression> allExtractFields
540+
) {
541+
if (queryList instanceof PostJoinFilterable postJoinFilterable) {
542+
List<Expression> postJoinFilterExpressions = postJoinFilterable.getPostJoinFilter();
543+
if (postJoinFilterExpressions.isEmpty() == false) {
544+
LookupFromIndexService.TransportRequest lookupRequest = (LookupFromIndexService.TransportRequest) request;
545+
// Build a set of extractFields NameIDs
546+
Set<NameId> extractFieldNameIds = new HashSet<>();
547+
for (NamedExpression extractField : request.extractFields) {
548+
extractFieldNameIds.add(extractField.id());
549+
}
550+
// Collect right-side field NameIDs from EsRelation in rightPreJoinPlan
551+
Set<NameId> rightSideFieldNameIds = collectRightSideFieldNameIds(lookupRequest);
552+
553+
// Collect right-side attributes referenced in post-join filters but not in extractFields
554+
Set<NameId> addedNameIds = new HashSet<>();
555+
for (Expression filterExpr : postJoinFilterExpressions) {
556+
for (Attribute attr : filterExpr.references()) {
557+
NameId nameId = attr.id();
558+
// If it's a right-side field but not in extractFields, we need to extract it
559+
if (rightSideFieldNameIds.contains(nameId) && extractFieldNameIds.contains(nameId) == false) {
560+
if (addedNameIds.contains(nameId) == false) {
561+
allExtractFields.add(attr);
562+
builder.append(attr);
563+
addedNameIds.add(nameId);
564+
}
565+
}
566+
}
567+
}
568+
}
569+
}
570+
}
571+
572+
/**
573+
* Collects right-side field NameIDs from EsRelation in the rightPreJoinPlan.
574+
* Similar to collectLeftSideFieldsToBroadcast, but collects right-side fields instead.
575+
*/
576+
private static Set<NameId> collectRightSideFieldNameIds(LookupFromIndexService.TransportRequest request) {
577+
Set<NameId> rightSideFieldNameIds = new HashSet<>();
578+
if (request.getRightPreJoinPlan() instanceof FragmentExec fragmentExec) {
579+
fragmentExec.fragment().forEachDown(EsRelation.class, esRelation -> {
580+
for (Attribute attr : esRelation.output()) {
581+
rightSideFieldNameIds.add(attr.id());
582+
}
583+
});
584+
}
585+
return rightSideFieldNameIds;
586+
}
587+
511588
/**
512589
* Extracts left-side fields that need to be broadcast and creates the matchFields operator.
513590
* Collects left-side fields from post-join filter expressions and broadcasts them.

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/PushDownJoinPastProject.java

Lines changed: 0 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,6 @@ protected LogicalPlan rule(Join join) {
5050
}
5151

5252
if (join.left() instanceof Project project && join.config().type() == JoinTypes.LEFT) {
53-
// Check for conflicts between Project aliases and join condition attributes
54-
// If conflicts exist, disable the rule to avoid complications
55-
if (hasConflictsWithLookupJoinOnCondition(project, join)) {
56-
return join;
57-
}
5853
AttributeMap.Builder<Expression> aliasBuilder = AttributeMap.builder();
5954
project.forEachExpression(Alias.class, a -> aliasBuilder.put(a.toAttribute(), a.child()));
6055
var aliasesFromProject = aliasBuilder.build();
@@ -123,48 +118,4 @@ protected LogicalPlan rule(Join join) {
123118

124119
return join;
125120
}
126-
127-
/**
128-
* Checks if pushing down the Project would cause conflicts with attributes in the join condition.
129-
* A conflict occurs when a left-side attribute in the join condition, after being resolved through
130-
* Project aliases to the child's attribute names, would conflict with a right-side output field name.
131-
*
132-
* @param project The Project to be pushed down
133-
* @param join The Join operation
134-
* @return true if conflicts are detected, false otherwise
135-
*/
136-
private static boolean hasConflictsWithLookupJoinOnCondition(Project project, Join join) {
137-
if (join.config().joinOnConditions() == null) {
138-
return false;
139-
}
140-
141-
// Build aliases map from Project
142-
AttributeMap.Builder<Expression> aliasBuilder = AttributeMap.builder();
143-
project.forEachExpression(Alias.class, a -> aliasBuilder.put(a.toAttribute(), a.child()));
144-
AttributeMap<Expression> aliasesFromProject = aliasBuilder.build();
145-
146-
// Get right-side output field names
147-
Set<String> rightSideNames = new HashSet<>(Expressions.names(join.rightOutputFields()));
148-
149-
// Get left-side attribute names from the child (these are the names that will be available after removing the Project)
150-
Set<String> childOutputNames = new HashSet<>(Expressions.names(project.child().output()));
151-
152-
// Check each attribute referenced in the join condition
153-
for (Attribute attr : join.config().joinOnConditions().references()) {
154-
// Resolve attr through Project aliases to get the underlying expression
155-
Expression resolved = aliasesFromProject.resolve(attr, attr);
156-
Attribute resolvedAttr = resolved instanceof Attribute ? (Attribute) resolved : attr;
157-
String resolvedName = resolvedAttr.name();
158-
159-
// Check if the resolved attribute is from the left side (by name)
160-
if (childOutputNames.contains(resolvedName)) {
161-
// Check if the resolved attribute name conflicts with right-side names
162-
if (rightSideNames.contains(resolvedName)) {
163-
return true;
164-
}
165-
}
166-
}
167-
168-
return false;
169-
}
170121
}

0 commit comments

Comments
 (0)