Skip to content

Commit

Permalink
fix(autocomplete): fix autocomplete duplicate field (#12558)
Browse files Browse the repository at this point in the history
  • Loading branch information
david-leifker authored Feb 6, 2025
1 parent 65376ee commit 52f71dd
Show file tree
Hide file tree
Showing 8 changed files with 173 additions and 132 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -642,7 +642,7 @@ private QueryBuilder buildQueryStringV2(
EntitySpec entitySpec = opContext.getEntityRegistry().getEntitySpec(entityName);
QueryBuilder query =
SearchRequestHandler.getBuilder(
opContext.getEntityRegistry(),
opContext,
entitySpec,
searchConfiguration,
customSearchConfiguration,
Expand Down Expand Up @@ -683,7 +683,7 @@ private QueryBuilder buildQueryStringBrowseAcrossEntities(

QueryBuilder query =
SearchRequestHandler.getBuilder(
finalOpContext.getEntityRegistry(),
finalOpContext,
entitySpecs,
searchConfiguration,
customSearchConfiguration,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ private SearchResult executeAndExtract(
return transformIndexIntoEntityName(
opContext.getSearchContext().getIndexConvention(),
SearchRequestHandler.getBuilder(
opContext.getEntityRegistry(),
opContext,
entitySpec,
searchConfiguration,
customSearchConfiguration,
Expand Down Expand Up @@ -257,7 +257,7 @@ private ScrollResult executeAndExtract(
return transformIndexIntoEntityName(
opContext.getSearchContext().getIndexConvention(),
SearchRequestHandler.getBuilder(
opContext.getEntityRegistry(),
opContext,
entitySpecs,
searchConfiguration,
customSearchConfiguration,
Expand Down Expand Up @@ -311,7 +311,7 @@ public SearchResult search(
"searchRequest",
() ->
SearchRequestHandler.getBuilder(
opContext.getEntityRegistry(),
opContext,
entitySpecs,
searchConfiguration,
customSearchConfiguration,
Expand Down Expand Up @@ -357,7 +357,7 @@ public SearchResult filter(
Filter transformedFilters = transformFilterForEntities(filters, indexConvention);
final SearchRequest searchRequest =
SearchRequestHandler.getBuilder(
opContext.getEntityRegistry(),
opContext,
entitySpec,
searchConfiguration,
customSearchConfiguration,
Expand Down Expand Up @@ -395,7 +395,11 @@ public AutoCompleteResult autoComplete(
IndexConvention indexConvention = opContext.getSearchContext().getIndexConvention();
AutocompleteRequestHandler builder =
AutocompleteRequestHandler.getBuilder(
entitySpec, customSearchConfiguration, queryFilterRewriteChain, searchConfiguration);
opContext,
entitySpec,
customSearchConfiguration,
queryFilterRewriteChain,
searchConfiguration);
SearchRequest req =
builder.getSearchRequest(
opContext,
Expand Down Expand Up @@ -441,7 +445,7 @@ public Map<String, Long> aggregateByValue(
IndexConvention indexConvention = opContext.getSearchContext().getIndexConvention();
final SearchRequest searchRequest =
SearchRequestHandler.getBuilder(
opContext.getEntityRegistry(),
opContext,
entitySpecs,
searchConfiguration,
customSearchConfiguration,
Expand Down Expand Up @@ -578,7 +582,7 @@ private SearchRequest getScrollRequest(
}

return SearchRequestHandler.getBuilder(
opContext.getEntityRegistry(),
opContext,
entitySpecs,
searchConfiguration,
customSearchConfiguration,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import io.datahubproject.metadata.context.OperationContext;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
Expand All @@ -45,9 +46,9 @@
import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder;

@Slf4j
public class AutocompleteRequestHandler {
public class AutocompleteRequestHandler extends BaseRequestHandler {

private final List<Pair> _defaultAutocompleteFields;
private final List<Pair<String, String>> _defaultAutocompleteFields;
private final Map<String, Set<SearchableAnnotation.FieldType>> searchableFieldTypes;

private static final Map<EntitySpec, AutocompleteRequestHandler>
Expand All @@ -58,8 +59,10 @@ public class AutocompleteRequestHandler {
private final EntitySpec entitySpec;
private final QueryFilterRewriteChain queryFilterRewriteChain;
private final SearchConfiguration searchConfiguration;
@Nonnull private final HighlightBuilder highlights;

public AutocompleteRequestHandler(
@Nonnull OperationContext systemOperationContext,
@Nonnull EntitySpec entitySpec,
@Nullable CustomSearchConfiguration customSearchConfiguration,
@Nonnull QueryFilterRewriteChain queryFilterRewriteChain,
Expand All @@ -79,6 +82,7 @@ public AutocompleteRequestHandler(
Double.toString(searchableAnnotation.getBoostScore()))),
Stream.of(Pair.of("urn", "1.0")))
.collect(Collectors.toList());
this.highlights = getDefaultHighlights(systemOperationContext);
searchableFieldTypes =
fieldSpecs.stream()
.collect(
Expand All @@ -98,6 +102,7 @@ public AutocompleteRequestHandler(
}

public static AutocompleteRequestHandler getBuilder(
@Nonnull OperationContext systemOperationContext,
@Nonnull EntitySpec entitySpec,
@Nullable CustomSearchConfiguration customSearchConfiguration,
@Nonnull QueryFilterRewriteChain queryFilterRewriteChain,
Expand All @@ -106,6 +111,7 @@ public static AutocompleteRequestHandler getBuilder(
entitySpec,
k ->
new AutocompleteRequestHandler(
systemOperationContext,
entitySpec,
customSearchConfiguration,
queryFilterRewriteChain,
Expand Down Expand Up @@ -165,7 +171,8 @@ public SearchRequest getSearchRequest(
ESUtils.buildSortOrder(searchSourceBuilder, null, List.of(entitySpec));

// wire inner non-scored query
searchSourceBuilder.highlighter(getHighlights(field));
searchSourceBuilder.highlighter(
field == null || field.isEmpty() ? highlights : getHighlights(opContext, List.of(field)));
searchRequest.source(searchSourceBuilder);
return searchRequest;
}
Expand All @@ -181,7 +188,7 @@ private BoolQueryBuilder getQuery(
public BoolQueryBuilder getQuery(
@Nonnull ObjectMapper objectMapper,
@Nullable AutocompleteConfiguration customAutocompleteConfig,
List<Pair> autocompleteFields,
List<Pair<String, String>> autocompleteFields,
@Nonnull String query) {

BoolQueryBuilder finalQuery =
Expand All @@ -201,7 +208,7 @@ public BoolQueryBuilder getQuery(

private Optional<QueryBuilder> getAutocompleteQuery(
@Nullable AutocompleteConfiguration customConfig,
List<Pair> autocompleteFields,
List<Pair<String, String>> autocompleteFields,
@Nonnull String query) {
Optional<QueryBuilder> result = Optional.empty();

Expand All @@ -212,7 +219,8 @@ private Optional<QueryBuilder> getAutocompleteQuery(
return result;
}

private BoolQueryBuilder defaultQuery(List<Pair> autocompleteFields, @Nonnull String query) {
private BoolQueryBuilder defaultQuery(
List<Pair<String, String>> autocompleteFields, @Nonnull String query) {
BoolQueryBuilder finalQuery = QueryBuilders.boolQuery().minimumShouldMatch(1);

// Search for exact matches with higher boost and ngram matches
Expand Down Expand Up @@ -248,38 +256,25 @@ private BoolQueryBuilder defaultQuery(List<Pair> autocompleteFields, @Nonnull St
return finalQuery;
}

// Get HighlightBuilder to highlight the matched field
private HighlightBuilder getHighlights(@Nullable String field) {
HighlightBuilder highlightBuilder =
new HighlightBuilder()
// Don't set tags to get the original field value
.preTags("")
.postTags("")
.numOfFragments(1);
// Check for each field name and any subfields
getAutocompleteFields(field)
.forEach(
pair -> {
final String fieldName = (String) pair.getLeft();
highlightBuilder
.field(fieldName)
.field(fieldName + ".*")
.field(fieldName + ".ngram")
.field(fieldName + ".delimited");
if (!fieldName.equalsIgnoreCase("urn")) {
highlightBuilder.field(fieldName + ".keyword");
}
});

// set field match req false for ngram
highlightBuilder.fields().stream()
.filter(f -> f.name().contains("ngram"))
.forEach(f -> f.requireFieldMatch(false).noMatchSize(200));

return highlightBuilder;
@Override
public Collection<String> getDefaultQueryFieldNames() {
return _defaultAutocompleteFields.stream().map(Pair::getKey).collect(Collectors.toList());
}

private List<Pair> getAutocompleteFields(@Nullable String field) {
@Override
protected Collection<String> getValidQueryFieldNames() {
return searchableFieldTypes.keySet();
}

@Override
protected Stream<String> highlightFieldExpansion(
@Nonnull OperationContext opContext, @Nonnull String fieldName) {
return Stream.concat(
Stream.of(fieldName, fieldName + ".*", fieldName + ".ngram", fieldName + ".delimited"),
Stream.of(ESUtils.toKeywordField(fieldName, false, opContext.getAspectRetriever())));
}

private List<Pair<String, String>> getAutocompleteFields(@Nullable String field) {
if (field != null && !field.isEmpty() && !field.equalsIgnoreCase("urn")) {
return ImmutableList.of(Pair.of(field, "10.0"));
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
package com.linkedin.metadata.search.elasticsearch.query.request;

import com.google.common.annotations.VisibleForTesting;
import io.datahubproject.metadata.context.OperationContext;
import java.util.Collection;
import java.util.Objects;
import java.util.stream.Stream;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import org.opensearch.search.fetch.subphase.highlight.HighlightBuilder;

public abstract class BaseRequestHandler {

/**
* Provide the fields which are queried by default
*
* @return collection of field names
*/
protected abstract Collection<String> getDefaultQueryFieldNames();

protected abstract Collection<String> getValidQueryFieldNames();

protected abstract Stream<String> highlightFieldExpansion(
@Nonnull OperationContext opContext, @Nonnull String fieldName);

@VisibleForTesting
public HighlightBuilder getDefaultHighlights(@Nonnull OperationContext opContext) {
return getHighlights(opContext, null);
}

@VisibleForTesting
public HighlightBuilder getHighlights(
@Nonnull OperationContext opContext, @Nullable Collection<String> fieldsToHighlight) {
HighlightBuilder highlightBuilder =
new HighlightBuilder()
// Don't set tags to get the original field value
.preTags("")
.postTags("")
.numOfFragments(1);

final Stream<String> fieldStream;
if (fieldsToHighlight == null || fieldsToHighlight.isEmpty()) {
fieldStream = getDefaultQueryFieldNames().stream();
} else {
// filter for valid names
fieldStream =
fieldsToHighlight.stream()
.filter(Objects::nonNull)
.filter(fieldName -> !fieldName.isEmpty())
.filter(getValidQueryFieldNames()::contains);
}

fieldStream
.flatMap(fieldName -> highlightFieldExpansion(opContext, fieldName))
.distinct()
.map(HighlightBuilder.Field::new)
.map(
field -> {
if (field.name().endsWith("ngram")) {
return field.requireFieldMatch(false).noMatchSize(200);
} else {
return field;
}
})
.forEach(highlightBuilder::field);

return highlightBuilder;
}
}
Loading

0 comments on commit 52f71dd

Please sign in to comment.