diff --git a/CHANGELOG.md b/CHANGELOG.md index 5416005e4706c..7be2080d12227 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,6 +45,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fix get field mapping API returns 404 error in mixed cluster with multiple versions ([#13624](https://github.com/opensearch-project/OpenSearch/pull/13624)) - Allow clearing `remote_store.compatibility_mode` setting ([#13646](https://github.com/opensearch-project/OpenSearch/pull/13646)) - Fix ReplicaShardBatchAllocator to batch shards without duplicates ([#13710](https://github.com/opensearch-project/OpenSearch/pull/13710)) +- Don't return negative scores from `multi_match` query with `cross_fields` type ([#13829](https://github.com/opensearch-project/OpenSearch/pull/13829)) - Pass parent filter to inner hit query ([#13770](https://github.com/opensearch-project/OpenSearch/pull/13770)) ### Security diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/50_multi_match.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/50_multi_match.yml new file mode 100644 index 0000000000000..b20438c45b33d --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/50_multi_match.yml @@ -0,0 +1,32 @@ +"Cross fields do not return negative scores": + - do: + index: + index: test + id: 1 + body: { "color" : "orange red yellow" } + - do: + index: + index: test + id: 2 + body: { "color": "orange red purple", "shape": "red square" } + - do: + index: + index: test + id: 3 + body: { "color" : "orange red yellow purple" } + - do: + indices.refresh: { } + - do: + search: + index: test + body: + query: + multi_match: + query: "red" + type: "cross_fields" + fields: [ "color", "shape^100"] + tie_breaker: 0.1 + explain: true + - match: { hits.total.value: 3 } + - match: { hits.hits.0._id: "2" } + - gt: { hits.hits.2._score: 0.0 } diff --git a/server/src/main/java/org/apache/lucene/queries/BlendedTermQuery.java b/server/src/main/java/org/apache/lucene/queries/BlendedTermQuery.java index b47b974b96fed..17419804b1ffa 100644 --- a/server/src/main/java/org/apache/lucene/queries/BlendedTermQuery.java +++ b/server/src/main/java/org/apache/lucene/queries/BlendedTermQuery.java @@ -120,6 +120,7 @@ protected void blend(final TermStates[] contexts, int maxDoc, IndexReader reader } int max = 0; long minSumTTF = Long.MAX_VALUE; + int minDocCount = Integer.MAX_VALUE; for (int i = 0; i < contexts.length; i++) { TermStates ctx = contexts[i]; int df = ctx.docFreq(); @@ -133,11 +134,15 @@ protected void blend(final TermStates[] contexts, int maxDoc, IndexReader reader // we need to find out the minimum sumTTF to adjust the statistics // otherwise the statistics don't match minSumTTF = Math.min(minSumTTF, reader.getSumTotalTermFreq(terms[i].field())); + minDocCount = Math.min(minDocCount, reader.getDocCount(terms[i].field())); } } if (maxDoc > minSumTTF) { maxDoc = (int) minSumTTF; } + if (maxDoc > minDocCount) { + maxDoc = minDocCount; + } if (max == 0) { return; // we are done that term doesn't exist at all }