Skip to content

Commit

Permalink
Pass filterBitSet as null to ANN search when the filter matches all docs in a segment, and add integration tests.
Browse files Browse the repository at this point in the history
Signed-off-by: Wei Wang <[email protected]>
  • Loading branch information
weiwang118 committed Jan 2, 2025
1 parent 105c39a commit c599062
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,10 @@ public enum FilterIdsSelectorType {
public static FilterIdsSelector getFilterIdSelector(final BitSet filterIdsBitSet, final int cardinality) throws IOException {
long[] filterIds;
FilterIdsSelector.FilterIdsSelectorType filterType;
if (filterIdsBitSet instanceof FixedBitSet) {
if (filterIdsBitSet == null) {
filterIds = null;
filterType = FilterIdsSelector.FilterIdsSelectorType.BITMAP;
} else if (filterIdsBitSet instanceof FixedBitSet) {
/**
* When filterIds is dense filter, using fixed bitset
*/
Expand Down
16 changes: 8 additions & 8 deletions src/main/java/org/opensearch/knn/index/query/KNNWeight.java
Original file line number Diff line number Diff line change
Expand Up @@ -146,13 +146,14 @@ public PerLeafResult searchLeaf(LeafReaderContext context, int k) throws IOExcep
Map<Integer, Float> result = doExactSearch(context, new BitSetIterator(filterBitSet, cardinality), cardinality, k);
return new PerLeafResult(filterWeight == null ? null : filterBitSet, result);
}

/*
* If filters match all docs in this segment, then there is no need to do any extra step
* and should directly do ANN Search*/
if (filterWeight != null && cardinality == maxDoc) {
return new PerLeafResult(new FixedBitSet(0), doANNSearch(context, new FixedBitSet(0), 0, k));
}
Map<Integer, Float> docIdsToScoreMap = doANNSearch(context, filterBitSet, cardinality, k);
* If filters match all docs in this segment, then null should be passed as filterBitSet
* so that it will not do a bitset look up in bottom search layer.
*/
final BitSet annFilter = filterWeight != null && cardinality == maxDoc ? null : filterBitSet;
final Map<Integer, Float> docIdsToScoreMap = doANNSearch(context, annFilter, cardinality, k);

// See whether we have to perform exact search based on approx search results
// This is required if there are no native engine files or if approximate search returned
// results less than K, though we have more than k filtered docs
Expand All @@ -161,7 +162,7 @@ public PerLeafResult searchLeaf(LeafReaderContext context, int k) throws IOExcep
Map<Integer, Float> result = doExactSearch(context, docs, cardinality, k);
return new PerLeafResult(filterWeight == null ? null : filterBitSet, result);
}
return new PerLeafResult(filterWeight == null ? null : filterBitSet, docIdsToScoreMap);
return new PerLeafResult((filterWeight == null || cardinality == maxDoc) ? null : filterBitSet, docIdsToScoreMap);
}

private BitSet getFilteredDocsBitSet(final LeafReaderContext ctx) throws IOException {
Expand Down Expand Up @@ -327,7 +328,6 @@ private Map<Integer, Float> doANNSearch(
// Now that we have the allocation, we need to readLock it
indexAllocation.readLock();
indexAllocation.incRef();

try {
if (indexAllocation.isClosed()) {
throw new RuntimeException("Index has already been closed");
Expand Down
11 changes: 1 addition & 10 deletions src/test/java/org/opensearch/knn/index/query/KNNWeightTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -769,16 +769,7 @@ public void testANNWithFilterQuery_whenFiltersMatchAllDocs_thenSuccess() {
}

jniServiceMockedStatic.when(
() -> JNIService.queryIndex(
anyLong(),
eq(QUERY_VECTOR),
eq(k),
eq(HNSW_METHOD_PARAMETERS),
any(),
eq(new FixedBitSet(0).getBits()),
anyInt(),
any()
)
() -> JNIService.queryIndex(anyLong(), eq(QUERY_VECTOR), eq(k), eq(HNSW_METHOD_PARAMETERS), any(), eq(null), anyInt(), any())
).thenReturn(getFilteredKNNQueryResults());

final Bits liveDocsBits = mock(Bits.class);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.knn.integ;

import com.google.common.collect.ImmutableMap;
import lombok.SneakyThrows;
import lombok.extern.log4j.Log4j2;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.opensearch.client.Response;
import org.opensearch.common.settings.Settings;
import org.opensearch.knn.KNNJsonQueryBuilder;
import org.opensearch.knn.KNNRestTestCase;
import org.opensearch.knn.index.KNNSettings;
import java.util.List;

import static org.opensearch.knn.common.KNNConstants.FAISS_NAME;
import static org.opensearch.knn.common.KNNConstants.METHOD_HNSW;

@Log4j2
public class FilteredSearchANNSearchIT extends KNNRestTestCase {
@SneakyThrows
public void testFilteredSearchWithFaissHnsw_whenFiltersMatchAllDocs_thenReturnCorrectResults() {
String filterFieldName = "color";
final int expectResultSize = randomIntBetween(1, 3);
final String filterValue = "red";
createKnnIndex(INDEX_NAME, getKNNDefaultIndexSettings(), createKnnIndexMapping(FIELD_NAME, 3, METHOD_HNSW, FAISS_NAME));

// ingest 4 vector docs into the index with the same field {"color": "red"}
for (int i = 0; i < 4; i++) {
addKnnDocWithAttributes(String.valueOf(i), new float[] { i, i, i }, ImmutableMap.of(filterFieldName, filterValue));
}

refreshIndex(INDEX_NAME);
forceMergeKnnIndex(INDEX_NAME);

updateIndexSettings(INDEX_NAME, Settings.builder().put(KNNSettings.ADVANCED_FILTERED_EXACT_SEARCH_THRESHOLD, 0));

Float[] queryVector = { 3f, 3f, 3f };
// All docs in one segment will match the filters value
String query = KNNJsonQueryBuilder.builder()
.fieldName(FIELD_NAME)
.vector(queryVector)
.k(expectResultSize)
.filterFieldName(filterFieldName)
.filterValue(filterValue)
.build()
.getQueryString();
Response response = searchKNNIndex(INDEX_NAME, query, expectResultSize);
String entity = EntityUtils.toString(response.getEntity());
List<String> docIds = parseIds(entity);
assertEquals(expectResultSize, docIds.size());
assertEquals(expectResultSize, parseTotalSearchHits(entity));
}
}

0 comments on commit c599062

Please sign in to comment.