diff --git a/CHANGELOG.md b/CHANGELOG.md index 519a077f49..6d6a2eb791 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Enhancements ### Bug Fixes ### Infrastructure +* Removed JDK 11 and 17 version from CI runs [#1921](https://github.com/opensearch-project/k-NN/pull/1921) +* Upgrade min JDK compatibility to JDK 21 [#2422](https://github.com/opensearch-project/k-NN/pull/2422) ### Documentation ### Maintenance ### Refactoring @@ -18,21 +20,37 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add Support for Multi Values in innerHit for Nested k-NN Fields in Lucene and FAISS (#2283)[https://github.com/opensearch-project/k-NN/pull/2283] - Add binary index support for Lucene engine. (#2292)[https://github.com/opensearch-project/k-NN/pull/2292] - Add expand_nested_docs Parameter support to NMSLIB engine (#2331)[https://github.com/opensearch-project/k-NN/pull/2331] +- Add cosine similarity support for faiss engine (#2376)[https://github.com/opensearch-project/k-NN/pull/2376] ### Enhancements - Introduced a writing layer in native engines where relies on the writing interface to process IO. (#2241)[https://github.com/opensearch-project/k-NN/pull/2241] - Allow method parameter override for training based indices (#2290) https://github.com/opensearch-project/k-NN/pull/2290] - Optimizes lucene query execution to prevent unnecessary rewrites (#2305)[https://github.com/opensearch-project/k-NN/pull/2305] +- Add check to directly use ANN Search when filters match all docs. 
(#2320)[https://github.com/opensearch-project/k-NN/pull/2320] +- Use one formula to calculate cosine similarity (#2357)[https://github.com/opensearch-project/k-NN/pull/2357] +- Add WithFieldName implementation to KNNQueryBuilder (#2398)[https://github.com/opensearch-project/k-NN/pull/2398] +- Make the build work for M series MacOS without manual code changes and local JAVA_HOME config (#2397)[https://github.com/opensearch-project/k-NN/pull/2397] +- Remove DocsWithFieldSet reference from NativeEngineFieldVectorsWriter (#2408)[https://github.com/opensearch-project/k-NN/pull/2408] ### Bug Fixes * Fixing the bug when a segment has no vector field present for disk based vector search (#2282)[https://github.com/opensearch-project/k-NN/pull/2282] +* Fixing the bug where search fails with "fields" parameter for an index with a knn_vector field (#2314)[https://github.com/opensearch-project/k-NN/pull/2314] +* Fix for NPE while merging segments after all the vector fields docs are deleted (#2365)[https://github.com/opensearch-project/k-NN/pull/2365] * Allow validation for non knn index only after 2.17.0 (#2315)[https://github.com/opensearch-project/k-NN/pull/2315] +* Fixing the bug to prevent updating the index.knn setting after index creation (#2348)[https://github.com/opensearch-project/k-NN/pull/2348] * Release query vector memory after execution (#2346)[https://github.com/opensearch-project/k-NN/pull/2346] * Fix shard level rescoring disabled setting flag (#2352)[https://github.com/opensearch-project/k-NN/pull/2352] +* Fix filter rewrite logic that produced inconsistent / incorrect results when the filter was rewritten for shards (#2359)[https://github.com/opensearch-project/k-NN/pull/2359] +* Fixing it to retrieve space_type from index setting when both method and top level don't have the value. 
[#2374](https://github.com/opensearch-project/k-NN/pull/2374) +* Fixing the bug where setting rescore as false for on_disk knn_vector query is a no-op (#2399)[https://github.com/opensearch-project/k-NN/pull/2399] ### Infrastructure * Updated C++ version in JNI from c++11 to c++17 [#2259](https://github.com/opensearch-project/k-NN/pull/2259) * Upgrade bytebuddy and objenesis version to match OpenSearch core and, update github ci runner for macos [#2279](https://github.com/opensearch-project/k-NN/pull/2279) ### Documentation ### Maintenance * Select index settings based on cluster version[2236](https://github.com/opensearch-project/k-NN/pull/2236) +* Added periodic cache maintenance for QuantizationStateCache and NativeMemoryCache [#2308](https://github.com/opensearch-project/k-NN/pull/2308) * Added null checks for fieldInfo in ExactSearcher to avoid NPE while running exact search for segments with no vector field (#2278)[https://github.com/opensearch-project/k-NN/pull/2278] +* Added Lucene BWC tests (#2313)[https://github.com/opensearch-project/k-NN/pull/2313] * Upgrade jsonpath from 2.8.0 to 2.9.0[2325](https://github.com/opensearch-project/k-NN/pull/2325) +* Bump Faiss commit from 1f42e81 to 0cbc2a8 to accelerate hamming distance calculation using _mm512_popcnt_epi64 intrinsic and also add avx512-fp16 instructions to boost performance [#2381](https://github.com/opensearch-project/k-NN/pull/2381) +* Enabled indices.breaker.total.use_real_memory setting via build.gradle for integTest Cluster to catch heap CB in local ITs and github CI actions [#2395](https://github.com/opensearch-project/k-NN/pull/2395/) ### Refactoring diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngineFieldVectorsWriter.java b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngineFieldVectorsWriter.java index 389c76e49a..88eee0ee7e 100644 --- a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngineFieldVectorsWriter.java +++ 
b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngineFieldVectorsWriter.java @@ -14,7 +14,6 @@ import lombok.Getter; import org.apache.lucene.codecs.KnnFieldVectorsWriter; import org.apache.lucene.codecs.hnsw.FlatFieldVectorsWriter; -import org.apache.lucene.index.DocsWithFieldSet; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.util.InfoStream; import org.apache.lucene.util.RamUsageEstimator; @@ -43,9 +42,8 @@ class NativeEngineFieldVectorsWriter extends KnnFieldVectorsWriter { @Getter private final Map vectors; private int lastDocID = -1; - @Getter - private final DocsWithFieldSet docsWithField; private final InfoStream infoStream; + @Getter private final FlatFieldVectorsWriter flatFieldVectorsWriter; @SuppressWarnings("unchecked") @@ -75,7 +73,6 @@ private NativeEngineFieldVectorsWriter( this.fieldInfo = fieldInfo; this.infoStream = infoStream; vectors = new HashMap<>(); - this.docsWithField = new DocsWithFieldSet(); this.flatFieldVectorsWriter = flatFieldVectorsWriter; } @@ -101,7 +98,6 @@ public void addValue(int docID, T vectorValue) throws IOException { // ensuring that vector is provided to flatFieldWriter. 
flatFieldVectorsWriter.addValue(docID, vectorValue); vectors.put(docID, vectorValue); - docsWithField.add(docID); lastDocID = docID; } @@ -121,10 +117,9 @@ public T copyValue(T vectorValue) { */ @Override public long ramBytesUsed() { - return SHALLOW_SIZE + docsWithField.ramBytesUsed() + (long) this.vectors.size() * (long) (RamUsageEstimator.NUM_BYTES_OBJECT_REF - + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER) + (long) this.vectors.size() * RamUsageEstimator.shallowSizeOfInstance( - Integer.class - ) + (long) vectors.size() * fieldInfo.getVectorDimension() * fieldInfo.getVectorEncoding().byteSize + flatFieldVectorsWriter - .ramBytesUsed(); + return SHALLOW_SIZE + flatFieldVectorsWriter.getDocsWithFieldSet().ramBytesUsed() + (long) this.vectors.size() + * (long) (RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER) + (long) this.vectors.size() + * RamUsageEstimator.shallowSizeOfInstance(Integer.class) + (long) vectors.size() * fieldInfo.getVectorDimension() + * fieldInfo.getVectorEncoding().byteSize + flatFieldVectorsWriter.ramBytesUsed(); } } diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriter.java b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriter.java index 7c86365776..3966a2c956 100644 --- a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriter.java +++ b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriter.java @@ -100,7 +100,7 @@ public void flush(int maxDoc, final Sorter.DocMap sortMap) throws IOException { } final Supplier> knnVectorValuesSupplier = () -> getVectorValues( vectorDataType, - field.getDocsWithField(), + field.getFlatFieldVectorsWriter().getDocsWithFieldSet(), field.getVectors() ); final QuantizationState quantizationState = train(field.getFieldInfo(), knnVectorValuesSupplier, totalLiveDocs); diff --git 
a/src/test/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngineFieldVectorsWriterTests.java b/src/test/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngineFieldVectorsWriterTests.java index 4f68a360ec..707ebb2a69 100644 --- a/src/test/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngineFieldVectorsWriterTests.java +++ b/src/test/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngineFieldVectorsWriterTests.java @@ -13,6 +13,7 @@ import lombok.SneakyThrows; import org.apache.lucene.codecs.hnsw.FlatFieldVectorsWriter; +import org.apache.lucene.index.DocsWithFieldSet; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.util.InfoStream; @@ -115,6 +116,7 @@ public void testRamByteUsed_whenValidInput_thenSuccess() { Mockito.when(fieldInfo.getVectorDimension()).thenReturn(2); FlatFieldVectorsWriter mockedFlatFieldVectorsWriter = Mockito.mock(FlatFieldVectorsWriter.class); Mockito.when(mockedFlatFieldVectorsWriter.ramBytesUsed()).thenReturn(1L); + Mockito.when(mockedFlatFieldVectorsWriter.getDocsWithFieldSet()).thenReturn(new DocsWithFieldSet()); final NativeEngineFieldVectorsWriter floatWriter = (NativeEngineFieldVectorsWriter) NativeEngineFieldVectorsWriter .create(fieldInfo, mockedFlatFieldVectorsWriter, InfoStream.getDefault()); // testing for value > 0 as we don't have a concrete way to find out expected bytes. This can OS dependent too. 
diff --git a/src/test/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriterFlushTests.java b/src/test/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriterFlushTests.java index 03d0f61607..6685e2b223 100644 --- a/src/test/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriterFlushTests.java +++ b/src/test/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriterFlushTests.java @@ -161,7 +161,7 @@ public void testFlush() { throw new RuntimeException(e); } - DocsWithFieldSet docsWithFieldSet = field.getDocsWithField(); + DocsWithFieldSet docsWithFieldSet = field.getFlatFieldVectorsWriter().getDocsWithFieldSet(); knnVectorValuesFactoryMockedStatic.when( () -> KNNVectorValuesFactory.getVectorValues(VectorDataType.FLOAT, docsWithFieldSet, vectorsPerField.get(i)) ).thenReturn(expectedVectorValues.get(i)); @@ -250,7 +250,7 @@ public void testFlush_WithQuantization() { throw new RuntimeException(e); } - DocsWithFieldSet docsWithFieldSet = field.getDocsWithField(); + DocsWithFieldSet docsWithFieldSet = field.getFlatFieldVectorsWriter().getDocsWithFieldSet(); knnVectorValuesFactoryMockedStatic.when( () -> KNNVectorValuesFactory.getVectorValues(VectorDataType.FLOAT, docsWithFieldSet, vectorsPerField.get(i)) ).thenReturn(expectedVectorValues.get(i)); @@ -352,7 +352,7 @@ public void testFlush_whenThresholdIsNegative_thenNativeIndexWriterIsNeverCalled throw new RuntimeException(e); } - DocsWithFieldSet docsWithFieldSet = field.getDocsWithField(); + DocsWithFieldSet docsWithFieldSet = field.getFlatFieldVectorsWriter().getDocsWithFieldSet(); knnVectorValuesFactoryMockedStatic.when( () -> KNNVectorValuesFactory.getVectorValues(VectorDataType.FLOAT, docsWithFieldSet, vectorsPerField.get(i)) ).thenReturn(expectedVectorValues.get(i)); @@ -429,7 +429,7 @@ public void testFlush_whenThresholdIsGreaterThanVectorSize_thenNativeIndexWriter throw new RuntimeException(e); } - 
DocsWithFieldSet docsWithFieldSet = field.getDocsWithField(); + DocsWithFieldSet docsWithFieldSet = field.getFlatFieldVectorsWriter().getDocsWithFieldSet(); knnVectorValuesFactoryMockedStatic.when( () -> KNNVectorValuesFactory.getVectorValues(VectorDataType.FLOAT, docsWithFieldSet, vectorsPerField.get(i)) ).thenReturn(expectedVectorValues.get(i)); @@ -507,7 +507,7 @@ public void testFlush_whenThresholdIsEqualToMinNumberOfVectors_thenNativeIndexWr throw new RuntimeException(e); } - DocsWithFieldSet docsWithFieldSet = field.getDocsWithField(); + DocsWithFieldSet docsWithFieldSet = field.getFlatFieldVectorsWriter().getDocsWithFieldSet(); knnVectorValuesFactoryMockedStatic.when( () -> KNNVectorValuesFactory.getVectorValues(VectorDataType.FLOAT, docsWithFieldSet, vectorsPerField.get(i)) ).thenReturn(expectedVectorValues.get(i)); @@ -593,7 +593,7 @@ public void testFlush_whenThresholdIsEqualToFixedValue_thenRelevantNativeIndexWr throw new RuntimeException(e); } - DocsWithFieldSet docsWithFieldSet = field.getDocsWithField(); + DocsWithFieldSet docsWithFieldSet = field.getFlatFieldVectorsWriter().getDocsWithFieldSet(); knnVectorValuesFactoryMockedStatic.when( () -> KNNVectorValuesFactory.getVectorValues(VectorDataType.FLOAT, docsWithFieldSet, vectorsPerField.get(i)) ).thenReturn(expectedVectorValues.get(i)); @@ -683,7 +683,7 @@ public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThres throw new RuntimeException(e); } - DocsWithFieldSet docsWithFieldSet = field.getDocsWithField(); + DocsWithFieldSet docsWithFieldSet = field.getFlatFieldVectorsWriter().getDocsWithFieldSet(); knnVectorValuesFactoryMockedStatic.when( () -> KNNVectorValuesFactory.getVectorValues(VectorDataType.FLOAT, docsWithFieldSet, vectorsPerField.get(i)) ).thenReturn(expectedVectorValues.get(i)); @@ -786,7 +786,7 @@ public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThres throw new RuntimeException(e); } - DocsWithFieldSet docsWithFieldSet = 
field.getDocsWithField(); + DocsWithFieldSet docsWithFieldSet = field.getFlatFieldVectorsWriter().getDocsWithFieldSet(); knnVectorValuesFactoryMockedStatic.when( () -> KNNVectorValuesFactory.getVectorValues(VectorDataType.FLOAT, docsWithFieldSet, vectorsPerField.get(i)) ).thenReturn(expectedVectorValues.get(i)); @@ -848,11 +848,13 @@ private FieldInfo fieldInfo(int fieldNumber, VectorEncoding vectorEncoding, Map< private NativeEngineFieldVectorsWriter nativeEngineFieldVectorsWriter(FieldInfo fieldInfo, Map vectors) { NativeEngineFieldVectorsWriter fieldVectorsWriter = mock(NativeEngineFieldVectorsWriter.class); + FlatFieldVectorsWriter flatFieldVectorsWriter = mock(FlatFieldVectorsWriter.class); DocsWithFieldSet docsWithFieldSet = new DocsWithFieldSet(); vectors.keySet().stream().sorted().forEach(docsWithFieldSet::add); when(fieldVectorsWriter.getFieldInfo()).thenReturn(fieldInfo); when(fieldVectorsWriter.getVectors()).thenReturn(vectors); - when(fieldVectorsWriter.getDocsWithField()).thenReturn(docsWithFieldSet); + when(fieldVectorsWriter.getFlatFieldVectorsWriter()).thenReturn(flatFieldVectorsWriter); + when(flatFieldVectorsWriter.getDocsWithFieldSet()).thenReturn(docsWithFieldSet); return fieldVectorsWriter; } } diff --git a/src/test/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriterMergeTests.java b/src/test/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriterMergeTests.java index 77f3fd8eda..cdc372bda2 100644 --- a/src/test/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriterMergeTests.java +++ b/src/test/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriterMergeTests.java @@ -370,11 +370,13 @@ private FieldInfo fieldInfo(int fieldNumber, VectorEncoding vectorEncoding, Map< private NativeEngineFieldVectorsWriter nativeEngineFieldVectorsWriter(FieldInfo fieldInfo, Map vectors) { NativeEngineFieldVectorsWriter fieldVectorsWriter = 
mock(NativeEngineFieldVectorsWriter.class); + FlatFieldVectorsWriter flatFieldVectorsWriter = mock(FlatFieldVectorsWriter.class); DocsWithFieldSet docsWithFieldSet = new DocsWithFieldSet(); vectors.keySet().stream().sorted().forEach(docsWithFieldSet::add); when(fieldVectorsWriter.getFieldInfo()).thenReturn(fieldInfo); when(fieldVectorsWriter.getVectors()).thenReturn(vectors); - when(fieldVectorsWriter.getDocsWithField()).thenReturn(docsWithFieldSet); + when(fieldVectorsWriter.getFlatFieldVectorsWriter()).thenReturn(flatFieldVectorsWriter); + when(flatFieldVectorsWriter.getDocsWithFieldSet()).thenReturn(docsWithFieldSet); return fieldVectorsWriter; } }