From ced203546780d7d83e85c3abc957a2787e2b9bad Mon Sep 17 00:00:00 2001 From: Vijayan Balasubramanian Date: Mon, 23 Dec 2024 13:20:06 -0800 Subject: [PATCH] Remove skip building graph check for quantization use case For quantization indices, we don't have to apply building graph check since it is already faster, this is now only applied for fp32/16 indices and where threshold is configured. Signed-off-by: Vijayan Balasubramanian --- CHANGELOG.md | 1 + .../KNN990Codec/NativeEngines990KnnVectorsWriter.java | 10 ++++------ .../NativeEngines990KnnVectorsWriterFlushTests.java | 11 ++++------- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b32de7bc53..7149c3d577 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Optimizes lucene query execution to prevent unnecessary rewrites (#2305)[https://github.com/opensearch-project/k-NN/pull/2305] - Add check to directly use ANN Search when filters match all docs. (#2320)[https://github.com/opensearch-project/k-NN/pull/2320] - Use one formula to calculate cosine similarity (#2357)[https://github.com/opensearch-project/k-NN/pull/2357] +- Remove skip building graph check for quantization use case (#2430)[https://github.com/opensearch-project/k-NN/2430] ### Bug Fixes * Fixing the bug when a segment has no vector field present for disk based vector search (#2282)[https://github.com/opensearch-project/k-NN/pull/2282] * Fix for NPE while merging segments after all the vector fields docs are deleted (#2365)[https://github.com/opensearch-project/k-NN/pull/2365] diff --git a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriter.java b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriter.java index 7c86365776..ca69d29454 100644 --- a/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriter.java +++ b/src/main/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriter.java @@ -104,9 +104,8 @@ public void flush(int maxDoc, final Sorter.DocMap sortMap) throws IOException { field.getVectors() ); final QuantizationState quantizationState = train(field.getFieldInfo(), knnVectorValuesSupplier, totalLiveDocs); - // Check only after quantization state writer finish writing its state, since it is required - // even if there are no graph files in segment, which will be later used by exact search - if (shouldSkipBuildingVectorDataStructure(totalLiveDocs)) { + // should skip graph building only for non quantization use case and if threshold is met + if (quantizationState == null && shouldSkipBuildingVectorDataStructure(totalLiveDocs)) { log.info( "Skip building vector data structure for field: {}, as liveDoc: {} is less than the threshold {} during flush", fieldInfo.name, @@ -144,9 +143,8 @@ public void mergeOneField(final FieldInfo fieldInfo, final MergeState mergeState } final QuantizationState quantizationState = train(fieldInfo, knnVectorValuesSupplier, totalLiveDocs); - // Check only after quantization state writer finish writing its state, since it is required - // even if there are no graph files in segment, which will be later used by exact search - if (shouldSkipBuildingVectorDataStructure(totalLiveDocs)) { + // should skip graph building only for non quantization use case and if threshold is met + if (quantizationState == null && shouldSkipBuildingVectorDataStructure(totalLiveDocs)) { log.info( "Skip building vector data structure for field: {}, as liveDoc: {} is less than the threshold {} during merge", fieldInfo.name, diff --git a/src/test/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriterFlushTests.java b/src/test/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriterFlushTests.java index 03d0f61607..d72e435fad 100644 --- a/src/test/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriterFlushTests.java +++ b/src/test/java/org/opensearch/knn/index/codec/KNN990Codec/NativeEngines990KnnVectorsWriterFlushTests.java @@ -630,8 +630,7 @@ public void testFlush_whenThresholdIsEqualToFixedValue_thenRelevantNativeIndexWr } } - public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThresholdIsNotMet_thenSkipBuildingGraph() - throws IOException { + public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThresholdIsNotMet_thenStillBuildGraph() throws IOException { // Given List> expectedVectorValues = new ArrayList<>(); final Map sizeMap = new HashMap<>(); @@ -714,7 +713,6 @@ public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThres } else { assertEquals(0, knn990QuantWriterMockedConstruction.constructed().size()); } - verifyNoInteractions(nativeIndexWriter); IntStream.range(0, vectorsPerField.size()).forEach(i -> { try { if (vectorsPerField.get(i).isEmpty()) { @@ -729,12 +727,12 @@ public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThres final Long expectedTimesGetVectorValuesIsCalled = vectorsPerField.stream().filter(Predicate.not(Map::isEmpty)).count(); knnVectorValuesFactoryMockedStatic.verify( () -> KNNVectorValuesFactory.getVectorValues(any(VectorDataType.class), any(DocsWithFieldSet.class), any()), - times(Math.toIntExact(expectedTimesGetVectorValuesIsCalled)) + times(Math.toIntExact(expectedTimesGetVectorValuesIsCalled) * 2) ); } } - public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThresholdIsNegative_thenSkipBuildingGraph() + public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThresholdIsNegative_thenStillBuildGraph() throws IOException { // Given List> expectedVectorValues = new ArrayList<>(); @@ -817,7 +815,6 @@ public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThres } else { assertEquals(0, knn990QuantWriterMockedConstruction.constructed().size()); } - verifyNoInteractions(nativeIndexWriter); IntStream.range(0, vectorsPerField.size()).forEach(i -> { try { if (vectorsPerField.get(i).isEmpty()) { @@ -832,7 +829,7 @@ public void testFlush_whenQuantizationIsProvided_whenBuildGraphDatStructureThres final Long expectedTimesGetVectorValuesIsCalled = vectorsPerField.stream().filter(Predicate.not(Map::isEmpty)).count(); knnVectorValuesFactoryMockedStatic.verify( () -> KNNVectorValuesFactory.getVectorValues(any(VectorDataType.class), any(DocsWithFieldSet.class), any()), - times(Math.toIntExact(expectedTimesGetVectorValuesIsCalled)) + times(Math.toIntExact(expectedTimesGetVectorValuesIsCalled) * 2) ); } }