-
Notifications
You must be signed in to change notification settings - Fork 143
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add vector data upload implementation to RemoteIndexBuildStrategy
Signed-off-by: Jay Deng <[email protected]>
- Loading branch information
Showing
10 changed files
with
899 additions
and
56 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
95 changes: 95 additions & 0 deletions
95
src/main/java/org/opensearch/knn/index/codec/nativeindex/remote/DocIdInputStream.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.knn.index.codec.nativeindex.remote; | ||
|
||
import lombok.extern.log4j.Log4j2; | ||
import org.apache.lucene.search.DocIdSetIterator; | ||
import org.opensearch.knn.index.vectorvalues.KNNVectorValues; | ||
|
||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.nio.ByteBuffer; | ||
import java.nio.ByteOrder; | ||
|
||
import static org.opensearch.knn.index.codec.util.KNNCodecUtil.initializeVectorValues; | ||
|
||
/** | ||
* {@link InputStream} implementation of doc ids backed by {@link KNNVectorValues} rather than any file. Intended for use by {@link RemoteIndexBuildStrategy} | ||
*/ | ||
@Log4j2 | ||
public class DocIdInputStream extends InputStream { | ||
private final KNNVectorValues<?> knnVectorValues; | ||
// Doc ids are 4 byte integers, byte read() only returns a single byte, so we will need to track the byte position within a doc id. | ||
// For simplicity, and to maintain the byte ordering, we use a buffer with size of 1 int. | ||
private ByteBuffer currentBuffer; | ||
|
||
public DocIdInputStream(KNNVectorValues<?> knnVectorValues) throws IOException { | ||
this.currentBuffer = ByteBuffer.allocate(Integer.BYTES).order(ByteOrder.LITTLE_ENDIAN); | ||
this.knnVectorValues = knnVectorValues; | ||
initializeVectorValues(this.knnVectorValues); | ||
reloadBuffer(); | ||
} | ||
|
||
@Override | ||
public int read() throws IOException { | ||
if (currentBuffer == null) { | ||
return -1; | ||
} | ||
|
||
if (!currentBuffer.hasRemaining()) { | ||
advanceAndReloadBuffer(); | ||
if (currentBuffer == null) { | ||
return -1; | ||
} | ||
} | ||
|
||
return currentBuffer.get() & 0xFF; | ||
} | ||
|
||
@Override | ||
public int read(byte[] b, int off, int len) throws IOException { | ||
if (currentBuffer == null) { | ||
return -1; | ||
} | ||
|
||
int available = currentBuffer.remaining(); | ||
if (available <= 0) { | ||
advanceAndReloadBuffer(); | ||
if (currentBuffer == null) { | ||
return -1; | ||
} | ||
available = currentBuffer.remaining(); | ||
} | ||
|
||
int bytesToRead = Math.min(available, len); | ||
currentBuffer.get(b, off, bytesToRead); | ||
return bytesToRead; | ||
} | ||
|
||
/** | ||
* Advances to the next doc, and then refills the buffer with the new doc. | ||
* @throws IOException | ||
*/ | ||
private void advanceAndReloadBuffer() throws IOException { | ||
int docId = knnVectorValues.nextDoc(); | ||
if (docId != -1 && docId != DocIdSetIterator.NO_MORE_DOCS) { | ||
reloadBuffer(); | ||
} else { | ||
// Reset buffer to null to indicate that there are no more docs to be read | ||
currentBuffer = null; | ||
} | ||
} | ||
|
||
/** | ||
* Reload {@link currentBuffer} with the current doc id that {@link knnVectorValues} is pointing to | ||
* @throws IOException | ||
*/ | ||
private void reloadBuffer() throws IOException { | ||
currentBuffer.clear(); | ||
currentBuffer.putInt(knnVectorValues.docId()); | ||
currentBuffer.position(0); | ||
} | ||
} |
Oops, something went wrong.