Skip to content

Commit

Permalink
Support custom codecs via stored fields.
Browse files Browse the repository at this point in the history
Adds a segment info attribute that stores the delegate codec name so that
custom codecs can be used in conjunction with the knn codec.

Signed-off-by: John Mazanec <[email protected]>
  • Loading branch information
jmazanec15 committed Feb 20, 2025
1 parent edcbe31 commit 833312f
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,11 @@ private StoredFieldsFormat getStoredFieldsFormat() {
}
return null;
}));
return new DerivedSourceStoredFieldsFormat(delegate.storedFieldsFormat(), derivedSourceReadersSupplier, mapperService);
return new DerivedSourceStoredFieldsFormat(
delegate.storedFieldsFormat(),
derivedSourceReadersSupplier,
mapperService,
delegate.getName()
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
package org.opensearch.knn.index.codec.KNN9120Codec;

import lombok.AllArgsConstructor;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.StoredFieldsWriter;
Expand All @@ -32,15 +33,26 @@
@AllArgsConstructor
public class DerivedSourceStoredFieldsFormat extends StoredFieldsFormat {

private static final String DELEGATE_CODEC_KEY = "knn_delegate_codec";

private final StoredFieldsFormat delegate;
private final DerivedSourceReadersSupplier derivedSourceReadersSupplier;
// IMPORTANT Do not rely on this for the reader, it will be null if SPI is used
@Nullable
private final MapperService mapperService;
// IMPORTANT Do not rely on this for the reader, it will be null if SPI is used
@Nullable
private final String delegateCodecName;

@Override
public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo segmentInfo, FieldInfos fieldInfos, IOContext ioContext)
throws IOException {
StoredFieldsFormat delegateFromWriting = delegate;
if (segmentInfo.getAttribute(DELEGATE_CODEC_KEY) != null) {
String delegateCodecName = segmentInfo.getAttribute(DELEGATE_CODEC_KEY);
delegateFromWriting = Codec.forName(delegateCodecName).storedFieldsFormat();
}

List<FieldInfo> derivedVectorFields = null;
for (FieldInfo fieldInfo : fieldInfos) {
if (DERIVED_VECTOR_FIELD_ATTRIBUTE_TRUE_VALUE.equals(fieldInfo.attributes().get(DERIVED_VECTOR_FIELD_ATTRIBUTE_KEY))) {
Expand All @@ -53,10 +65,10 @@ public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo segmentI
}
// If no fields have it enabled, we can just short-circuit and return the delegate's fieldReader
if (derivedVectorFields == null || derivedVectorFields.isEmpty()) {
return delegate.fieldsReader(directory, segmentInfo, fieldInfos, ioContext);
return delegateFromWriting.fieldsReader(directory, segmentInfo, fieldInfos, ioContext);
}
return new DerivedSourceStoredFieldsReader(
delegate.fieldsReader(directory, segmentInfo, fieldInfos, ioContext),
delegateFromWriting.fieldsReader(directory, segmentInfo, fieldInfos, ioContext),
derivedVectorFields,
derivedSourceReadersSupplier,
new SegmentReadState(directory, segmentInfo, fieldInfos, ioContext)
Expand All @@ -65,6 +77,24 @@ public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo segmentI

@Override
public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo segmentInfo, IOContext ioContext) throws IOException {
// We write the delegate codec name into the segmentInfo attributes so that we can read it when loading the
// codec from SPI.
// This is similar to what's done in
// https://github.com/opensearch-project/custom-codecs/blob/2.19.0.0/src/main/java/org/opensearch/index/codec/customcodecs/Lucene912CustomStoredFieldsFormat.java#L95-L100
String previous = segmentInfo.putAttribute(DELEGATE_CODEC_KEY, delegateCodecName);
if (previous != null && previous.equals(delegateCodecName) == false) {
throw new IllegalStateException(
"found existing value for "
+ DELEGATE_CODEC_KEY
+ " for segment: "
+ segmentInfo.name
+ " old = "
+ previous
+ ", new = "
+ delegateCodecName
);
}

StoredFieldsWriter delegateWriter = delegate.fieldsWriter(directory, segmentInfo, ioContext);
if (mapperService != null && KNNSettings.isKNNDerivedSourceEnabled(mapperService.getIndexSettings().getSettings())) {
List<String> vectorFieldTypes = new ArrayList<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,11 @@ private StoredFieldsFormat getStoredFieldsFormat() {
}
return null;
}));
return new DerivedSourceStoredFieldsFormat(delegate.storedFieldsFormat(), derivedSourceReadersSupplier, mapperService);
return new DerivedSourceStoredFieldsFormat(
delegate.storedFieldsFormat(),
derivedSourceReadersSupplier,
mapperService,
delegate.getName()
);
}
}

0 comments on commit 833312f

Please sign in to comment.