Skip to content

Commit e728d46

Browse files
authored
[8.x] Support 7x segments as archive in 8x / 9x (#125389) (#125918)
Support 7x segments as archive in 8x / 9x - backport to 8.x
1 parent 360737b commit e728d46

File tree

17 files changed

+868
-76
lines changed

17 files changed

+868
-76
lines changed

docs/changelog/125389.yaml

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 125389
2+
summary: Support indices created in ES 6.x and updated in ES 7.x using different Lucene codecs
3+
as archive in current version.
4+
area: Search
5+
type: bug
6+
issues: []

x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodec.java

+128-35
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77

88
package org.elasticsearch.xpack.lucene.bwc.codecs;
99

10-
import org.apache.lucene.backward_codecs.lucene70.Lucene70Codec;
1110
import org.apache.lucene.codecs.Codec;
1211
import org.apache.lucene.codecs.FieldInfosFormat;
1312
import org.apache.lucene.codecs.FieldsConsumer;
@@ -18,6 +17,7 @@
1817
import org.apache.lucene.codecs.PostingsFormat;
1918
import org.apache.lucene.codecs.SegmentInfoFormat;
2019
import org.apache.lucene.codecs.TermVectorsFormat;
20+
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
2121
import org.apache.lucene.index.FieldInfo;
2222
import org.apache.lucene.index.FieldInfos;
2323
import org.apache.lucene.index.Fields;
@@ -27,7 +27,12 @@
2727
import org.apache.lucene.index.Terms;
2828
import org.apache.lucene.store.Directory;
2929
import org.apache.lucene.store.IOContext;
30+
import org.apache.lucene.util.Version;
3031
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.BWCLucene70Codec;
32+
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene80.BWCLucene80Codec;
33+
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene84.BWCLucene84Codec;
34+
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene86.BWCLucene86Codec;
35+
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene87.BWCLucene87Codec;
3136

3237
import java.io.IOException;
3338
import java.util.ArrayList;
@@ -39,55 +44,122 @@
3944
*/
4045
public abstract class BWCCodec extends Codec {
4146

47+
private final FieldInfosFormat fieldInfosFormat;
48+
private final SegmentInfoFormat segmentInfosFormat;
49+
private final PostingsFormat postingsFormat;
50+
4251
protected BWCCodec(String name) {
4352
super(name);
44-
}
4553

46-
@Override
47-
public NormsFormat normsFormat() {
48-
throw new UnsupportedOperationException();
49-
}
54+
this.fieldInfosFormat = new FieldInfosFormat() {
55+
final FieldInfosFormat wrappedFormat = originalFieldInfosFormat();
5056

51-
@Override
52-
public TermVectorsFormat termVectorsFormat() {
53-
throw new UnsupportedOperationException();
54-
}
57+
@Override
58+
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext)
59+
throws IOException {
60+
return filterFields(wrappedFormat.read(directory, segmentInfo, segmentSuffix, iocontext));
61+
}
5562

56-
@Override
57-
public KnnVectorsFormat knnVectorsFormat() {
58-
throw new UnsupportedOperationException();
59-
}
63+
@Override
64+
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context)
65+
throws IOException {
66+
wrappedFormat.write(directory, segmentInfo, segmentSuffix, infos, context);
67+
}
68+
};
69+
70+
this.segmentInfosFormat = new SegmentInfoFormat() {
71+
final SegmentInfoFormat wrappedFormat = originalSegmentInfoFormat();
6072

61-
protected static SegmentInfoFormat wrap(SegmentInfoFormat wrapped) {
62-
return new SegmentInfoFormat() {
6373
@Override
6474
public SegmentInfo read(Directory directory, String segmentName, byte[] segmentID, IOContext context) throws IOException {
65-
return wrap(wrapped.read(directory, segmentName, segmentID, context));
75+
return wrap(wrappedFormat.read(directory, segmentName, segmentID, context));
6676
}
6777

6878
@Override
6979
public void write(Directory dir, SegmentInfo info, IOContext ioContext) throws IOException {
70-
wrapped.write(dir, info, ioContext);
80+
wrappedFormat.write(dir, info, ioContext);
7181
}
7282
};
73-
}
7483

75-
protected static FieldInfosFormat wrap(FieldInfosFormat wrapped) {
76-
return new FieldInfosFormat() {
84+
this.postingsFormat = new PerFieldPostingsFormat() {
7785
@Override
78-
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext)
79-
throws IOException {
80-
return filterFields(wrapped.read(directory, segmentInfo, segmentSuffix, iocontext));
81-
}
82-
83-
@Override
84-
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context)
85-
throws IOException {
86-
wrapped.write(directory, segmentInfo, segmentSuffix, infos, context);
86+
public PostingsFormat getPostingsFormatForField(String field) {
87+
throw new UnsupportedOperationException("Old codecs can't be used for writing");
8788
}
8889
};
8990
}
9091

92+
@Override
93+
public final FieldInfosFormat fieldInfosFormat() {
94+
return fieldInfosFormat;
95+
}
96+
97+
@Override
98+
public final SegmentInfoFormat segmentInfoFormat() {
99+
return segmentInfosFormat;
100+
}
101+
102+
@Override
103+
public PostingsFormat postingsFormat() {
104+
return postingsFormat;
105+
}
106+
107+
/**
108+
* This method is not supported for archive indices and older codecs and will always throw an {@link UnsupportedOperationException}.
109+
* This method is never called in practice, as we rewrite field infos to override the info about which features are present in
110+
* the index. Even if norms are present, field info lies about it.
111+
*
112+
* @return nothing, as this method always throws an exception
113+
* @throws UnsupportedOperationException always thrown to indicate that this method is not supported
114+
*/
115+
@Override
116+
public final NormsFormat normsFormat() {
117+
throw new UnsupportedOperationException();
118+
}
119+
120+
/**
121+
* This method is not supported for archive indices and older codecs and will always throw an {@link UnsupportedOperationException}.
122+
* This method is never called in practice, as we rewrite field infos to override the info about which features are present in
123+
* the index. Even if term vectors are present, field info lies about it.
124+
*
125+
* @return nothing, as this method always throws an exception
126+
* @throws UnsupportedOperationException always thrown to indicate that this method is not supported
127+
*/
128+
@Override
129+
public final TermVectorsFormat termVectorsFormat() {
130+
throw new UnsupportedOperationException();
131+
}
132+
133+
/**
134+
* This method is not supported for archive indices and older codecs and will always throw an {@link UnsupportedOperationException}.
135+
* KNN vectors can't be present because they are not supported in any of the Lucene versions that we support for archive indices.
136+
*
137+
* @return nothing, as this method always throws an exception
138+
* @throws UnsupportedOperationException always thrown to indicate that this method is not supported
139+
*/
140+
@Override
141+
public final KnnVectorsFormat knnVectorsFormat() {
142+
throw new UnsupportedOperationException();
143+
}
144+
145+
/**
146+
* Returns the original {@link SegmentInfoFormat} used by this codec.
147+
* This method should be implemented by subclasses to provide the specific
148+
* {@link SegmentInfoFormat} that this codec is intended to use.
149+
*
150+
* @return the original {@link SegmentInfoFormat} used by this codec
151+
*/
152+
protected abstract SegmentInfoFormat originalSegmentInfoFormat();
153+
154+
/**
155+
* Returns the original {@link FieldInfosFormat} used by this codec.
156+
* This method should be implemented by subclasses to provide the specific
157+
* {@link FieldInfosFormat} that this codec is intended to use.
158+
*
159+
* @return the original {@link FieldInfosFormat} used by this codec
160+
*/
161+
protected abstract FieldInfosFormat originalFieldInfosFormat();
162+
91163
// mark all fields as no term vectors, no norms, no payloads, and no vectors.
92164
private static FieldInfos filterFields(FieldInfos fieldInfos) {
93165
List<FieldInfo> fieldInfoCopy = new ArrayList<>(fieldInfos.size());
@@ -119,15 +191,14 @@ private static FieldInfos filterFields(FieldInfos fieldInfos) {
119191
}
120192

121193
public static SegmentInfo wrap(SegmentInfo segmentInfo) {
122-
// special handling for Lucene70Codec (which is currently bundled with Lucene)
123-
// Use BWCLucene70Codec instead as that one extends BWCCodec (similar to all other older codecs)
124-
final Codec codec = segmentInfo.getCodec() instanceof Lucene70Codec ? new BWCLucene70Codec() : segmentInfo.getCodec();
194+
Codec codec = getBackwardCompatibleCodec(segmentInfo.getCodec());
195+
125196
final SegmentInfo segmentInfo1 = new SegmentInfo(
126197
segmentInfo.dir,
127198
// Use Version.LATEST instead of original version, otherwise SegmentCommitInfo will bark when processing (N-1 limitation)
128199
// TODO: perhaps store the original version information in attributes so that we can retrieve it later when needed?
129-
org.apache.lucene.util.Version.LATEST,
130-
org.apache.lucene.util.Version.LATEST,
200+
Version.LATEST,
201+
Version.LATEST,
131202
segmentInfo.name,
132203
segmentInfo.maxDoc(),
133204
segmentInfo.getUseCompoundFile(),
@@ -142,6 +213,28 @@ public static SegmentInfo wrap(SegmentInfo segmentInfo) {
142213
return segmentInfo1;
143214
}
144215

216+
/**
217+
* Returns a backward-compatible codec for the given codec. If the codec is one of the known Lucene 8.x codecs,
218+
* it returns a corresponding read-only backward-compatible codec. Otherwise, it returns the original codec.
219+
* Lucene 8.x codecs are still shipped with the current version of Lucene.
220+
* The earlier codecs, which we provide directly, are also read-only backward-compatible, but they don't require the renaming.
221+
*
222+
* This switch is only for indices created in ES 6.x and later written to in ES 7.x (Lucene 8.x). Indices created
223+
* in ES 7.x can be read directly by ES if marked read-only, without going through archive indices.
224+
*/
225+
private static Codec getBackwardCompatibleCodec(Codec codec) {
226+
if (codec == null) return null;
227+
228+
return switch (codec.getClass().getSimpleName()) {
229+
case "Lucene70Codec" -> new BWCLucene70Codec();
230+
case "Lucene80Codec" -> new BWCLucene80Codec();
231+
case "Lucene84Codec" -> new BWCLucene84Codec();
232+
case "Lucene86Codec" -> new BWCLucene86Codec();
233+
case "Lucene87Codec" -> new BWCLucene87Codec();
234+
default -> codec;
235+
};
236+
}
237+
145238
/**
146239
* In-memory postings format that shows no postings available.
147240
*/

x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60Codec.java

+7-8
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,7 @@
4747
*/
4848
@Deprecated
4949
public class Lucene60Codec extends BWCCodec {
50-
private final FieldInfosFormat fieldInfosFormat = wrap(new Lucene60FieldInfosFormat());
51-
private final SegmentInfoFormat segmentInfosFormat = wrap(new Lucene50SegmentInfoFormat());
50+
5251
private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat();
5352
private final CompoundFormat compoundFormat = new Lucene50CompoundFormat();
5453
private final StoredFieldsFormat storedFieldsFormat;
@@ -89,18 +88,18 @@ public Lucene60Codec(Lucene50StoredFieldsFormat.Mode mode) {
8988
}
9089

9190
@Override
92-
public final StoredFieldsFormat storedFieldsFormat() {
93-
return storedFieldsFormat;
91+
protected FieldInfosFormat originalFieldInfosFormat() {
92+
return new Lucene60FieldInfosFormat();
9493
}
9594

9695
@Override
97-
public final FieldInfosFormat fieldInfosFormat() {
98-
return fieldInfosFormat;
96+
protected SegmentInfoFormat originalSegmentInfoFormat() {
97+
return new Lucene50SegmentInfoFormat();
9998
}
10099

101100
@Override
102-
public SegmentInfoFormat segmentInfoFormat() {
103-
return segmentInfosFormat;
101+
public final StoredFieldsFormat storedFieldsFormat() {
102+
return storedFieldsFormat;
104103
}
105104

106105
@Override

x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60MetadataOnlyPointsFormat.java

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import java.io.IOException;
2929

3030
/**
31+
* This is a fork of {@link org.apache.lucene.backward_codecs.lucene60.Lucene60PointsFormat}
3132
* Allows reading metadata only from Lucene 6.0 point format
3233
**/
3334
public class Lucene60MetadataOnlyPointsFormat extends PointsFormat {

x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60MetadataOnlyPointsReader.java

+5-2
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,10 @@
3434
import java.util.HashMap;
3535
import java.util.Map;
3636

37-
/** Reads the metadata of point values previously written with Lucene60PointsWriter */
37+
/**
38+
* This is a fork of {@link org.apache.lucene.backward_codecs.lucene60.Lucene60PointsReader}
39+
* Reads the metadata of point values previously written with Lucene60PointsWriter
40+
*/
3841
public final class Lucene60MetadataOnlyPointsReader extends PointsReader {
3942
final IndexInput dataIn;
4043
final SegmentReadState readState;
@@ -105,7 +108,7 @@ public Lucene60MetadataOnlyPointsReader(SegmentReadState readState) throws IOExc
105108
int fieldNumber = ent.getKey();
106109
long fp = ent.getValue();
107110
dataIn.seek(fp);
108-
PointValues reader = new MetadataOnlyBKDReader(dataIn);
111+
PointValues reader = new MetadataOnlyBKDReader(dataIn, false);
109112
readers.put(fieldNumber, reader);
110113
}
111114

x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/MetadataOnlyBKDReader.java

+18-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ public class MetadataOnlyBKDReader extends PointValues {
4747
final int docCount;
4848
final int version;
4949

50-
public MetadataOnlyBKDReader(IndexInput metaIn) throws IOException {
50+
public MetadataOnlyBKDReader(IndexInput metaIn, boolean isVersionPost86) throws IOException {
5151
version = CodecUtil.checkHeader(metaIn, "BKD", VERSION_START, VERSION_CURRENT);
5252
final int numDims = metaIn.readVInt();
5353
final int numIndexDims;
@@ -85,6 +85,23 @@ public MetadataOnlyBKDReader(IndexInput metaIn) throws IOException {
8585

8686
pointCount = metaIn.readVLong();
8787
docCount = metaIn.readVInt();
88+
89+
// The pre-8.6 code does not read the following fields that its standard Lucene counterpart does. After experimenting with the
90+
// code, we came to the conclusion that these are the last fields being read, which are not needed in the metadata-only reader, and
91+
// we can safely ignore them when loading the file. Although by coincidence, nothing breaks if we read a couple of VLongs, as long
92+
// as some bytes are available to read.
93+
//
94+
// The extra reads have been introduced to process IndexInput created with Lucene86Codec+, where a new BKD format has been
95+
// introduced. We have stricter checks around the header and footer starting from the 86 formats hence we do need to
96+
// consume all the data input there but not in previous formats.
97+
//
98+
// For correctness, we added a version check here: the additional fields are read if and only if the version is 8.6 or higher.
99+
if (isVersionPost86) {
100+
metaIn.readVInt();
101+
metaIn.readLong();
102+
// The following fields are not used in this class, but we need to read them to advance the pointer
103+
metaIn.readLong();
104+
}
88105
}
89106

90107
@Override

x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene62/Lucene62Codec.java

+7-8
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,7 @@
4747
*/
4848
@Deprecated
4949
public class Lucene62Codec extends BWCCodec {
50-
private final FieldInfosFormat fieldInfosFormat = wrap(new Lucene60FieldInfosFormat());
51-
private final SegmentInfoFormat segmentInfosFormat = wrap(new Lucene62SegmentInfoFormat());
50+
5251
private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat();
5352
private final CompoundFormat compoundFormat = new Lucene50CompoundFormat();
5453
private final StoredFieldsFormat storedFieldsFormat;
@@ -80,18 +79,18 @@ public Lucene62Codec(Lucene50StoredFieldsFormat.Mode mode) {
8079
}
8180

8281
@Override
83-
public final StoredFieldsFormat storedFieldsFormat() {
84-
return storedFieldsFormat;
82+
protected FieldInfosFormat originalFieldInfosFormat() {
83+
return new Lucene60FieldInfosFormat();
8584
}
8685

8786
@Override
88-
public final FieldInfosFormat fieldInfosFormat() {
89-
return fieldInfosFormat;
87+
protected SegmentInfoFormat originalSegmentInfoFormat() {
88+
return new Lucene62SegmentInfoFormat();
9089
}
9190

9291
@Override
93-
public SegmentInfoFormat segmentInfoFormat() {
94-
return segmentInfosFormat;
92+
public final StoredFieldsFormat storedFieldsFormat() {
93+
return storedFieldsFormat;
9594
}
9695

9796
@Override

0 commit comments

Comments
 (0)