You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2022/05/10 18:36:01 UTC
[lucene] branch vectors-disi-direct updated: LUCENE-10502: Refactor hnswVectors format (#870)
This is an automated email from the ASF dual-hosted git repository.
mayya pushed a commit to branch vectors-disi-direct
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/vectors-disi-direct by this push:
new 51077e0e697 LUCENE-10502: Refactor hnswVectors format (#870)
51077e0e697 is described below
commit 51077e0e6978b4243fe753b7491f3a0eed0d1b13
Author: Lu Xugang <q1...@Gmail.com>
AuthorDate: Wed May 11 02:35:54 2022 +0800
LUCENE-10502: Refactor hnswVectors format (#870)
---
lucene/backward-codecs/src/java/module-info.java | 7 +-
.../backward_codecs}/lucene91/Lucene91Codec.java | 7 +-
.../lucene91/Lucene91HnswVectorsFormat.java | 14 +-
.../lucene91/Lucene91HnswVectorsReader.java | 226 +-
.../backward_codecs}/lucene91/package-info.java | 8 +-
.../services/org.apache.lucene.codecs.Codec | 1 +
.../org.apache.lucene.codecs.KnnVectorsFormat | 1 +
.../lucene91/Lucene91HnswVectorsWriter.java | 53 +-
.../backward_codecs/lucene91/Lucene91RWCodec.java | 44 +
.../lucene91/Lucene91RWHnswVectorsFormat.java | 43 +
.../lucene91/TestLucene91HnswVectorsFormat.java | 13 +-
.../backward_index/TestBackwardsCompatibility.java | 2218 ++++++++++++++++++++
.../benchmark/byTask/tasks/CreateIndexTask.java | 4 +-
lucene/core/src/java/module-info.java | 9 +-
.../src/java/org/apache/lucene/codecs/Codec.java | 2 +-
.../org/apache/lucene/codecs/KnnVectorsFormat.java | 2 +-
.../Lucene92Codec.java} | 24 +-
.../Lucene92HnswVectorsFormat.java} | 49 +-
.../Lucene92HnswVectorsReader.java} | 28 +-
.../Lucene92HnswVectorsWriter.java} | 28 +-
.../OffHeapVectorValues.java | 8 +-
.../{lucene91 => lucene92}/package-info.java | 10 +-
.../services/org.apache.lucene.codecs.Codec | 2 +-
.../org.apache.lucene.codecs.KnnVectorsFormat | 2 +-
...tLucene90StoredFieldsFormatHighCompression.java | 10 +-
.../test/org/apache/lucene/index/TestKnnGraph.java | 24 +-
.../apache/lucene/util/hnsw/KnnGraphTester.java | 12 +-
.../org/apache/lucene/util/hnsw/TestHnswGraph.java | 12 +-
.../search/suggest/document/TestSuggestField.java | 4 +-
.../util/TestRuleSetupAndRestoreClassEnv.java | 8 +-
.../org/apache/lucene/tests/util/TestUtil.java | 8 +-
31 files changed, 2657 insertions(+), 224 deletions(-)
diff --git a/lucene/backward-codecs/src/java/module-info.java b/lucene/backward-codecs/src/java/module-info.java
index ae4bd84fa62..c3ae5ef4159 100644
--- a/lucene/backward-codecs/src/java/module-info.java
+++ b/lucene/backward-codecs/src/java/module-info.java
@@ -30,6 +30,7 @@ module org.apache.lucene.backward_codecs {
exports org.apache.lucene.backward_codecs.lucene86;
exports org.apache.lucene.backward_codecs.lucene87;
exports org.apache.lucene.backward_codecs.lucene90;
+ exports org.apache.lucene.backward_codecs.lucene91;
exports org.apache.lucene.backward_codecs.packed;
exports org.apache.lucene.backward_codecs.store;
@@ -39,11 +40,13 @@ module org.apache.lucene.backward_codecs {
org.apache.lucene.backward_codecs.lucene50.Lucene50PostingsFormat,
org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsFormat;
provides org.apache.lucene.codecs.KnnVectorsFormat with
- org.apache.lucene.backward_codecs.lucene90.Lucene90HnswVectorsFormat;
+ org.apache.lucene.backward_codecs.lucene90.Lucene90HnswVectorsFormat,
+ org.apache.lucene.backward_codecs.lucene91.Lucene91HnswVectorsFormat;
provides org.apache.lucene.codecs.Codec with
org.apache.lucene.backward_codecs.lucene80.Lucene80Codec,
org.apache.lucene.backward_codecs.lucene84.Lucene84Codec,
org.apache.lucene.backward_codecs.lucene86.Lucene86Codec,
org.apache.lucene.backward_codecs.lucene87.Lucene87Codec,
- org.apache.lucene.backward_codecs.lucene90.Lucene90Codec;
+ org.apache.lucene.backward_codecs.lucene90.Lucene90Codec,
+ org.apache.lucene.backward_codecs.lucene91.Lucene91Codec;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91Codec.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91Codec.java
similarity index 97%
copy from lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91Codec.java
copy to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91Codec.java
index 2c289a888c1..1f185131bfe 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91Codec.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91Codec.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.codecs.lucene91;
+package org.apache.lucene.backward_codecs.lucene91;
import java.util.Objects;
import org.apache.lucene.codecs.Codec;
@@ -48,9 +48,6 @@ import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
* Implements the Lucene 9.1 index format
*
* <p>If you want to reuse functionality of this codec in another codec, extend {@link FilterCodec}.
- *
- * @see org.apache.lucene.codecs.lucene91 package documentation for file format details.
- * @lucene.experimental
*/
public class Lucene91Codec extends Codec {
@@ -164,7 +161,7 @@ public class Lucene91Codec extends Codec {
}
@Override
- public final KnnVectorsFormat knnVectorsFormat() {
+ public KnnVectorsFormat knnVectorsFormat() {
return knnVectorsFormat;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91HnswVectorsFormat.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsFormat.java
similarity index 93%
copy from lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91HnswVectorsFormat.java
copy to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsFormat.java
index 49eaf74c240..dac6a4a9c59 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91HnswVectorsFormat.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsFormat.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.lucene.codecs.lucene91;
+package org.apache.lucene.backward_codecs.lucene91;
import java.io.IOException;
import org.apache.lucene.codecs.KnnVectorsFormat;
@@ -82,7 +82,7 @@ import org.apache.lucene.util.hnsw.HnswGraph;
*
* @lucene.experimental
*/
-public final class Lucene91HnswVectorsFormat extends KnnVectorsFormat {
+public class Lucene91HnswVectorsFormat extends KnnVectorsFormat {
static final String META_CODEC_NAME = "Lucene91HnswVectorsFormatMeta";
static final String VECTOR_DATA_CODEC_NAME = "Lucene91HnswVectorsFormatData";
@@ -101,25 +101,25 @@ public final class Lucene91HnswVectorsFormat extends KnnVectorsFormat {
*/
public static final int DEFAULT_BEAM_WIDTH = 100;
- static final int DIRECT_MONOTONIC_BLOCK_SHIFT = 16;
-
/**
* Controls how many of the nearest neighbor candidates are connected to the new node. Defaults to
* {@link Lucene91HnswVectorsFormat#DEFAULT_MAX_CONN}. See {@link HnswGraph} for more details.
*/
- private final int maxConn;
+ final int maxConn;
/**
* The number of candidate neighbors to track while searching the graph for each newly inserted
* node. Defaults to {@link
* HnswGraph} for details.
*/
- private final int beamWidth;
+ final int beamWidth;
+ /** A constructor for vectors format with default parameters */
public Lucene91HnswVectorsFormat() {
this(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH);
}
+ /** A constructor for vectors format */
public Lucene91HnswVectorsFormat(int maxConn, int beamWidth) {
super("Lucene91HnswVectorsFormat");
this.maxConn = maxConn;
@@ -128,7 +128,7 @@ public final class Lucene91HnswVectorsFormat extends KnnVectorsFormat {
@Override
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
- return new Lucene91HnswVectorsWriter(state, maxConn, beamWidth);
+ throw new UnsupportedOperationException("Old codecs may only be used for reading");
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91HnswVectorsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java
similarity index 70%
copy from lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91HnswVectorsReader.java
copy to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java
index 1df95f980b9..7c4f5916706 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91HnswVectorsReader.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java
@@ -15,20 +15,24 @@
* limitations under the License.
*/
-package org.apache.lucene.codecs.lucene91;
+package org.apache.lucene.backward_codecs.lucene91;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import java.io.IOException;
+import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
+import java.util.function.IntUnaryOperator;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.RandomAccessVectorValues;
+import org.apache.lucene.index.RandomAccessVectorValuesProducer;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.index.VectorValues;
@@ -39,12 +43,12 @@ import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.hnsw.HnswGraph;
import org.apache.lucene.util.hnsw.HnswGraphSearcher;
import org.apache.lucene.util.hnsw.NeighborQueue;
-import org.apache.lucene.util.packed.DirectMonotonicReader;
/**
* Reads vectors from the index segments along with index data structures supporting KNN search.
@@ -185,7 +189,7 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
return VectorSimilarityFunction.values()[similarityFunctionId];
}
- private FieldEntry readField(IndexInput input) throws IOException {
+ private FieldEntry readField(DataInput input) throws IOException {
VectorSimilarityFunction similarityFunction = readSimilarityFunction(input);
return new FieldEntry(input, similarityFunction);
}
@@ -196,6 +200,9 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
totalBytes +=
RamUsageEstimator.sizeOfMap(
fields, RamUsageEstimator.shallowSizeOfInstance(FieldEntry.class));
+ for (FieldEntry entry : fields.values()) {
+ totalBytes += RamUsageEstimator.sizeOf(entry.ordToDoc);
+ }
return totalBytes;
}
@@ -208,7 +215,7 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
@Override
public VectorValues getVectorValues(String field) throws IOException {
FieldEntry fieldEntry = fields.get(field);
- return OffHeapVectorValues.load(fieldEntry, vectorData);
+ return getOffHeapVectorValues(fieldEntry);
}
@Override
@@ -222,7 +229,7 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
// bound k by total number of vectors to prevent oversizing data structures
k = Math.min(k, fieldEntry.size());
- OffHeapVectorValues vectorValues = OffHeapVectorValues.load(fieldEntry, vectorData);
+ OffHeapVectorValues vectorValues = getOffHeapVectorValues(fieldEntry);
NeighborQueue results =
HnswGraphSearcher.search(
@@ -231,7 +238,7 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
vectorValues,
fieldEntry.similarityFunction,
getGraph(fieldEntry),
- vectorValues.getAcceptOrds(acceptDocs),
+ getAcceptOrds(acceptDocs, fieldEntry),
visitedLimit);
int i = 0;
@@ -240,7 +247,7 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
int node = results.topNode();
float score = fieldEntry.similarityFunction.convertToScore(results.topScore());
results.pop();
- scoreDocs[scoreDocs.length - ++i] = new ScoreDoc(vectorValues.ordToDoc(node), score);
+ scoreDocs[scoreDocs.length - ++i] = new ScoreDoc(fieldEntry.ordToDoc(node), score);
}
TotalHits.Relation relation =
@@ -250,6 +257,33 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
return new TopDocs(new TotalHits(results.visitedCount(), relation), scoreDocs);
}
+ private OffHeapVectorValues getOffHeapVectorValues(FieldEntry fieldEntry) throws IOException {
+ IndexInput bytesSlice =
+ vectorData.slice("vector-data", fieldEntry.vectorDataOffset, fieldEntry.vectorDataLength);
+ return new OffHeapVectorValues(
+ fieldEntry.dimension, fieldEntry.size(), fieldEntry.ordToDoc, bytesSlice);
+ }
+
+ private Bits getAcceptOrds(Bits acceptDocs, FieldEntry fieldEntry) {
+ if (fieldEntry.ordToDoc == null) {
+ return acceptDocs;
+ }
+ if (acceptDocs == null) {
+ return null;
+ }
+ return new Bits() {
+ @Override
+ public boolean get(int index) {
+ return acceptDocs.get(fieldEntry.ordToDoc(index));
+ }
+
+ @Override
+ public int length() {
+ return fieldEntry.size;
+ }
+ };
+ }
+
/** Get knn graph values; used for testing */
public HnswGraph getGraph(String field) throws IOException {
FieldInfo info = fieldInfos.fieldInfo(field);
@@ -275,7 +309,7 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
IOUtils.close(vectorData, vectorIndex);
}
- static class FieldEntry {
+ private static class FieldEntry {
final VectorSimilarityFunction similarityFunction;
final long vectorDataOffset;
@@ -285,29 +319,14 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
final int maxConn;
final int numLevels;
final int dimension;
- final int size;
+ private final int size;
+ final int[] ordToDoc;
+ private final IntUnaryOperator ordToDocOperator;
final int[][] nodesByLevel;
// for each level the start offsets in vectorIndex file from where to read neighbours
final long[] graphOffsetsByLevel;
- // the following four variables used to read docIds encoded by IndexDISI
- // special values of docsWithFieldOffset are -1 and -2
- // -1 : dense
- // -2 : empty
- // other: sparse
- final long docsWithFieldOffset;
- final long docsWithFieldLength;
- final short jumpTableEntryCount;
- final byte denseRankPower;
-
- // the following four variables used to read ordToDoc encoded by DirectMonotonicWriter
- // note that only spare case needs to store ordToDoc
- final long addressesOffset;
- final int blockShift;
- final DirectMonotonicReader.Meta meta;
- final long addressesLength;
-
- FieldEntry(IndexInput input, VectorSimilarityFunction similarityFunction) throws IOException {
+ FieldEntry(DataInput input, VectorSimilarityFunction similarityFunction) throws IOException {
this.similarityFunction = similarityFunction;
vectorDataOffset = input.readVLong();
vectorDataLength = input.readVLong();
@@ -316,24 +335,23 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
dimension = input.readInt();
size = input.readInt();
- docsWithFieldOffset = input.readLong();
- docsWithFieldLength = input.readLong();
- jumpTableEntryCount = input.readShort();
- denseRankPower = input.readByte();
-
- // dense or empty
- if (docsWithFieldOffset == -1 || docsWithFieldOffset == -2) {
- addressesOffset = 0;
- blockShift = 0;
- meta = null;
- addressesLength = 0;
+ int denseSparseMarker = input.readByte();
+ if (denseSparseMarker == -1) {
+ ordToDoc = null; // each document has a vector value
} else {
- // sparse
- addressesOffset = input.readLong();
- blockShift = input.readVInt();
- meta = DirectMonotonicReader.loadMeta(input, size, blockShift);
- addressesLength = input.readLong();
+ assert denseSparseMarker == 0;
+ // TODO: Can we read docIDs from disk directly instead of loading giant arrays in memory?
+ // Or possibly switch to something like DirectMonotonicReader if it doesn't slow down
+ // searches.
+
+ // as not all docs have vector values, fill a mapping from dense vector ordinals to docIds
+ ordToDoc = new int[size];
+ for (int i = 0; i < size; i++) {
+ int doc = input.readInt();
+ ordToDoc[i] = doc;
+ }
}
+ ordToDocOperator = ordToDoc == null ? IntUnaryOperator.identity() : (ord) -> ordToDoc[ord];
// read nodes by level
maxConn = input.readInt();
@@ -370,6 +388,128 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
int size() {
return size;
}
+
+ int ordToDoc(int ord) {
+ return ordToDocOperator.applyAsInt(ord);
+ }
+ }
+
+ /** Read the vector values from the index input. This supports both iterated and random access. */
+ static class OffHeapVectorValues extends VectorValues
+ implements RandomAccessVectorValues, RandomAccessVectorValuesProducer {
+
+ private final int dimension;
+ private final int size;
+ private final int[] ordToDoc;
+ private final IntUnaryOperator ordToDocOperator;
+ private final IndexInput dataIn;
+ private final BytesRef binaryValue;
+ private final ByteBuffer byteBuffer;
+ private final int byteSize;
+ private final float[] value;
+
+ private int ord = -1;
+ private int doc = -1;
+
+ OffHeapVectorValues(int dimension, int size, int[] ordToDoc, IndexInput dataIn) {
+ this.dimension = dimension;
+ this.size = size;
+ this.ordToDoc = ordToDoc;
+ ordToDocOperator = ordToDoc == null ? IntUnaryOperator.identity() : (ord) -> ordToDoc[ord];
+ this.dataIn = dataIn;
+ byteSize = Float.BYTES * dimension;
+ byteBuffer = ByteBuffer.allocate(byteSize);
+ value = new float[dimension];
+ binaryValue = new BytesRef(byteBuffer.array(), byteBuffer.arrayOffset(), byteSize);
+ }
+
+ @Override
+ public int dimension() {
+ return dimension;
+ }
+
+ @Override
+ public int size() {
+ return size;
+ }
+
+ @Override
+ public float[] vectorValue() throws IOException {
+ dataIn.seek((long) ord * byteSize);
+ dataIn.readFloats(value, 0, value.length);
+ return value;
+ }
+
+ @Override
+ public BytesRef binaryValue() throws IOException {
+ dataIn.seek((long) ord * byteSize);
+ dataIn.readBytes(byteBuffer.array(), byteBuffer.arrayOffset(), byteSize, false);
+ return binaryValue;
+ }
+
+ @Override
+ public int docID() {
+ return doc;
+ }
+
+ @Override
+ public int nextDoc() {
+ if (++ord >= size) {
+ doc = NO_MORE_DOCS;
+ } else {
+ doc = ordToDocOperator.applyAsInt(ord);
+ }
+ return doc;
+ }
+
+ @Override
+ public int advance(int target) {
+ assert docID() < target;
+
+ if (ordToDoc == null) {
+ ord = target;
+ } else {
+ ord = Arrays.binarySearch(ordToDoc, ord + 1, ordToDoc.length, target);
+ if (ord < 0) {
+ ord = -(ord + 1);
+ }
+ }
+
+ if (ord < size) {
+ doc = ordToDocOperator.applyAsInt(ord);
+ } else {
+ doc = NO_MORE_DOCS;
+ }
+ return doc;
+ }
+
+ @Override
+ public long cost() {
+ return size;
+ }
+
+ @Override
+ public RandomAccessVectorValues randomAccess() {
+ return new OffHeapVectorValues(dimension, size, ordToDoc, dataIn.clone());
+ }
+
+ @Override
+ public float[] vectorValue(int targetOrd) throws IOException {
+ dataIn.seek((long) targetOrd * byteSize);
+ dataIn.readFloats(value, 0, value.length);
+ return value;
+ }
+
+ @Override
+ public BytesRef binaryValue(int targetOrd) throws IOException {
+ readValue(targetOrd);
+ return binaryValue;
+ }
+
+ private void readValue(int targetOrd) throws IOException {
+ dataIn.seek((long) targetOrd * byteSize);
+ dataIn.readBytes(byteBuffer.array(), byteBuffer.arrayOffset(), byteSize);
+ }
}
/** Read the nearest-neighbors graph from the index input */
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene91/package-info.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/package-info.java
similarity index 98%
copy from lucene/core/src/java/org/apache/lucene/codecs/lucene91/package-info.java
copy to lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/package-info.java
index 04603320664..d3c660755d0 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene91/package-info.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/package-info.java
@@ -180,8 +180,8 @@
* of files, recording dimensionally indexed fields, to enable fast numeric range filtering
* and large numeric values like BigInteger and BigDecimal (1D) and geographic shape
* intersection (2D, 3D).
- * <li>{@link org.apache.lucene.codecs.lucene91.Lucene91HnswVectorsFormat Vector values}. The
- * vector format stores numeric vectors in a format optimized for random access and
+ * <li>{@link org.apache.lucene.backward_codecs.lucene91.Lucene91HnswVectorsFormat Vector values}.
+ * The vector format stores numeric vectors in a format optimized for random access and
* computation, supporting high-dimensional nearest-neighbor search.
* </ul>
*
@@ -310,7 +310,7 @@
* <td>Holds indexed points</td>
* </tr>
* <tr>
- * <td>{@link org.apache.lucene.codecs.lucene91.Lucene91HnswVectorsFormat Vector values}</td>
+ * <td>{@link org.apache.lucene.backward_codecs.lucene91.Lucene91HnswVectorsFormat Vector values}</td>
* <td>.vec, .vem</td>
* <td>Holds indexed vectors; <code>.vec</code> files contain the raw vector data, and
* <code>.vem</code> the vector metadata</td>
@@ -417,4 +417,4 @@
* <code>UInt64</code> values, or better yet, {@link org.apache.lucene.store.DataOutput#writeVInt
* VInt} values which have no limit. </div>
*/
-package org.apache.lucene.codecs.lucene91;
+package org.apache.lucene.backward_codecs.lucene91;
diff --git a/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec b/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
index 72e05ab3198..74957355697 100644
--- a/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
+++ b/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
@@ -18,3 +18,4 @@ org.apache.lucene.backward_codecs.lucene84.Lucene84Codec
org.apache.lucene.backward_codecs.lucene86.Lucene86Codec
org.apache.lucene.backward_codecs.lucene87.Lucene87Codec
org.apache.lucene.backward_codecs.lucene90.Lucene90Codec
+org.apache.lucene.backward_codecs.lucene91.Lucene91Codec
diff --git a/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat b/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat
index 17d89f3be7f..550912cbf86 100644
--- a/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat
+++ b/lucene/backward-codecs/src/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat
@@ -14,3 +14,4 @@
# limitations under the License.
org.apache.lucene.backward_codecs.lucene90.Lucene90HnswVectorsFormat
+org.apache.lucene.backward_codecs.lucene91.Lucene91HnswVectorsFormat
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91HnswVectorsWriter.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsWriter.java
similarity index 83%
copy from lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91HnswVectorsWriter.java
copy to lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsWriter.java
index 0653a8938b5..6e1527541b5 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91HnswVectorsWriter.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsWriter.java
@@ -15,9 +15,8 @@
* limitations under the License.
*/
-package org.apache.lucene.codecs.lucene91;
+package org.apache.lucene.backward_codecs.lucene91;
-import static org.apache.lucene.codecs.lucene91.Lucene91HnswVectorsFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import java.io.IOException;
@@ -25,7 +24,6 @@ import java.util.Arrays;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.KnnVectorsWriter;
-import org.apache.lucene.codecs.lucene90.IndexedDISI;
import org.apache.lucene.index.DocsWithFieldSet;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
@@ -42,7 +40,6 @@ import org.apache.lucene.util.hnsw.HnswGraph.NodesIterator;
import org.apache.lucene.util.hnsw.HnswGraphBuilder;
import org.apache.lucene.util.hnsw.NeighborArray;
import org.apache.lucene.util.hnsw.OnHeapHnswGraph;
-import org.apache.lucene.util.packed.DirectMonotonicWriter;
/**
* Writes vector values and knn graphs to index segments.
@@ -143,12 +140,11 @@ public final class Lucene91HnswVectorsWriter extends KnnVectorsWriter {
long vectorIndexOffset = vectorIndex.getFilePointer();
// build the graph using the temporary vector data
- // we use Lucene91HnswVectorsReader.DenseOffHeapVectorValues for the graph construction
- // doesn't need to know docIds
+ // we pass null for ordToDoc mapping, for the graph construction doesn't need to know docIds
// TODO: separate random access vector values from DocIdSetIterator?
- OffHeapVectorValues offHeapVectors =
- new OffHeapVectorValues.DenseOffHeapVectorValues(
- vectors.dimension(), docsWithField.cardinality(), vectorDataInput);
+ Lucene91HnswVectorsReader.OffHeapVectorValues offHeapVectors =
+ new Lucene91HnswVectorsReader.OffHeapVectorValues(
+ vectors.dimension(), docsWithField.cardinality(), null, vectorDataInput);
OnHeapHnswGraph graph =
offHeapVectors.size() == 0
? null
@@ -211,41 +207,14 @@ public final class Lucene91HnswVectorsWriter extends KnnVectorsWriter {
// write docIDs
int count = docsWithField.cardinality();
meta.writeInt(count);
- if (count == 0) {
- meta.writeLong(-2); // docsWithFieldOffset
- meta.writeLong(0L); // docsWithFieldLength
- meta.writeShort((short) -1); // jumpTableEntryCount
- meta.writeByte((byte) -1); // denseRankPower
- } else if (count == maxDoc) {
- meta.writeLong(-1); // docsWithFieldOffset
- meta.writeLong(0L); // docsWithFieldLength
- meta.writeShort((short) -1); // jumpTableEntryCount
- meta.writeByte((byte) -1); // denseRankPower
+ if (count == maxDoc) {
+ meta.writeByte((byte) -1); // dense marker, each document has a vector value
} else {
- long offset = vectorData.getFilePointer();
- meta.writeLong(offset); // docsWithFieldOffset
- final short jumpTableEntryCount =
- IndexedDISI.writeBitSet(
- docsWithField.iterator(), vectorData, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
- meta.writeLong(vectorData.getFilePointer() - offset); // docsWithFieldLength
- meta.writeShort(jumpTableEntryCount);
- meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER);
-
- // write ordToDoc mapping
- long start = vectorData.getFilePointer();
- meta.writeLong(start);
- meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
- // dense case and empty case do not need to store ordToMap mapping
- final DirectMonotonicWriter ordToDocWriter =
- DirectMonotonicWriter.getInstance(meta, vectorData, count, DIRECT_MONOTONIC_BLOCK_SHIFT);
- DocIdSetIterator iterator = docsWithField.iterator();
- for (int doc = iterator.nextDoc();
- doc != DocIdSetIterator.NO_MORE_DOCS;
- doc = iterator.nextDoc()) {
- ordToDocWriter.add(doc);
+ meta.writeByte((byte) 0); // sparse marker, some documents don't have vector values
+ DocIdSetIterator iter = docsWithField.iterator();
+ for (int doc = iter.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iter.nextDoc()) {
+ meta.writeInt(doc);
}
- ordToDocWriter.finish();
- meta.writeLong(vectorData.getFilePointer() - start);
}
meta.writeInt(maxConn);
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/Lucene91RWCodec.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/Lucene91RWCodec.java
new file mode 100644
index 00000000000..573f682754c
--- /dev/null
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/Lucene91RWCodec.java
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.backward_codecs.lucene91;
+
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
+
+public class Lucene91RWCodec extends Lucene91Codec {
+
+ private final KnnVectorsFormat defaultKnnVectorsFormat;
+ private final KnnVectorsFormat knnVectorsFormat =
+ new PerFieldKnnVectorsFormat() {
+ @Override
+ public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
+ return defaultKnnVectorsFormat;
+ }
+ };
+
+ public Lucene91RWCodec() {
+ this.defaultKnnVectorsFormat =
+ new Lucene91RWHnswVectorsFormat(
+ Lucene91HnswVectorsFormat.DEFAULT_MAX_CONN,
+ Lucene91HnswVectorsFormat.DEFAULT_BEAM_WIDTH);
+ }
+
+ @Override
+ public KnnVectorsFormat knnVectorsFormat() {
+ return knnVectorsFormat;
+ }
+}
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/Lucene91RWHnswVectorsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/Lucene91RWHnswVectorsFormat.java
new file mode 100644
index 00000000000..9a878265e6a
--- /dev/null
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/Lucene91RWHnswVectorsFormat.java
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.backward_codecs.lucene91;
+
+import java.io.IOException;
+import org.apache.lucene.codecs.KnnVectorsWriter;
+import org.apache.lucene.index.SegmentWriteState;
+
+public class Lucene91RWHnswVectorsFormat extends Lucene91HnswVectorsFormat {
+
+ public Lucene91RWHnswVectorsFormat(int maxConn, int beamWidth) {
+ super(maxConn, beamWidth);
+ }
+
+ @Override
+ public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
+ return new Lucene91HnswVectorsWriter(state, maxConn, beamWidth);
+ }
+
+ @Override
+ public String toString() {
+ return "Lucene91RWHnswVectorsFormat(name = Lucene91RWHnswVectorsFormat, maxConn = "
+ + maxConn
+ + ", beamWidth="
+ + beamWidth
+ + ")";
+ }
+}
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene91/TestLucene91HnswVectorsFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/TestLucene91HnswVectorsFormat.java
similarity index 79%
rename from lucene/core/src/test/org/apache/lucene/codecs/lucene91/TestLucene91HnswVectorsFormat.java
rename to lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/TestLucene91HnswVectorsFormat.java
index fe828f018cb..4ed6e809be6 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene91/TestLucene91HnswVectorsFormat.java
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_codecs/lucene91/TestLucene91HnswVectorsFormat.java
@@ -14,21 +14,20 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.codecs.lucene91;
+package org.apache.lucene.backward_codecs.lucene91;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomIntBetween;
-import static org.apache.lucene.codecs.lucene91.Lucene91HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
-import static org.apache.lucene.codecs.lucene91.Lucene91HnswVectorsFormat.DEFAULT_MAX_CONN;
+import static org.apache.lucene.backward_codecs.lucene91.Lucene91HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
+import static org.apache.lucene.backward_codecs.lucene91.Lucene91HnswVectorsFormat.DEFAULT_MAX_CONN;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
-import org.apache.lucene.tests.util.TestUtil;
public class TestLucene91HnswVectorsFormat extends BaseKnnVectorsFormatTestCase {
@Override
protected Codec getCodec() {
- return TestUtil.getDefaultCodec();
+ return new Lucene91RWCodec();
}
public void testToString() {
@@ -38,11 +37,11 @@ public class TestLucene91HnswVectorsFormat extends BaseKnnVectorsFormatTestCase
new Lucene91Codec() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
- return new Lucene91HnswVectorsFormat(maxConn, beamWidth);
+ return new Lucene91RWHnswVectorsFormat(maxConn, beamWidth);
}
};
String expectedString =
- "Lucene91HnswVectorsFormat(name = Lucene91HnswVectorsFormat, maxConn = "
+ "Lucene91RWHnswVectorsFormat(name = Lucene91RWHnswVectorsFormat, maxConn = "
+ maxConn
+ ", beamWidth="
+ beamWidth
diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java
new file mode 100644
index 00000000000..b2528ce683b
--- /dev/null
+++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestBackwardsCompatibility.java
@@ -0,0 +1,2218 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.backward_index;
+
+import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
+import static org.apache.lucene.util.Version.LUCENE_9_0_0;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.lang.reflect.Modifier;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.text.ParsePosition;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+import java.util.TimeZone;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.document.BinaryDocValuesField;
+import org.apache.lucene.document.BinaryPoint;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.DoubleDocValuesField;
+import org.apache.lucene.document.DoublePoint;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.FloatDocValuesField;
+import org.apache.lucene.document.FloatPoint;
+import org.apache.lucene.document.IntPoint;
+import org.apache.lucene.document.KnnVectorField;
+import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedDocValuesField;
+import org.apache.lucene.document.SortedNumericDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.CheckIndex;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.IndexCommit;
+import org.apache.lucene.index.IndexFormatTooOldException;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexUpgrader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.LogByteSizeMergePolicy;
+import org.apache.lucene.index.MergePolicy;
+import org.apache.lucene.index.MultiBits;
+import org.apache.lucene.index.MultiDocValues;
+import org.apache.lucene.index.MultiTerms;
+import org.apache.lucene.index.NoMergePolicy;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.SegmentCommitInfo;
+import org.apache.lucene.index.SegmentInfos;
+import org.apache.lucene.index.SegmentReader;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.index.StandardDirectoryReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.VectorSimilarityFunction;
+import org.apache.lucene.index.VectorValues;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.FieldDoc;
+import org.apache.lucene.search.FieldExistsQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.KnnVectorQuery;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.ByteBuffersDirectory;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.NIOFSDirectory;
+import org.apache.lucene.tests.analysis.MockAnalyzer;
+import org.apache.lucene.tests.index.RandomIndexWriter;
+import org.apache.lucene.tests.store.BaseDirectoryWrapper;
+import org.apache.lucene.tests.util.LineFileDocs;
+import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.tests.util.TestUtil;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.InfoStream;
+import org.apache.lucene.util.Version;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+/*
+ Verify we can read previous versions' indexes, do searches
+ against them, and add documents to them.
+*/
+// See: https://issues.apache.org/jira/browse/SOLR-12028 Tests cannot remove files on Windows
+// machines occasionally
+@SuppressWarnings("deprecation")
+public class TestBackwardsCompatibility extends LuceneTestCase {
+
+ // Backcompat index generation, described below, is mostly automated in:
+ //
+ // dev-tools/scripts/addBackcompatIndexes.py
+ //
+ // For usage information, see:
+ //
+ // http://wiki.apache.org/lucene-java/ReleaseTodo#Generate_Backcompat_Indexes
+ //
+ // -----
+ //
+ // To generate backcompat indexes with the current default codec, run the following ant command:
+ // ant test -Dtestcase=TestBackwardsCompatibility -Dtests.bwcdir=/path/to/store/indexes
+ // -Dtests.codec=default -Dtests.useSecurityManager=false
+ // Also add testmethod with one of the index creation methods below, for example:
+ // -Dtestmethod=testCreateCFS
+ //
+ // Zip up the generated indexes:
+ //
+ // cd /path/to/store/indexes/index.cfs ; zip index.<VERSION>-cfs.zip *
+ // cd /path/to/store/indexes/index.nocfs ; zip index.<VERSION>-nocfs.zip *
+ //
+ // Then move those 2 zip files to your trunk checkout and add them
+ // to the oldNames array.
+
+ private static final int DOCS_COUNT = 35;
+ private static final int DELETED_ID = 7;
+
+ private static final int KNN_VECTOR_MIN_SUPPORTED_VERSION = LUCENE_9_0_0.major;
+ private static final String KNN_VECTOR_FIELD = "knn_field";
+ private static final FieldType KNN_VECTOR_FIELD_TYPE =
+ KnnVectorField.createFieldType(3, VectorSimilarityFunction.COSINE);
+ private static final float[] KNN_VECTOR = {0.2f, -0.1f, 0.1f};
+
+ public void testCreateCFS() throws IOException {
+ createIndex("index.cfs", true, false);
+ }
+
+ public void testCreateNoCFS() throws IOException {
+ createIndex("index.nocfs", false, false);
+ }
+
+ // These are only needed for the special upgrade test to verify
+ // that also single-segment indexes are correctly upgraded by IndexUpgrader.
+ // You don't need them to be built for non-4.0 (the test is happy with just one
+ // "old" segment format, version is unimportant:
+
+ public void testCreateSingleSegmentCFS() throws IOException {
+ createIndex("index.singlesegment-cfs", true, true);
+ }
+
+ public void testCreateSingleSegmentNoCFS() throws IOException {
+ createIndex("index.singlesegment-nocfs", false, true);
+ }
+
+ private Path getIndexDir() {
+ String path = System.getProperty("tests.bwcdir");
+ assumeTrue(
+ "backcompat creation tests must be run with -Dtests.bwcdir=/path/to/write/indexes",
+ path != null);
+ return Paths.get(path);
+ }
+
+ public void testCreateMoreTermsIndex() throws Exception {
+
+ Path indexDir = getIndexDir().resolve("moreterms");
+ Files.deleteIfExists(indexDir);
+ Directory dir = newFSDirectory(indexDir);
+
+ LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
+ mp.setNoCFSRatio(1.0);
+ mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
+ MockAnalyzer analyzer = new MockAnalyzer(random());
+ analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+
+ IndexWriterConfig conf =
+ new IndexWriterConfig(analyzer).setMergePolicy(mp).setUseCompoundFile(false);
+ IndexWriter writer = new IndexWriter(dir, conf);
+ LineFileDocs docs = new LineFileDocs(new Random(0));
+ for (int i = 0; i < 50; i++) {
+ writer.addDocument(docs.nextDoc());
+ }
+ docs.close();
+ writer.close();
+ dir.close();
+
+ // Gives you time to copy the index out!: (there is also
+ // a test option to not remove temp dir...):
+ Thread.sleep(100000);
+ }
+
+ // ant test -Dtestcase=TestBackwardsCompatibility -Dtestmethod=testCreateSortedIndex
+ // -Dtests.codec=default -Dtests.useSecurityManager=false -Dtests.bwcdir=/tmp/sorted
+ public void testCreateSortedIndex() throws Exception {
+
+ Path indexDir = getIndexDir().resolve("sorted");
+ Files.deleteIfExists(indexDir);
+ Directory dir = newFSDirectory(indexDir);
+
+ LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
+ mp.setNoCFSRatio(1.0);
+ mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
+ MockAnalyzer analyzer = new MockAnalyzer(random());
+ analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+
+ // TODO: remove randomness
+ IndexWriterConfig conf = new IndexWriterConfig(analyzer);
+ conf.setMergePolicy(mp);
+ conf.setUseCompoundFile(false);
+ conf.setIndexSort(new Sort(new SortField("dateDV", SortField.Type.LONG, true)));
+ IndexWriter writer = new IndexWriter(dir, conf);
+ LineFileDocs docs = new LineFileDocs(random());
+ SimpleDateFormat parser = new SimpleDateFormat("yyyy-MM-dd", Locale.ROOT);
+ parser.setTimeZone(TimeZone.getTimeZone("UTC"));
+ ParsePosition position = new ParsePosition(0);
+ Field dateDVField = null;
+ for (int i = 0; i < 50; i++) {
+ Document doc = docs.nextDoc();
+ String dateString = doc.get("date");
+
+ position.setIndex(0);
+ Date date = parser.parse(dateString, position);
+ if (position.getErrorIndex() != -1) {
+ throw new AssertionError("failed to parse \"" + dateString + "\" as date");
+ }
+ if (position.getIndex() != dateString.length()) {
+ throw new AssertionError("failed to parse \"" + dateString + "\" as date");
+ }
+ if (dateDVField == null) {
+ dateDVField = new NumericDocValuesField("dateDV", 0l);
+ doc.add(dateDVField);
+ }
+ dateDVField.setLongValue(date.getTime());
+ if (i == 250) {
+ writer.commit();
+ }
+ writer.addDocument(doc);
+ }
+ writer.forceMerge(1);
+ writer.close();
+ dir.close();
+ }
+
+ private void updateNumeric(IndexWriter writer, String id, String f, String cf, long value)
+ throws IOException {
+ writer.updateNumericDocValue(new Term("id", id), f, value);
+ writer.updateNumericDocValue(new Term("id", id), cf, value * 2);
+ }
+
+ private void updateBinary(IndexWriter writer, String id, String f, String cf, long value)
+ throws IOException {
+ writer.updateBinaryDocValue(new Term("id", id), f, toBytes(value));
+ writer.updateBinaryDocValue(new Term("id", id), cf, toBytes(value * 2));
+ }
+
+ // Creates an index with DocValues updates
+ public void testCreateIndexWithDocValuesUpdates() throws Exception {
+ Path indexDir = getIndexDir().resolve("dvupdates");
+ Files.deleteIfExists(indexDir);
+ Directory dir = newFSDirectory(indexDir);
+
+ IndexWriterConfig conf =
+ new IndexWriterConfig(new MockAnalyzer(random()))
+ .setUseCompoundFile(false)
+ .setMergePolicy(NoMergePolicy.INSTANCE);
+ IndexWriter writer = new IndexWriter(dir, conf);
+ // create an index w/ few doc-values fields, some with updates and some without
+ for (int i = 0; i < 30; i++) {
+ Document doc = new Document();
+ doc.add(new StringField("id", "" + i, Field.Store.NO));
+ doc.add(new NumericDocValuesField("ndv1", i));
+ doc.add(new NumericDocValuesField("ndv1_c", i * 2));
+ doc.add(new NumericDocValuesField("ndv2", i * 3));
+ doc.add(new NumericDocValuesField("ndv2_c", i * 6));
+ doc.add(new BinaryDocValuesField("bdv1", toBytes(i)));
+ doc.add(new BinaryDocValuesField("bdv1_c", toBytes(i * 2)));
+ doc.add(new BinaryDocValuesField("bdv2", toBytes(i * 3)));
+ doc.add(new BinaryDocValuesField("bdv2_c", toBytes(i * 6)));
+ writer.addDocument(doc);
+ if ((i + 1) % 10 == 0) {
+ writer.commit(); // flush every 10 docs
+ }
+ }
+
+ // first segment: no updates
+
+ // second segment: update two fields, same gen
+ updateNumeric(writer, "10", "ndv1", "ndv1_c", 100L);
+ updateBinary(writer, "11", "bdv1", "bdv1_c", 100L);
+ writer.commit();
+
+ // third segment: update few fields, different gens, few docs
+ updateNumeric(writer, "20", "ndv1", "ndv1_c", 100L);
+ updateBinary(writer, "21", "bdv1", "bdv1_c", 100L);
+ writer.commit();
+ updateNumeric(writer, "22", "ndv1", "ndv1_c", 200L); // update the field again
+ writer.commit();
+
+ writer.close();
+ dir.close();
+ }
+
+ public void testCreateEmptyIndex() throws Exception {
+ Path indexDir = getIndexDir().resolve("emptyIndex");
+ Files.deleteIfExists(indexDir);
+ IndexWriterConfig conf =
+ new IndexWriterConfig(new MockAnalyzer(random()))
+ .setUseCompoundFile(false)
+ .setMergePolicy(NoMergePolicy.INSTANCE);
+ try (Directory dir = newFSDirectory(indexDir);
+ IndexWriter writer = new IndexWriter(dir, conf)) {
+ writer.flush();
+ }
+ }
+
+ static final String[] oldNames = {
+ "9.0.0-cfs", // Force on separate lines
+ "9.0.0-nocfs",
+ "9.1.0-cfs",
+ "9.1.0-nocfs"
+ };
+
+ public static String[] getOldNames() {
+ return oldNames;
+ }
+
+ static final String[] oldSortedNames = {
+ "sorted.9.0.0", // Force on separate lines
+ "sorted.9.1.0"
+ };
+
+ public static String[] getOldSortedNames() {
+ return oldSortedNames;
+ }
+
+ static final String[] unsupportedNames = {
+ "1.9.0-cfs",
+ "1.9.0-nocfs",
+ "2.0.0-cfs",
+ "2.0.0-nocfs",
+ "2.1.0-cfs",
+ "2.1.0-nocfs",
+ "2.2.0-cfs",
+ "2.2.0-nocfs",
+ "2.3.0-cfs",
+ "2.3.0-nocfs",
+ "2.4.0-cfs",
+ "2.4.0-nocfs",
+ "2.4.1-cfs",
+ "2.4.1-nocfs",
+ "2.9.0-cfs",
+ "2.9.0-nocfs",
+ "2.9.1-cfs",
+ "2.9.1-nocfs",
+ "2.9.2-cfs",
+ "2.9.2-nocfs",
+ "2.9.3-cfs",
+ "2.9.3-nocfs",
+ "2.9.4-cfs",
+ "2.9.4-nocfs",
+ "3.0.0-cfs",
+ "3.0.0-nocfs",
+ "3.0.1-cfs",
+ "3.0.1-nocfs",
+ "3.0.2-cfs",
+ "3.0.2-nocfs",
+ "3.0.3-cfs",
+ "3.0.3-nocfs",
+ "3.1.0-cfs",
+ "3.1.0-nocfs",
+ "3.2.0-cfs",
+ "3.2.0-nocfs",
+ "3.3.0-cfs",
+ "3.3.0-nocfs",
+ "3.4.0-cfs",
+ "3.4.0-nocfs",
+ "3.5.0-cfs",
+ "3.5.0-nocfs",
+ "3.6.0-cfs",
+ "3.6.0-nocfs",
+ "3.6.1-cfs",
+ "3.6.1-nocfs",
+ "3.6.2-cfs",
+ "3.6.2-nocfs",
+ "4.0.0-cfs",
+ "4.0.0-cfs",
+ "4.0.0-nocfs",
+ "4.0.0.1-cfs",
+ "4.0.0.1-nocfs",
+ "4.0.0.2-cfs",
+ "4.0.0.2-nocfs",
+ "4.1.0-cfs",
+ "4.1.0-nocfs",
+ "4.2.0-cfs",
+ "4.2.0-nocfs",
+ "4.2.1-cfs",
+ "4.2.1-nocfs",
+ "4.3.0-cfs",
+ "4.3.0-nocfs",
+ "4.3.1-cfs",
+ "4.3.1-nocfs",
+ "4.4.0-cfs",
+ "4.4.0-nocfs",
+ "4.5.0-cfs",
+ "4.5.0-nocfs",
+ "4.5.1-cfs",
+ "4.5.1-nocfs",
+ "4.6.0-cfs",
+ "4.6.0-nocfs",
+ "4.6.1-cfs",
+ "4.6.1-nocfs",
+ "4.7.0-cfs",
+ "4.7.0-nocfs",
+ "4.7.1-cfs",
+ "4.7.1-nocfs",
+ "4.7.2-cfs",
+ "4.7.2-nocfs",
+ "4.8.0-cfs",
+ "4.8.0-nocfs",
+ "4.8.1-cfs",
+ "4.8.1-nocfs",
+ "4.9.0-cfs",
+ "4.9.0-nocfs",
+ "4.9.1-cfs",
+ "4.9.1-nocfs",
+ "4.10.0-cfs",
+ "4.10.0-nocfs",
+ "4.10.1-cfs",
+ "4.10.1-nocfs",
+ "4.10.2-cfs",
+ "4.10.2-nocfs",
+ "4.10.3-cfs",
+ "4.10.3-nocfs",
+ "4.10.4-cfs",
+ "4.10.4-nocfs",
+ "5x-with-4x-segments-cfs",
+ "5x-with-4x-segments-nocfs",
+ "5.0.0.singlesegment-cfs",
+ "5.0.0.singlesegment-nocfs",
+ "5.0.0-cfs",
+ "5.0.0-nocfs",
+ "5.1.0-cfs",
+ "5.1.0-nocfs",
+ "5.2.0-cfs",
+ "5.2.0-nocfs",
+ "5.2.1-cfs",
+ "5.2.1-nocfs",
+ "5.3.0-cfs",
+ "5.3.0-nocfs",
+ "5.3.1-cfs",
+ "5.3.1-nocfs",
+ "5.3.2-cfs",
+ "5.3.2-nocfs",
+ "5.4.0-cfs",
+ "5.4.0-nocfs",
+ "5.4.1-cfs",
+ "5.4.1-nocfs",
+ "5.5.0-cfs",
+ "5.5.0-nocfs",
+ "5.5.1-cfs",
+ "5.5.1-nocfs",
+ "5.5.2-cfs",
+ "5.5.2-nocfs",
+ "5.5.3-cfs",
+ "5.5.3-nocfs",
+ "5.5.4-cfs",
+ "5.5.4-nocfs",
+ "5.5.5-cfs",
+ "5.5.5-nocfs",
+ "6.0.0-cfs",
+ "6.0.0-nocfs",
+ "6.0.1-cfs",
+ "6.0.1-nocfs",
+ "6.1.0-cfs",
+ "6.1.0-nocfs",
+ "6.2.0-cfs",
+ "6.2.0-nocfs",
+ "6.2.1-cfs",
+ "6.2.1-nocfs",
+ "6.3.0-cfs",
+ "6.3.0-nocfs",
+ "6.4.0-cfs",
+ "6.4.0-nocfs",
+ "6.4.1-cfs",
+ "6.4.1-nocfs",
+ "6.4.2-cfs",
+ "6.4.2-nocfs",
+ "6.5.0-cfs",
+ "6.5.0-nocfs",
+ "6.5.1-cfs",
+ "6.5.1-nocfs",
+ "6.6.0-cfs",
+ "6.6.0-nocfs",
+ "6.6.1-cfs",
+ "6.6.1-nocfs",
+ "6.6.2-cfs",
+ "6.6.2-nocfs",
+ "6.6.3-cfs",
+ "6.6.3-nocfs",
+ "6.6.4-cfs",
+ "6.6.4-nocfs",
+ "6.6.5-cfs",
+ "6.6.5-nocfs",
+ "6.6.6-cfs",
+ "6.6.6-nocfs",
+ "7.0.0-cfs",
+ "7.0.0-nocfs",
+ "7.0.1-cfs",
+ "7.0.1-nocfs",
+ "7.1.0-cfs",
+ "7.1.0-nocfs",
+ "7.2.0-cfs",
+ "7.2.0-nocfs",
+ "7.2.1-cfs",
+ "7.2.1-nocfs",
+ "7.3.0-cfs",
+ "7.3.0-nocfs",
+ "7.3.1-cfs",
+ "7.3.1-nocfs",
+ "7.4.0-cfs",
+ "7.4.0-nocfs",
+ "7.5.0-cfs",
+ "7.5.0-nocfs",
+ "7.6.0-cfs",
+ "7.6.0-nocfs",
+ "7.7.0-cfs",
+ "7.7.0-nocfs",
+ "7.7.1-cfs",
+ "7.7.1-nocfs",
+ "7.7.2-cfs",
+ "7.7.2-nocfs",
+ "7.7.3-cfs",
+ "7.7.3-nocfs",
+ "8.0.0-cfs",
+ "8.0.0-nocfs",
+ "8.1.0-cfs",
+ "8.1.0-nocfs",
+ "8.1.1-cfs",
+ "8.1.1-nocfs",
+ "8.2.0-cfs",
+ "8.2.0-nocfs",
+ "8.3.0-cfs",
+ "8.3.0-nocfs",
+ "8.3.1-cfs",
+ "8.3.1-nocfs",
+ "8.4.0-cfs",
+ "8.4.0-nocfs",
+ "8.4.1-cfs",
+ "8.4.1-nocfs",
+ "8.5.0-cfs",
+ "8.5.0-nocfs",
+ "8.5.1-cfs",
+ "8.5.1-nocfs",
+ "8.5.2-cfs",
+ "8.5.2-nocfs",
+ "8.6.0-cfs",
+ "8.6.0-nocfs",
+ "8.6.1-cfs",
+ "8.6.1-nocfs",
+ "8.6.2-cfs",
+ "8.6.2-nocfs",
+ "8.6.3-cfs",
+ "8.6.3-nocfs",
+ "8.7.0-cfs",
+ "8.7.0-nocfs",
+ "8.8.0-cfs",
+ "8.8.0-nocfs",
+ "8.8.1-cfs",
+ "8.8.1-nocfs",
+ "8.8.2-cfs",
+ "8.8.2-nocfs",
+ "8.9.0-cfs",
+ "8.9.0-nocfs",
+ "8.10.0-cfs",
+ "8.10.0-nocfs",
+ "8.10.1-cfs",
+ "8.10.1-nocfs",
+ "8.11.0-cfs",
+ "8.11.0-nocfs",
+ "8.11.1-cfs",
+ "8.11.1-nocfs"
+ };
+
+ static final int MIN_BINARY_SUPPORTED_MAJOR = Version.MIN_SUPPORTED_MAJOR - 1;
+
+ static final String[] binarySupportedNames;
+
+ static {
+ ArrayList<String> list = new ArrayList<>();
+ for (String name : unsupportedNames) {
+ if (name.startsWith(MIN_BINARY_SUPPORTED_MAJOR + ".")) {
+ list.add(name);
+ }
+ }
+ binarySupportedNames = list.toArray(new String[0]);
+ }
+
+ // TODO: on 6.0.0 release, gen the single segment indices and add here:
+ static final String[] oldSingleSegmentNames = {};
+
+ public static String[] getOldSingleSegmentNames() {
+ return oldSingleSegmentNames;
+ }
+
+ static Map<String, Directory> oldIndexDirs;
+
+ /** Randomizes the use of some of the constructor variations */
+ private static IndexUpgrader newIndexUpgrader(Directory dir) {
+ final boolean streamType = random().nextBoolean();
+ final int choice = TestUtil.nextInt(random(), 0, 2);
+ switch (choice) {
+ case 0:
+ return new IndexUpgrader(dir);
+ case 1:
+ return new IndexUpgrader(dir, streamType ? null : InfoStream.NO_OUTPUT, false);
+ case 2:
+ return new IndexUpgrader(dir, newIndexWriterConfig(null), false);
+ default:
+ fail("case statement didn't get updated when random bounds changed");
+ }
+ return null; // never get here
+ }
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ List<String> names = new ArrayList<>(oldNames.length + oldSingleSegmentNames.length);
+ names.addAll(Arrays.asList(oldNames));
+ names.addAll(Arrays.asList(oldSingleSegmentNames));
+ oldIndexDirs = new HashMap<>();
+ for (String name : names) {
+ Path dir = createTempDir(name);
+ InputStream resource =
+ TestBackwardsCompatibility.class.getResourceAsStream("index." + name + ".zip");
+ assertNotNull("Index name " + name + " not found", resource);
+ TestUtil.unzip(resource, dir);
+ oldIndexDirs.put(name, newFSDirectory(dir));
+ }
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ for (Directory d : oldIndexDirs.values()) {
+ d.close();
+ }
+ oldIndexDirs = null;
+ }
+
+ public void testAllVersionHaveCfsAndNocfs() {
+ // ensure all tested versions with cfs also have nocfs
+ String[] files = new String[oldNames.length];
+ System.arraycopy(oldNames, 0, files, 0, oldNames.length);
+ Arrays.sort(files);
+ String prevFile = "";
+ for (String file : files) {
+ if (prevFile.endsWith("-cfs")) {
+ String prefix = prevFile.replace("-cfs", "");
+ assertEquals("Missing -nocfs for backcompat index " + prefix, prefix + "-nocfs", file);
+ }
+ }
+ }
+
+ public void testAllVersionsTested() throws Exception {
+ Pattern constantPattern = Pattern.compile("LUCENE_(\\d+)_(\\d+)_(\\d+)(_ALPHA|_BETA)?");
+ // find the unique versions according to Version.java
+ List<String> expectedVersions = new ArrayList<>();
+ for (java.lang.reflect.Field field : Version.class.getDeclaredFields()) {
+ if (Modifier.isStatic(field.getModifiers()) && field.getType() == Version.class) {
+ Version v = (Version) field.get(Version.class);
+ if (v.equals(Version.LATEST)) {
+ continue;
+ }
+
+ Matcher constant = constantPattern.matcher(field.getName());
+ if (constant.matches() == false) {
+ continue;
+ }
+
+ expectedVersions.add(v.toString() + "-cfs");
+ }
+ }
+
+ // BEGIN TRUNK ONLY BLOCK
+ // on trunk, the last release of the prev major release is also untested
+ Version lastPrevMajorVersion = null;
+ for (java.lang.reflect.Field field : Version.class.getDeclaredFields()) {
+ if (Modifier.isStatic(field.getModifiers()) && field.getType() == Version.class) {
+ Version v = (Version) field.get(Version.class);
+ Matcher constant = constantPattern.matcher(field.getName());
+ if (constant.matches() == false) continue;
+ if (v.major == Version.LATEST.major - 1
+ && (lastPrevMajorVersion == null || v.onOrAfter(lastPrevMajorVersion))) {
+ lastPrevMajorVersion = v;
+ }
+ }
+ }
+ assertNotNull(lastPrevMajorVersion);
+ expectedVersions.remove(lastPrevMajorVersion.toString() + "-cfs");
+ // END TRUNK ONLY BLOCK
+
+ Collections.sort(expectedVersions);
+
+ // find what versions we are testing
+ List<String> testedVersions = new ArrayList<>();
+ for (String testedVersion : oldNames) {
+ if (testedVersion.endsWith("-cfs") == false) {
+ continue;
+ }
+ testedVersions.add(testedVersion);
+ }
+ Collections.sort(testedVersions);
+
+ int i = 0;
+ int j = 0;
+ List<String> missingFiles = new ArrayList<>();
+ List<String> extraFiles = new ArrayList<>();
+ while (i < expectedVersions.size() && j < testedVersions.size()) {
+ String expectedVersion = expectedVersions.get(i);
+ String testedVersion = testedVersions.get(j);
+ int compare = expectedVersion.compareTo(testedVersion);
+ if (compare == 0) { // equal, we can move on
+ ++i;
+ ++j;
+ } else if (compare < 0) { // didn't find test for version constant
+ missingFiles.add(expectedVersion);
+ ++i;
+ } else { // extra test file
+ extraFiles.add(testedVersion);
+ ++j;
+ }
+ }
+ while (i < expectedVersions.size()) {
+ missingFiles.add(expectedVersions.get(i));
+ ++i;
+ }
+ while (j < testedVersions.size()) {
+ missingFiles.add(testedVersions.get(j));
+ ++j;
+ }
+
+ // we could be missing up to 1 file, which may be due to a release that is in progress
+ if (missingFiles.size() <= 1 && extraFiles.isEmpty()) {
+ // success
+ return;
+ }
+
+ StringBuffer msg = new StringBuffer();
+ if (missingFiles.size() > 1) {
+ msg.append("Missing backcompat test files:\n");
+ for (String missingFile : missingFiles) {
+ msg.append(" " + missingFile + "\n");
+ }
+ }
+ if (extraFiles.isEmpty() == false) {
+ msg.append("Extra backcompat test files:\n");
+ for (String extraFile : extraFiles) {
+ msg.append(" " + extraFile + "\n");
+ }
+ }
+ fail(msg.toString());
+ }
+
+ /**
+ * This test checks that *only* IndexFormatTooOldExceptions are thrown when you open and operate
+ * on too old indexes!
+ */
+ public void testUnsupportedOldIndexes() throws Exception {
+ for (int i = 0; i < unsupportedNames.length; i++) {
+ if (VERBOSE) {
+ System.out.println("TEST: index " + unsupportedNames[i]);
+ }
+ Path oldIndexDir = createTempDir(unsupportedNames[i]);
+ TestUtil.unzip(
+ getDataInputStream("unsupported." + unsupportedNames[i] + ".zip"), oldIndexDir);
+ BaseDirectoryWrapper dir = newFSDirectory(oldIndexDir);
+ // don't checkindex, these are intentionally not supported
+ dir.setCheckIndexOnClose(false);
+
+ IndexReader reader = null;
+ IndexWriter writer = null;
+ try {
+ reader = DirectoryReader.open(dir);
+ fail("DirectoryReader.open should not pass for " + unsupportedNames[i]);
+ } catch (IndexFormatTooOldException e) {
+ if (e.getReason() != null) {
+ assertNull(e.getVersion());
+ assertNull(e.getMinVersion());
+ assertNull(e.getMaxVersion());
+ assertEquals(
+ e.getMessage(),
+ new IndexFormatTooOldException(e.getResourceDescription(), e.getReason())
+ .getMessage());
+ } else {
+ assertNotNull(e.getVersion());
+ assertNotNull(e.getMinVersion());
+ assertNotNull(e.getMaxVersion());
+ assertTrue(e.getMessage(), e.getMaxVersion() >= e.getMinVersion());
+ assertTrue(
+ e.getMessage(),
+ e.getMaxVersion() < e.getVersion() || e.getVersion() < e.getMinVersion());
+ assertEquals(
+ e.getMessage(),
+ new IndexFormatTooOldException(
+ e.getResourceDescription(),
+ e.getVersion(),
+ e.getMinVersion(),
+ e.getMaxVersion())
+ .getMessage());
+ }
+ // pass
+ if (VERBOSE) {
+ System.out.println("TEST: got expected exc:");
+ e.printStackTrace(System.out);
+ }
+ } finally {
+ if (reader != null) reader.close();
+ reader = null;
+ }
+
+ try {
+ writer =
+ new IndexWriter(
+ dir, newIndexWriterConfig(new MockAnalyzer(random())).setCommitOnClose(false));
+ fail("IndexWriter creation should not pass for " + unsupportedNames[i]);
+ } catch (IndexFormatTooOldException e) {
+ if (e.getReason() != null) {
+ assertNull(e.getVersion());
+ assertNull(e.getMinVersion());
+ assertNull(e.getMaxVersion());
+ assertEquals(
+ e.getMessage(),
+ new IndexFormatTooOldException(e.getResourceDescription(), e.getReason())
+ .getMessage());
+ } else {
+ assertNotNull(e.getVersion());
+ assertNotNull(e.getMinVersion());
+ assertNotNull(e.getMaxVersion());
+ assertTrue(e.getMessage(), e.getMaxVersion() >= e.getMinVersion());
+ assertTrue(
+ e.getMessage(),
+ e.getMaxVersion() < e.getVersion() || e.getVersion() < e.getMinVersion());
+ assertEquals(
+ e.getMessage(),
+ new IndexFormatTooOldException(
+ e.getResourceDescription(),
+ e.getVersion(),
+ e.getMinVersion(),
+ e.getMaxVersion())
+ .getMessage());
+ }
+ // pass
+ if (VERBOSE) {
+ System.out.println("TEST: got expected exc:");
+ e.printStackTrace(System.out);
+ }
+ // Make sure exc message includes a path=
+ assertTrue("got exc message: " + e.getMessage(), e.getMessage().indexOf("path=\"") != -1);
+ } finally {
+ // we should fail to open IW, and so it should be null when we get here.
+ // However, if the test fails (i.e., IW did not fail on open), we need
+ // to close IW. However, if merges are run, IW may throw
+ // IndexFormatTooOldException, and we don't want to mask the fail()
+ // above, so close without waiting for merges.
+ if (writer != null) {
+ try {
+ writer.commit();
+ } finally {
+ writer.close();
+ }
+ }
+ }
+
+ ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
+ CheckIndex checker = new CheckIndex(dir);
+ checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
+ CheckIndex.Status indexStatus = checker.checkIndex();
+ if (unsupportedNames[i].startsWith("8.")) {
+ assertTrue(indexStatus.clean);
+ } else {
+ assertFalse(indexStatus.clean);
+ // CheckIndex doesn't enforce a minimum version, so we either get an
+ // IndexFormatTooOldException
+ // or an IllegalArgumentException saying that the codec doesn't exist.
+ boolean formatTooOld =
+ bos.toString(IOUtils.UTF_8).contains(IndexFormatTooOldException.class.getName());
+ boolean missingCodec = bos.toString(IOUtils.UTF_8).contains("Could not load codec");
+ assertTrue(formatTooOld || missingCodec);
+ }
+ checker.close();
+
+ dir.close();
+ }
+ }
+
+ public void testFullyMergeOldIndex() throws Exception {
+ for (String name : oldNames) {
+ if (VERBOSE) {
+ System.out.println("\nTEST: index=" + name);
+ }
+ Directory dir = newDirectory(oldIndexDirs.get(name));
+
+ final SegmentInfos oldSegInfos = SegmentInfos.readLatestCommit(dir);
+
+ IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));
+ w.forceMerge(1);
+ w.close();
+
+ final SegmentInfos segInfos = SegmentInfos.readLatestCommit(dir);
+ assertEquals(
+ oldSegInfos.getIndexCreatedVersionMajor(), segInfos.getIndexCreatedVersionMajor());
+ assertEquals(Version.LATEST, segInfos.asList().get(0).info.getVersion());
+ assertEquals(
+ oldSegInfos.asList().get(0).info.getMinVersion(),
+ segInfos.asList().get(0).info.getMinVersion());
+
+ dir.close();
+ }
+ }
+
+ // Verifies IndexWriter.addIndexes(Directory): adding an index created by a different
+ // major version must throw IllegalArgumentException, while adding into an index that
+ // was (simulated as) created on the same major version must succeed without rewriting
+ // the incoming segments.
+ public void testAddOldIndexes() throws IOException {
+ for (String name : oldNames) {
+ if (VERBOSE) {
+ System.out.println("\nTEST: old index " + name);
+ }
+ Directory oldDir = oldIndexDirs.get(name);
+ SegmentInfos infos = SegmentInfos.readLatestCommit(oldDir);
+
+ Directory targetDir = newDirectory();
+ if (infos.getCommitLuceneVersion().major != Version.LATEST.major) {
+ // both indexes are not compatible
+ Directory targetDir2 = newDirectory();
+ IndexWriter w =
+ new IndexWriter(targetDir2, newIndexWriterConfig(new MockAnalyzer(random())));
+ IllegalArgumentException e =
+ expectThrows(IllegalArgumentException.class, () -> w.addIndexes(oldDir));
+ assertTrue(
+ e.getMessage(),
+ e.getMessage()
+ .startsWith(
+ "Cannot use addIndexes(Directory) with indexes that have been created by a different Lucene version."));
+ w.close();
+ targetDir2.close();
+
+ // for the next test, we simulate writing to an index that was created on the same major
+ // version
+ new SegmentInfos(infos.getIndexCreatedVersionMajor()).commit(targetDir);
+ }
+
+ IndexWriter w = new IndexWriter(targetDir, newIndexWriterConfig(new MockAnalyzer(random())));
+ w.addIndexes(oldDir);
+ w.close();
+
+ SegmentInfos si = SegmentInfos.readLatestCommit(targetDir);
+ // addIndexes(Directory) copies segments as-is; none should have been rewritten
+ assertNull(
+ "none of the segments should have been upgraded",
+ si.asList().stream()
+ .filter( // depending on the MergePolicy we might see these segments merged away
+ sci ->
+ sci.getId() != null
+ && sci.info.getVersion().onOrAfter(Version.fromBits(8, 6, 0)) == false)
+ .findAny()
+ .orElse(null));
+ if (VERBOSE) {
+ System.out.println("\nTEST: done adding indices; now close");
+ }
+
+ targetDir.close();
+ }
+ }
+
+ // Same cross-major-version compatibility checks as testAddOldIndexes, but going through
+ // the addIndexes(CodecReader...) path (via TestUtil.addIndexesSlowly), which rewrites
+ // segments — so afterwards every SegmentCommitInfo must carry an id.
+ public void testAddOldIndexesReader() throws IOException {
+ for (String name : oldNames) {
+ Directory oldDir = oldIndexDirs.get(name);
+ SegmentInfos infos = SegmentInfos.readLatestCommit(oldDir);
+ DirectoryReader reader = DirectoryReader.open(oldDir);
+
+ Directory targetDir = newDirectory();
+ if (infos.getCommitLuceneVersion().major != Version.LATEST.major) {
+ Directory targetDir2 = newDirectory();
+ IndexWriter w =
+ new IndexWriter(targetDir2, newIndexWriterConfig(new MockAnalyzer(random())));
+ IllegalArgumentException e =
+ expectThrows(
+ IllegalArgumentException.class, () -> TestUtil.addIndexesSlowly(w, reader));
+ // NOTE(review): message hard-codes majors 9 and 10 — presumably tied to the current
+ // Version.LATEST.major at commit time; will need updating on the next major bump
+ assertEquals(
+ e.getMessage(),
+ "Cannot merge a segment that has been created with major version 9 into this index which has been created by major version 10");
+ w.close();
+ targetDir2.close();
+
+ // for the next test, we simulate writing to an index that was created on the same major
+ // version
+ new SegmentInfos(infos.getIndexCreatedVersionMajor()).commit(targetDir);
+ }
+ IndexWriter w = new IndexWriter(targetDir, newIndexWriterConfig(new MockAnalyzer(random())));
+ TestUtil.addIndexesSlowly(w, reader);
+ w.close();
+ reader.close();
+ SegmentInfos si = SegmentInfos.readLatestCommit(targetDir);
+ assertNull(
+ "all SCIs should have an id now",
+ si.asList().stream().filter(sci -> sci.getId() == null).findAny().orElse(null));
+ targetDir.close();
+ }
+ }
+
+ // Runs the full searchIndex(...) verification over every supported old index, and —
+ // nightly only — over the binary-supported (older, read-only) index zips as well.
+ public void testSearchOldIndex() throws Exception {
+ for (String name : oldNames) {
+ // index names are of the form "<version>-<suffix>"
+ Version version = Version.parse(name.substring(0, name.indexOf('-')));
+ searchIndex(oldIndexDirs.get(name), name, Version.MIN_SUPPORTED_MAJOR, version);
+ }
+
+ if (TEST_NIGHTLY) {
+ for (String name : binarySupportedNames) {
+ Path oldIndexDir = createTempDir(name);
+ TestUtil.unzip(getDataInputStream("unsupported." + name + ".zip"), oldIndexDir);
+ try (BaseDirectoryWrapper dir = newFSDirectory(oldIndexDir)) {
+ Version version = Version.parse(name.substring(0, name.indexOf('-')));
+ searchIndex(dir, name, MIN_BINARY_SUPPORTED_MAJOR, version);
+ }
+ }
+ }
+ }
+
+ // Exercises changeIndexNoAdds (search + force-merge, no new documents) against every
+ // supported old index.
+ public void testIndexOldIndexNoAdds() throws Exception {
+ for (String name : oldNames) {
+ Directory dir = newDirectory(oldIndexDirs.get(name));
+ Version version = Version.parse(name.substring(0, name.indexOf('-')));
+ changeIndexNoAdds(random(), dir, version);
+ dir.close();
+ }
+ }
+
+ // Exercises changeIndexWithAdds (append new documents, search, force-merge) against
+ // every supported old index.
+ public void testIndexOldIndex() throws Exception {
+ for (String name : oldNames) {
+ if (VERBOSE) {
+ System.out.println("TEST: oldName=" + name);
+ }
+ Directory dir = newDirectory(oldIndexDirs.get(name));
+ Version v = Version.parse(name.substring(0, name.indexOf('-')));
+ changeIndexWithAdds(random(), dir, v);
+ dir.close();
+ }
+ }
+
+ // Asserts the hit count and touches each hit's stored document and term vectors,
+ // so decoding problems in old formats surface as exceptions.
+ private void doTestHits(ScoreDoc[] hits, int expectedCount, IndexReader reader)
+ throws IOException {
+ final int hitCount = hits.length;
+ assertEquals("wrong number of hits", expectedCount, hitCount);
+ for (int i = 0; i < hitCount; i++) {
+ reader.document(hits[i].doc);
+ reader.getTermVectors(hits[i].doc);
+ }
+ }
+
+ // Opens the given back-compat index read-only at its first commit, runs CheckIndex,
+ // then verifies the known test corpus end-to-end: stored fields and term vectors,
+ // all doc-values fields, term queries, point range queries over every point type,
+ // and — for indexes created on a KNN-capable version — vector values and KNN search.
+ public void searchIndex(
+ Directory dir, String oldName, int minIndexMajorVersion, Version nameVersion)
+ throws IOException {
+ // QueryParser parser = new QueryParser("contents", new MockAnalyzer(random));
+ // Query query = parser.parse("handle:1");
+ IndexCommit indexCommit = DirectoryReader.listCommits(dir).get(0);
+ IndexReader reader = DirectoryReader.open(indexCommit, minIndexMajorVersion, null);
+ IndexSearcher searcher = newSearcher(reader);
+
+ TestUtil.checkIndex(dir);
+
+ // the old index contains one deleted document (DELETED_ID)
+ final Bits liveDocs = MultiBits.getLiveDocs(reader);
+ assertNotNull(liveDocs);
+
+ for (int i = 0; i < DOCS_COUNT; i++) {
+ if (liveDocs.get(i)) {
+ Document d = reader.document(i);
+ List<IndexableField> fields = d.getFields();
+ // docs without "content3" were written by addDoc (see below); the one with
+ // "content3" came from addNoProxDoc and is skipped here
+ boolean isProxDoc = d.getField("content3") == null;
+ if (isProxDoc) {
+ assertEquals(7, fields.size());
+ IndexableField f = d.getField("id");
+ assertEquals("" + i, f.stringValue());
+
+ f = d.getField("utf8");
+ assertEquals(
+ "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue());
+
+ f = d.getField("autf8");
+ assertEquals(
+ "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.stringValue());
+
+ f = d.getField("content2");
+ assertEquals("here is more content with aaa aaa aaa", f.stringValue());
+
+ f = d.getField("fie\u2C77ld");
+ assertEquals("field with non-ascii name", f.stringValue());
+ }
+
+ Fields tfvFields = reader.getTermVectors(i);
+ assertNotNull("i=" + i, tfvFields);
+ Terms tfv = tfvFields.terms("utf8");
+ assertNotNull("docID=" + i + " index=" + oldName, tfv);
+ } else {
+ // only the single deleted doc may be non-live
+ assertEquals(DELETED_ID, i);
+ }
+ }
+
+ // check docvalues fields
+ NumericDocValues dvByte = MultiDocValues.getNumericValues(reader, "dvByte");
+ BinaryDocValues dvBytesDerefFixed = MultiDocValues.getBinaryValues(reader, "dvBytesDerefFixed");
+ BinaryDocValues dvBytesDerefVar = MultiDocValues.getBinaryValues(reader, "dvBytesDerefVar");
+ SortedDocValues dvBytesSortedFixed =
+ MultiDocValues.getSortedValues(reader, "dvBytesSortedFixed");
+ SortedDocValues dvBytesSortedVar = MultiDocValues.getSortedValues(reader, "dvBytesSortedVar");
+ BinaryDocValues dvBytesStraightFixed =
+ MultiDocValues.getBinaryValues(reader, "dvBytesStraightFixed");
+ BinaryDocValues dvBytesStraightVar =
+ MultiDocValues.getBinaryValues(reader, "dvBytesStraightVar");
+ NumericDocValues dvDouble = MultiDocValues.getNumericValues(reader, "dvDouble");
+ NumericDocValues dvFloat = MultiDocValues.getNumericValues(reader, "dvFloat");
+ NumericDocValues dvInt = MultiDocValues.getNumericValues(reader, "dvInt");
+ NumericDocValues dvLong = MultiDocValues.getNumericValues(reader, "dvLong");
+ NumericDocValues dvPacked = MultiDocValues.getNumericValues(reader, "dvPacked");
+ NumericDocValues dvShort = MultiDocValues.getNumericValues(reader, "dvShort");
+
+ SortedSetDocValues dvSortedSet = MultiDocValues.getSortedSetValues(reader, "dvSortedSet");
+ SortedNumericDocValues dvSortedNumeric =
+ MultiDocValues.getSortedNumericValues(reader, "dvSortedNumeric");
+
+ // every doc-values field was written from the doc's integer id (see addDoc)
+ for (int i = 0; i < DOCS_COUNT; i++) {
+ int id = Integer.parseInt(reader.document(i).get("id"));
+ assertEquals(i, dvByte.nextDoc());
+ assertEquals(id, dvByte.longValue());
+
+ // big-endian 4-byte encoding of id, matching addDoc
+ byte[] bytes =
+ new byte[] {(byte) (id >>> 24), (byte) (id >>> 16), (byte) (id >>> 8), (byte) id};
+ BytesRef expectedRef = new BytesRef(bytes);
+
+ assertEquals(i, dvBytesDerefFixed.nextDoc());
+ BytesRef term = dvBytesDerefFixed.binaryValue();
+ assertEquals(expectedRef, term);
+ assertEquals(i, dvBytesDerefVar.nextDoc());
+ term = dvBytesDerefVar.binaryValue();
+ assertEquals(expectedRef, term);
+ assertEquals(i, dvBytesSortedFixed.nextDoc());
+ term = dvBytesSortedFixed.lookupOrd(dvBytesSortedFixed.ordValue());
+ assertEquals(expectedRef, term);
+ assertEquals(i, dvBytesSortedVar.nextDoc());
+ term = dvBytesSortedVar.lookupOrd(dvBytesSortedVar.ordValue());
+ assertEquals(expectedRef, term);
+ assertEquals(i, dvBytesStraightFixed.nextDoc());
+ term = dvBytesStraightFixed.binaryValue();
+ assertEquals(expectedRef, term);
+ assertEquals(i, dvBytesStraightVar.nextDoc());
+ term = dvBytesStraightVar.binaryValue();
+ assertEquals(expectedRef, term);
+
+ assertEquals(i, dvDouble.nextDoc());
+ assertEquals((double) id, Double.longBitsToDouble(dvDouble.longValue()), 0D);
+ assertEquals(i, dvFloat.nextDoc());
+ assertEquals((float) id, Float.intBitsToFloat((int) dvFloat.longValue()), 0F);
+ assertEquals(i, dvInt.nextDoc());
+ assertEquals(id, dvInt.longValue());
+ assertEquals(i, dvLong.nextDoc());
+ assertEquals(id, dvLong.longValue());
+ assertEquals(i, dvPacked.nextDoc());
+ assertEquals(id, dvPacked.longValue());
+ assertEquals(i, dvShort.nextDoc());
+ assertEquals(id, dvShort.longValue());
+
+ // the sorted-set field holds exactly one ord per doc
+ assertEquals(i, dvSortedSet.nextDoc());
+ long ord = dvSortedSet.nextOrd();
+ assertEquals(SortedSetDocValues.NO_MORE_ORDS, dvSortedSet.nextOrd());
+ term = dvSortedSet.lookupOrd(ord);
+ assertEquals(expectedRef, term);
+
+ assertEquals(i, dvSortedNumeric.nextDoc());
+ assertEquals(1, dvSortedNumeric.docValueCount());
+ assertEquals(id, dvSortedNumeric.nextValue());
+ }
+
+ // NOTE(review): new String("content") looks redundant — presumably deliberate to
+ // defeat string interning; confirm before simplifying
+ ScoreDoc[] hits =
+ searcher.search(new TermQuery(new Term(new String("content"), "aaa")), 1000).scoreDocs;
+
+ // First document should be #0
+ Document d = searcher.getIndexReader().document(hits[0].doc);
+ assertEquals("didn't get the right document first", "0", d.get("id"));
+
+ // 35 docs minus the one deleted doc = 34 hits throughout
+ doTestHits(hits, 34, searcher.getIndexReader());
+
+ hits = searcher.search(new TermQuery(new Term(new String("content5"), "aaa")), 1000).scoreDocs;
+
+ doTestHits(hits, 34, searcher.getIndexReader());
+
+ hits = searcher.search(new TermQuery(new Term(new String("content6"), "aaa")), 1000).scoreDocs;
+
+ doTestHits(hits, 34, searcher.getIndexReader());
+
+ hits = searcher.search(new TermQuery(new Term("utf8", "\u0000")), 1000).scoreDocs;
+ assertEquals(34, hits.length);
+ hits =
+ searcher.search(
+ new TermQuery(new Term(new String("utf8"), "lu\uD834\uDD1Ece\uD834\uDD60ne")), 1000)
+ .scoreDocs;
+ assertEquals(34, hits.length);
+ hits = searcher.search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), 1000).scoreDocs;
+ assertEquals(34, hits.length);
+
+ // point range queries: one 1d and one 2d query per point type
+ doTestHits(
+ searcher.search(IntPoint.newRangeQuery("intPoint1d", 0, 34), 1000).scoreDocs,
+ 34,
+ searcher.getIndexReader());
+ doTestHits(
+ searcher.search(
+ IntPoint.newRangeQuery("intPoint2d", new int[] {0, 0}, new int[] {34, 68}), 1000)
+ .scoreDocs,
+ 34,
+ searcher.getIndexReader());
+ doTestHits(
+ searcher.search(FloatPoint.newRangeQuery("floatPoint1d", 0f, 34f), 1000).scoreDocs,
+ 34,
+ searcher.getIndexReader());
+ doTestHits(
+ searcher.search(
+ FloatPoint.newRangeQuery(
+ "floatPoint2d", new float[] {0f, 0f}, new float[] {34f, 68f}),
+ 1000)
+ .scoreDocs,
+ 34,
+ searcher.getIndexReader());
+ doTestHits(
+ searcher.search(LongPoint.newRangeQuery("longPoint1d", 0, 34), 1000).scoreDocs,
+ 34,
+ searcher.getIndexReader());
+ doTestHits(
+ searcher.search(
+ LongPoint.newRangeQuery("longPoint2d", new long[] {0, 0}, new long[] {34, 68}),
+ 1000)
+ .scoreDocs,
+ 34,
+ searcher.getIndexReader());
+ doTestHits(
+ searcher.search(DoublePoint.newRangeQuery("doublePoint1d", 0.0, 34.0), 1000).scoreDocs,
+ 34,
+ searcher.getIndexReader());
+ doTestHits(
+ searcher.search(
+ DoublePoint.newRangeQuery(
+ "doublePoint2d", new double[] {0.0, 0.0}, new double[] {34.0, 68.0}),
+ 1000)
+ .scoreDocs,
+ 34,
+ searcher.getIndexReader());
+
+ byte[] bytes1 = new byte[4];
+ byte[] bytes2 = new byte[] {0, 0, 0, (byte) 34};
+ doTestHits(
+ searcher.search(BinaryPoint.newRangeQuery("binaryPoint1d", bytes1, bytes2), 1000).scoreDocs,
+ 34,
+ searcher.getIndexReader());
+ byte[] bytes3 = new byte[] {0, 0, 0, (byte) 68};
+ doTestHits(
+ searcher.search(
+ BinaryPoint.newRangeQuery(
+ "binaryPoint2d", new byte[][] {bytes1, bytes1}, new byte[][] {bytes2, bytes3}),
+ 1000)
+ .scoreDocs,
+ 34,
+ searcher.getIndexReader());
+
+ // test vector values and KNN search
+ if (nameVersion.major >= KNN_VECTOR_MIN_SUPPORTED_VERSION) {
+ // test vector values
+ int cnt = 0;
+ for (LeafReaderContext ctx : reader.leaves()) {
+ VectorValues values = ctx.reader().getVectorValues(KNN_VECTOR_FIELD);
+ if (values != null) {
+ assertEquals(KNN_VECTOR_FIELD_TYPE.vectorDimension(), values.dimension());
+ for (int doc = values.nextDoc(); doc != NO_MORE_DOCS; doc = values.nextDoc()) {
+ // each doc's vector differs from KNN_VECTOR only in the last component (see addDoc)
+ float[] expectedVector = {KNN_VECTOR[0], KNN_VECTOR[1], KNN_VECTOR[2] + 0.1f * cnt};
+ assertArrayEquals(
+ "vectors do not match for doc=" + cnt, expectedVector, values.vectorValue(), 0);
+ cnt++;
+ }
+ }
+ }
+ assertEquals(DOCS_COUNT, cnt);
+
+ // test KNN search
+ ScoreDoc[] scoreDocs = assertKNNSearch(searcher, KNN_VECTOR, 10, 10, "0");
+ for (int i = 0; i < scoreDocs.length; i++) {
+ int id = Integer.parseInt(reader.document(scoreDocs[i].doc).get("id"));
+ // results skip the deleted doc, so ids shift by one past DELETED_ID
+ int expectedId = i < DELETED_ID ? i : i + 1;
+ assertEquals(expectedId, id);
+ }
+ }
+
+ reader.close();
+ }
+
+ // Runs a KnnVectorQuery against KNN_VECTOR_FIELD, asserts the hit count and the id of
+ // the top hit, and returns the hits for further per-hit checks by the caller.
+ private static ScoreDoc[] assertKNNSearch(
+ IndexSearcher searcher,
+ float[] queryVector,
+ int k,
+ int expectedHitsCount,
+ String expectedFirstDocId)
+ throws IOException {
+ ScoreDoc[] hits =
+ searcher.search(new KnnVectorQuery(KNN_VECTOR_FIELD, queryVector, k), k).scoreDocs;
+ assertEquals("wrong number of hits", expectedHitsCount, hits.length);
+ Document d = searcher.doc(hits[0].doc);
+ assertEquals("wrong first document", expectedFirstDocId, d.get("id"));
+ return hits;
+ }
+
+ // Appends 10 new documents to an old index, verifies term and KNN search see both the
+ // old and new docs, then force-merges to one segment and verifies again.
+ public void changeIndexWithAdds(Random random, Directory dir, Version nameVersion)
+ throws IOException {
+ SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
+ assertEquals(nameVersion, infos.getCommitLuceneVersion());
+ assertEquals(nameVersion, infos.getMinSegmentLuceneVersion());
+
+ // open writer
+ IndexWriter writer =
+ new IndexWriter(
+ dir,
+ newIndexWriterConfig(new MockAnalyzer(random))
+ .setOpenMode(OpenMode.APPEND)
+ .setMergePolicy(newLogMergePolicy()));
+ // add 10 docs
+ for (int i = 0; i < 10; i++) {
+ addDoc(writer, DOCS_COUNT + i);
+ }
+
+ // make sure writer sees right total -- writer seems not to know about deletes in .del?
+ final int expected = 45;
+ assertEquals("wrong doc count", expected, writer.getDocStats().numDocs);
+ writer.close();
+
+ // make sure searching sees right # hits for term search
+ IndexReader reader = DirectoryReader.open(dir);
+ IndexSearcher searcher = newSearcher(reader);
+ ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), 1000).scoreDocs;
+ Document d = searcher.getIndexReader().document(hits[0].doc);
+ assertEquals("wrong first document", "0", d.get("id"));
+ // 45 docs minus the one deleted = 44 live hits
+ doTestHits(hits, 44, searcher.getIndexReader());
+
+ if (nameVersion.major >= KNN_VECTOR_MIN_SUPPORTED_VERSION) {
+ // make sure KNN search sees all hits (graph may not be used if k is big)
+ assertKNNSearch(searcher, KNN_VECTOR, 1000, 44, "0");
+ // make sure KNN search using HNSW graph sees newly added docs
+ assertKNNSearch(
+ searcher,
+ new float[] {KNN_VECTOR[0], KNN_VECTOR[1], KNN_VECTOR[2] + 0.1f * 44},
+ 10,
+ 10,
+ "44");
+ }
+ reader.close();
+
+ // fully merge
+ writer =
+ new IndexWriter(
+ dir,
+ newIndexWriterConfig(new MockAnalyzer(random))
+ .setOpenMode(OpenMode.APPEND)
+ .setMergePolicy(newLogMergePolicy()));
+ writer.forceMerge(1);
+ writer.close();
+
+ reader = DirectoryReader.open(dir);
+ searcher = newSearcher(reader);
+ // make sure searching sees right # hits for term search
+ hits = searcher.search(new TermQuery(new Term("content", "aaa")), 1000).scoreDocs;
+ assertEquals("wrong number of hits", 44, hits.length);
+ d = searcher.doc(hits[0].doc);
+ doTestHits(hits, 44, searcher.getIndexReader());
+ assertEquals("wrong first document", "0", d.get("id"));
+
+ if (nameVersion.major >= KNN_VECTOR_MIN_SUPPORTED_VERSION) {
+ // make sure KNN search sees all hits
+ assertKNNSearch(searcher, KNN_VECTOR, 1000, 44, "0");
+ // make sure KNN search using HNSW graph sees newly added docs
+ assertKNNSearch(
+ searcher,
+ new float[] {KNN_VECTOR[0], KNN_VECTOR[1], KNN_VECTOR[2] + 0.1f * 44},
+ 10,
+ 10,
+ "44");
+ }
+ reader.close();
+ }
+
+ // Verifies term and KNN search results on an old index without adding documents,
+ // then force-merges to one segment and verifies the same results again.
+ public void changeIndexNoAdds(Random random, Directory dir, Version nameVersion)
+ throws IOException {
+ // make sure searching sees right # hits for term search
+ DirectoryReader reader = DirectoryReader.open(dir);
+ IndexSearcher searcher = newSearcher(reader);
+ ScoreDoc[] hits = searcher.search(new TermQuery(new Term("content", "aaa")), 1000).scoreDocs;
+ assertEquals("wrong number of hits", 34, hits.length);
+ Document d = searcher.doc(hits[0].doc);
+ assertEquals("wrong first document", "0", d.get("id"));
+
+ if (nameVersion.major >= KNN_VECTOR_MIN_SUPPORTED_VERSION) {
+ // make sure KNN search sees all hits
+ assertKNNSearch(searcher, KNN_VECTOR, 1000, 34, "0");
+ // make sure KNN search using HNSW graph retrieves correct results
+ assertKNNSearch(searcher, KNN_VECTOR, 10, 10, "0");
+ }
+ reader.close();
+
+ // fully merge
+ IndexWriter writer =
+ new IndexWriter(
+ dir, newIndexWriterConfig(new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND));
+ writer.forceMerge(1);
+ writer.close();
+
+ reader = DirectoryReader.open(dir);
+ searcher = newSearcher(reader);
+ // make sure searching sees right # hits for term search
+ hits = searcher.search(new TermQuery(new Term("content", "aaa")), 1000).scoreDocs;
+ assertEquals("wrong number of hits", 34, hits.length);
+ doTestHits(hits, 34, searcher.getIndexReader());
+ // make sure searching sees right # hits for KNN search
+ if (nameVersion.major >= KNN_VECTOR_MIN_SUPPORTED_VERSION) {
+ // make sure KNN search sees all hits
+ assertKNNSearch(searcher, KNN_VECTOR, 1000, 34, "0");
+ // make sure KNN search using HNSW graph retrieves correct results
+ assertKNNSearch(searcher, KNN_VECTOR, 10, 10, "0");
+ }
+ reader.close();
+ }
+
+ // Creates the reference back-compat index: DOCS_COUNT docs via addDoc, optionally
+ // force-merged; when not merged, also adds one positions-free doc and deletes doc
+ // DELETED_ID in separate writer sessions so the on-disk layout matches what the
+ // back-compat tests expect.
+ public void createIndex(String dirName, boolean doCFS, boolean fullyMerged) throws IOException {
+ Path indexDir = getIndexDir().resolve(dirName);
+ Files.deleteIfExists(indexDir);
+ Directory dir = newFSDirectory(indexDir);
+ // NOTE(review): mp is configured here but never installed on the config below
+ // (NoMergePolicy is used instead) — confirm whether the CFS settings are still needed
+ LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
+ mp.setNoCFSRatio(doCFS ? 1.0 : 0.0);
+ mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
+ // TODO: remove randomness
+ IndexWriterConfig conf =
+ new IndexWriterConfig(new MockAnalyzer(random()))
+ .setMaxBufferedDocs(10)
+ .setMergePolicy(NoMergePolicy.INSTANCE);
+ IndexWriter writer = new IndexWriter(dir, conf);
+
+ for (int i = 0; i < DOCS_COUNT; i++) {
+ addDoc(writer, i);
+ }
+ assertEquals("wrong doc count", DOCS_COUNT, writer.getDocStats().maxDoc);
+ if (fullyMerged) {
+ writer.forceMerge(1);
+ }
+ writer.close();
+
+ if (!fullyMerged) {
+ // open fresh writer so we get no prx file in the added segment
+ mp = new LogByteSizeMergePolicy();
+ mp.setNoCFSRatio(doCFS ? 1.0 : 0.0);
+ // TODO: remove randomness
+ conf =
+ new IndexWriterConfig(new MockAnalyzer(random()))
+ .setMaxBufferedDocs(10)
+ .setMergePolicy(NoMergePolicy.INSTANCE);
+ writer = new IndexWriter(dir, conf);
+ addNoProxDoc(writer);
+ writer.close();
+
+ conf =
+ new IndexWriterConfig(new MockAnalyzer(random()))
+ .setMaxBufferedDocs(10)
+ .setMergePolicy(NoMergePolicy.INSTANCE);
+ writer = new IndexWriter(dir, conf);
+ // delete exactly one known doc; searchIndex later asserts only DELETED_ID is non-live
+ Term searchTerm = new Term("id", String.valueOf(DELETED_ID));
+ writer.deleteDocuments(searchTerm);
+ writer.close();
+ }
+
+ dir.close();
+ }
+
+ // Adds one document exercising every feature the back-compat tests later verify:
+ // stored/indexed text with term vectors, every doc-values type (all derived from id),
+ // 1d/2d points of every point type, fields with unusual term-vector/offset options,
+ // and a KNN vector whose last component encodes the doc id.
+ private void addDoc(IndexWriter writer, int id) throws IOException {
+ Document doc = new Document();
+ doc.add(new TextField("content", "aaa", Field.Store.NO));
+ doc.add(new StringField("id", Integer.toString(id), Field.Store.YES));
+ FieldType customType2 = new FieldType(TextField.TYPE_STORED);
+ customType2.setStoreTermVectors(true);
+ customType2.setStoreTermVectorPositions(true);
+ customType2.setStoreTermVectorOffsets(true);
+ doc.add(
+ new Field(
+ "autf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", customType2));
+ doc.add(
+ new Field(
+ "utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", customType2));
+ doc.add(new Field("content2", "here is more content with aaa aaa aaa", customType2));
+ doc.add(new Field("fie\u2C77ld", "field with non-ascii name", customType2));
+
+ // add docvalues fields
+ doc.add(new NumericDocValuesField("dvByte", (byte) id));
+ // big-endian 4-byte encoding of id; searchIndex rebuilds the same bytes to compare
+ byte[] bytes =
+ new byte[] {(byte) (id >>> 24), (byte) (id >>> 16), (byte) (id >>> 8), (byte) id};
+ BytesRef ref = new BytesRef(bytes);
+ doc.add(new BinaryDocValuesField("dvBytesDerefFixed", ref));
+ doc.add(new BinaryDocValuesField("dvBytesDerefVar", ref));
+ doc.add(new SortedDocValuesField("dvBytesSortedFixed", ref));
+ doc.add(new SortedDocValuesField("dvBytesSortedVar", ref));
+ doc.add(new BinaryDocValuesField("dvBytesStraightFixed", ref));
+ doc.add(new BinaryDocValuesField("dvBytesStraightVar", ref));
+ doc.add(new DoubleDocValuesField("dvDouble", (double) id));
+ doc.add(new FloatDocValuesField("dvFloat", (float) id));
+ doc.add(new NumericDocValuesField("dvInt", id));
+ doc.add(new NumericDocValuesField("dvLong", id));
+ doc.add(new NumericDocValuesField("dvPacked", id));
+ doc.add(new NumericDocValuesField("dvShort", (short) id));
+ doc.add(new SortedSetDocValuesField("dvSortedSet", ref));
+ doc.add(new SortedNumericDocValuesField("dvSortedNumeric", id));
+
+ doc.add(new IntPoint("intPoint1d", id));
+ doc.add(new IntPoint("intPoint2d", id, 2 * id));
+ doc.add(new FloatPoint("floatPoint1d", (float) id));
+ doc.add(new FloatPoint("floatPoint2d", (float) id, (float) 2 * id));
+ doc.add(new LongPoint("longPoint1d", id));
+ doc.add(new LongPoint("longPoint2d", id, 2 * id));
+ doc.add(new DoublePoint("doublePoint1d", (double) id));
+ doc.add(new DoublePoint("doublePoint2d", (double) id, (double) 2 * id));
+ doc.add(new BinaryPoint("binaryPoint1d", bytes));
+ doc.add(new BinaryPoint("binaryPoint2d", bytes, bytes));
+
+ // a field with both offsets and term vectors for a cross-check
+ FieldType customType3 = new FieldType(TextField.TYPE_STORED);
+ customType3.setStoreTermVectors(true);
+ customType3.setStoreTermVectorPositions(true);
+ customType3.setStoreTermVectorOffsets(true);
+ customType3.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
+ doc.add(new Field("content5", "here is more content with aaa aaa aaa", customType3));
+ // a field that omits only positions
+ FieldType customType4 = new FieldType(TextField.TYPE_STORED);
+ customType4.setStoreTermVectors(true);
+ customType4.setStoreTermVectorPositions(false);
+ customType4.setStoreTermVectorOffsets(true);
+ customType4.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
+ doc.add(new Field("content6", "here is more content with aaa aaa aaa", customType4));
+
+ // per-doc vector: only the last component varies, by 0.1 * id
+ float[] vector = {KNN_VECTOR[0], KNN_VECTOR[1], KNN_VECTOR[2] + 0.1f * id};
+ doc.add(new KnnVectorField(KNN_VECTOR_FIELD, vector, KNN_VECTOR_FIELD_TYPE));
+
+ // TODO:
+ // index different norms types via similarity (we use a random one currently?!)
+ // remove any analyzer randomness, explicitly add payloads for certain fields.
+ writer.addDocument(doc);
+ }
+
+ // Adds a document whose fields index docs only (no freqs/positions); its "content3"
+ // field is how searchIndex distinguishes this doc from the regular addDoc docs.
+ private void addNoProxDoc(IndexWriter writer) throws IOException {
+ Document doc = new Document();
+ FieldType customType = new FieldType(TextField.TYPE_STORED);
+ customType.setIndexOptions(IndexOptions.DOCS);
+ Field f = new Field("content3", "aaa", customType);
+ doc.add(f);
+ FieldType customType2 = new FieldType();
+ customType2.setStored(true);
+ customType2.setIndexOptions(IndexOptions.DOCS);
+ f = new Field("content4", "aaa", customType2);
+ doc.add(f);
+ writer.addDocument(doc);
+ }
+
+ // Exhausts the postings enum and returns the number of documents it iterated.
+ private int countDocs(PostingsEnum docs) throws IOException {
+ int count = 0;
+ while ((docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ count++;
+ }
+ return count;
+ }
+
+ // flex: test basics of TermsEnum api on non-flex index
+ // Exercises seekCeil/next on the single-term "content" field of each old index and
+ // verifies the enum never walks past the field boundary into other fields' terms.
+ public void testNextIntoWrongField() throws Exception {
+ for (String name : oldNames) {
+ Directory dir = oldIndexDirs.get(name);
+ IndexReader r = DirectoryReader.open(dir);
+ TermsEnum terms = MultiTerms.getTerms(r, "content").iterator();
+ BytesRef t = terms.next();
+ assertNotNull(t);
+
+ // content field only has term aaa:
+ assertEquals("aaa", t.utf8ToString());
+ assertNull(terms.next());
+
+ BytesRef aaaTerm = new BytesRef("aaa");
+
+ // should be found exactly
+ assertEquals(TermsEnum.SeekStatus.FOUND, terms.seekCeil(aaaTerm));
+ assertEquals(DOCS_COUNT, countDocs(TestUtil.docs(random(), terms, null, PostingsEnum.NONE)));
+ assertNull(terms.next());
+
+ // should hit end of field
+ assertEquals(TermsEnum.SeekStatus.END, terms.seekCeil(new BytesRef("bbb")));
+ assertNull(terms.next());
+
+ // should seek to aaa
+ assertEquals(TermsEnum.SeekStatus.NOT_FOUND, terms.seekCeil(new BytesRef("a")));
+ assertTrue(terms.term().bytesEquals(aaaTerm));
+ assertEquals(DOCS_COUNT, countDocs(TestUtil.docs(random(), terms, null, PostingsEnum.NONE)));
+ assertNull(terms.next());
+
+ assertEquals(TermsEnum.SeekStatus.FOUND, terms.seekCeil(aaaTerm));
+ assertEquals(DOCS_COUNT, countDocs(TestUtil.docs(random(), terms, null, PostingsEnum.NONE)));
+ assertNull(terms.next());
+
+ r.close();
+ }
+ }
+
+ /**
+ * Test that we didn't forget to bump the current Constants.LUCENE_MAIN_VERSION. This is important
+ * so that we can determine which version of lucene wrote the segment.
+ */
+ public void testOldVersions() throws Exception {
+ // first create a little index with the current code and get the version
+ Directory currentDir = newDirectory();
+ RandomIndexWriter riw = new RandomIndexWriter(random(), currentDir);
+ riw.addDocument(new Document());
+ riw.close();
+ DirectoryReader ir = DirectoryReader.open(currentDir);
+ SegmentReader air = (SegmentReader) ir.leaves().get(0).reader();
+ Version currentVersion = air.getSegmentInfo().info.getVersion();
+ assertNotNull(currentVersion); // only 3.0 segments can have a null version
+ ir.close();
+ currentDir.close();
+
+ // now check all the old indexes, their version should be < the current version
+ for (String name : oldNames) {
+ Directory dir = oldIndexDirs.get(name);
+ DirectoryReader r = DirectoryReader.open(dir);
+ for (LeafReaderContext context : r.leaves()) {
+ air = (SegmentReader) context.reader();
+ Version oldVersion = air.getSegmentInfo().info.getVersion();
+ assertNotNull(oldVersion); // only 3.0 segments can have a null version
+ assertTrue(
+ "current Version.LATEST is <= an old index: did you forget to bump it?!",
+ currentVersion.onOrAfter(oldVersion));
+ }
+ r.close();
+ }
+ }
+
+ // Checks that, for single-version test indexes, the commit version's major matches the
+ // recorded index-created major version.
+ public void testIndexCreatedVersion() throws IOException {
+ for (String name : oldNames) {
+ Directory dir = oldIndexDirs.get(name);
+ SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
+ // those indexes are created by a single version so we can
+ // compare the commit version with the created version
+ assertEquals(infos.getCommitLuceneVersion().major, infos.getIndexCreatedVersionMajor());
+ }
+ }
+
+ // SegmentCommitInfo ids were introduced in 8.6.0: segments written on or after that
+ // version must have one, older segments must not.
+ public void testSegmentCommitInfoId() throws IOException {
+ for (String name : oldNames) {
+ Directory dir = oldIndexDirs.get(name);
+ SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
+ for (SegmentCommitInfo info : infos) {
+ if (info.info.getVersion().onOrAfter(Version.fromBits(8, 6, 0))) {
+ assertNotNull(info.toString(), info.getId());
+ } else {
+ assertNull(info.toString(), info.getId());
+ }
+ }
+ }
+ }
+
+ // Asserts every leaf segment in the given index was written with a default codec,
+ // identified (heuristically) by a codec name starting with "Lucene".
+ public void verifyUsesDefaultCodec(Directory dir, String name) throws Exception {
+ DirectoryReader r = DirectoryReader.open(dir);
+ for (LeafReaderContext context : r.leaves()) {
+ SegmentReader air = (SegmentReader) context.reader();
+ Codec codec = air.getSegmentInfo().info.getCodec();
+ assertTrue(
+ "codec used in "
+ + name
+ + " ("
+ + codec.getName()
+ + ") is not a default codec (does not begin with Lucene)",
+ codec.getName().startsWith("Lucene"));
+ }
+ r.close();
+ }
+
+ // Applies verifyUsesDefaultCodec to every supported old index.
+ public void testAllIndexesUseDefaultCodec() throws Exception {
+ for (String name : oldNames) {
+ Directory dir = oldIndexDirs.get(name);
+ verifyUsesDefaultCodec(dir, name);
+ }
+ }
+
+ // Asserts every segment (and the commit itself) is at Version.LATEST with an id, that
+ // the index-created major is preserved, and returns the segment count.
+ private int checkAllSegmentsUpgraded(Directory dir, int indexCreatedVersion) throws IOException {
+ final SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
+ if (VERBOSE) {
+ System.out.println("checkAllSegmentsUpgraded: " + infos);
+ }
+ for (SegmentCommitInfo si : infos) {
+ assertEquals(Version.LATEST, si.info.getVersion());
+ assertNotNull(si.getId());
+ }
+ assertEquals(Version.LATEST, infos.getCommitLuceneVersion());
+ assertEquals(indexCreatedVersion, infos.getIndexCreatedVersionMajor());
+ return infos.size();
+ }
+
+ // Returns the segment count of the latest commit in the given directory.
+ private int getNumberOfSegments(Directory dir) throws IOException {
+ final SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
+ return infos.size();
+ }
+
+ // Runs IndexUpgrader over every old index (multi- and single-segment) and verifies all
+ // segments end up at the current version while the created-version major is kept.
+ public void testUpgradeOldIndex() throws Exception {
+ List<String> names = new ArrayList<>(oldNames.length + oldSingleSegmentNames.length);
+ names.addAll(Arrays.asList(oldNames));
+ names.addAll(Arrays.asList(oldSingleSegmentNames));
+ for (String name : names) {
+ if (VERBOSE) {
+ System.out.println("testUpgradeOldIndex: index=" + name);
+ }
+ Directory dir = newDirectory(oldIndexDirs.get(name));
+ int indexCreatedVersion = SegmentInfos.readLatestCommit(dir).getIndexCreatedVersionMajor();
+
+ newIndexUpgrader(dir).upgrade();
+
+ checkAllSegmentsUpgraded(dir, indexCreatedVersion);
+
+ dir.close();
+ }
+ }
+
+ // Drives IndexUpgrader through its command-line entry point (main) with randomized
+ // flags, on a filesystem copy of each old index, and verifies the upgrade succeeded.
+ // System.out is redirected for the duration so -verbose output doesn't pollute logs.
+ public void testIndexUpgraderCommandLineArgs() throws Exception {
+
+ PrintStream savedSystemOut = System.out;
+ System.setOut(new PrintStream(new ByteArrayOutputStream(), false, "UTF-8"));
+ try {
+ for (Map.Entry<String, Directory> entry : oldIndexDirs.entrySet()) {
+ String name = entry.getKey();
+ Directory origDir = entry.getValue();
+ int indexCreatedVersion =
+ SegmentInfos.readLatestCommit(origDir).getIndexCreatedVersionMajor();
+ Path dir = createTempDir(name);
+ try (FSDirectory fsDir = FSDirectory.open(dir)) {
+ // beware that ExtraFS might add extraXXX files
+ Set<String> extraFiles = Set.of(fsDir.listAll());
+ for (String file : origDir.listAll()) {
+ if (extraFiles.contains(file) == false) {
+ fsDir.copyFrom(origDir, file, file, IOContext.DEFAULT);
+ }
+ }
+ }
+
+ String path = dir.toAbsolutePath().toString();
+
+ List<String> args = new ArrayList<>();
+ if (random().nextBoolean()) {
+ args.add("-verbose");
+ }
+ if (random().nextBoolean()) {
+ args.add("-delete-prior-commits");
+ }
+ if (random().nextBoolean()) {
+ // TODO: need to better randomize this, but ...
+ // - LuceneTestCase.FS_DIRECTORIES is private
+ // - newFSDirectory returns BaseDirectoryWrapper
+ // - BaseDirectoryWrapper doesn't expose delegate
+ Class<? extends FSDirectory> dirImpl = NIOFSDirectory.class;
+
+ args.add("-dir-impl");
+ args.add(dirImpl.getName());
+ }
+ args.add(path);
+
+ IndexUpgrader.main(args.toArray(new String[0]));
+
+ Directory upgradedDir = newFSDirectory(dir);
+ try {
+ checkAllSegmentsUpgraded(upgradedDir, indexCreatedVersion);
+ } finally {
+ upgradedDir.close();
+ }
+ }
+ } finally {
+ // always restore stdout, even if an assertion above failed
+ System.setOut(savedSystemOut);
+ }
+ }
+
+ // Starts from an old single-segment index, adds current-version segments via
+ // addIndexes, then upgrades; only the old segment should be rewritten, so the total
+ // segment count must be unchanged.
+ public void testUpgradeOldSingleSegmentIndexWithAdditions() throws Exception {
+ for (String name : oldSingleSegmentNames) {
+ if (VERBOSE) {
+ System.out.println("testUpgradeOldSingleSegmentIndexWithAdditions: index=" + name);
+ }
+ Directory dir = newDirectory(oldIndexDirs.get(name));
+ assertEquals("Original index must be single segment", 1, getNumberOfSegments(dir));
+ int indexCreatedVersion = SegmentInfos.readLatestCommit(dir).getIndexCreatedVersionMajor();
+
+ // create a bunch of dummy segments
+ int id = 40;
+ Directory ramDir = new ByteBuffersDirectory();
+ for (int i = 0; i < 3; i++) {
+ // only use Log- or TieredMergePolicy, to make document addition predictable and not
+ // suddenly merge:
+ MergePolicy mp = random().nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy();
+ IndexWriterConfig iwc =
+ new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(mp);
+ IndexWriter w = new IndexWriter(ramDir, iwc);
+ // add few more docs:
+ for (int j = 0; j < RANDOM_MULTIPLIER * random().nextInt(30); j++) {
+ addDoc(w, id++);
+ }
+ try {
+ w.commit();
+ } finally {
+ w.close();
+ }
+ }
+
+ // add dummy segments (which are all in current
+ // version) to single segment index
+ MergePolicy mp = random().nextBoolean() ? newLogMergePolicy() : newTieredMergePolicy();
+ IndexWriterConfig iwc = new IndexWriterConfig(null).setMergePolicy(mp);
+ IndexWriter w = new IndexWriter(dir, iwc);
+ w.addIndexes(ramDir);
+ try {
+ w.commit();
+ } finally {
+ w.close();
+ }
+
+ // determine count of segments in modified index
+ final int origSegCount = getNumberOfSegments(dir);
+
+ // ensure there is only one commit
+ assertEquals(1, DirectoryReader.listCommits(dir).size());
+ newIndexUpgrader(dir).upgrade();
+
+ final int segCount = checkAllSegmentsUpgraded(dir, indexCreatedVersion);
+ assertEquals(
+ "Index must still contain the same number of segments, as only one segment was upgraded and nothing else merged",
+ origSegCount,
+ segCount);
+
+ dir.close();
+ }
+ }
+
+ public static final String emptyIndex = "empty.9.0.0.zip";
+
+ public void testUpgradeEmptyOldIndex() throws Exception {
+ Path oldIndexDir = createTempDir("emptyIndex");
+ TestUtil.unzip(getDataInputStream(emptyIndex), oldIndexDir);
+ Directory dir = newFSDirectory(oldIndexDir);
+
+ newIndexUpgrader(dir).upgrade();
+
+ checkAllSegmentsUpgraded(dir, 9);
+
+ dir.close();
+ }
+
+ public static final String moreTermsIndex = "moreterms.9.0.0.zip";
+
+ public void testMoreTerms() throws Exception {
+ Path oldIndexDir = createTempDir("moreterms");
+ TestUtil.unzip(getDataInputStream(moreTermsIndex), oldIndexDir);
+ Directory dir = newFSDirectory(oldIndexDir);
+ DirectoryReader reader = DirectoryReader.open(dir);
+
+ verifyUsesDefaultCodec(dir, moreTermsIndex);
+ TestUtil.checkIndex(dir);
+ searchExampleIndex(reader);
+
+ reader.close();
+ dir.close();
+ }
+
+ public static final String dvUpdatesIndex = "dvupdates.9.0.0.zip";
+
+ private void assertNumericDocValues(LeafReader r, String f, String cf) throws IOException {
+ NumericDocValues ndvf = r.getNumericDocValues(f);
+ NumericDocValues ndvcf = r.getNumericDocValues(cf);
+ for (int i = 0; i < r.maxDoc(); i++) {
+ assertEquals(i, ndvcf.nextDoc());
+ assertEquals(i, ndvf.nextDoc());
+ assertEquals(ndvcf.longValue(), ndvf.longValue() * 2);
+ }
+ }
+
+ private void assertBinaryDocValues(LeafReader r, String f, String cf) throws IOException {
+ BinaryDocValues bdvf = r.getBinaryDocValues(f);
+ BinaryDocValues bdvcf = r.getBinaryDocValues(cf);
+ for (int i = 0; i < r.maxDoc(); i++) {
+ assertEquals(i, bdvf.nextDoc());
+ assertEquals(i, bdvcf.nextDoc());
+ assertEquals(getValue(bdvcf), getValue(bdvf) * 2);
+ }
+ }
+
+ private void verifyDocValues(Directory dir) throws IOException {
+ DirectoryReader reader = DirectoryReader.open(dir);
+ for (LeafReaderContext context : reader.leaves()) {
+ LeafReader r = context.reader();
+ assertNumericDocValues(r, "ndv1", "ndv1_c");
+ assertNumericDocValues(r, "ndv2", "ndv2_c");
+ assertBinaryDocValues(r, "bdv1", "bdv1_c");
+ assertBinaryDocValues(r, "bdv2", "bdv2_c");
+ }
+ reader.close();
+ }
+
+ public void testDocValuesUpdates() throws Exception {
+ Path oldIndexDir = createTempDir("dvupdates");
+ TestUtil.unzip(getDataInputStream(dvUpdatesIndex), oldIndexDir);
+ Directory dir = newFSDirectory(oldIndexDir);
+ verifyUsesDefaultCodec(dir, dvUpdatesIndex);
+
+ verifyDocValues(dir);
+
+ // update fields and verify index
+ IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random()));
+ IndexWriter writer = new IndexWriter(dir, conf);
+ updateNumeric(writer, "1", "ndv1", "ndv1_c", 300L);
+ updateNumeric(writer, "1", "ndv2", "ndv2_c", 300L);
+ updateBinary(writer, "1", "bdv1", "bdv1_c", 300L);
+ updateBinary(writer, "1", "bdv2", "bdv2_c", 300L);
+
+ writer.commit();
+ verifyDocValues(dir);
+
+ // merge all segments
+ writer.forceMerge(1);
+ writer.commit();
+ verifyDocValues(dir);
+
+ writer.close();
+ dir.close();
+ }
+
+ public void testDeletes() throws Exception {
+ Path oldIndexDir = createTempDir("dvupdates");
+ TestUtil.unzip(getDataInputStream(dvUpdatesIndex), oldIndexDir);
+ Directory dir = newFSDirectory(oldIndexDir);
+ verifyUsesDefaultCodec(dir, dvUpdatesIndex);
+
+ IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random()));
+ IndexWriter writer = new IndexWriter(dir, conf);
+
+ int maxDoc = writer.getDocStats().maxDoc;
+ writer.deleteDocuments(new Term("id", "1"));
+ if (random().nextBoolean()) {
+ writer.commit();
+ }
+
+ writer.forceMerge(1);
+ writer.commit();
+ assertEquals(maxDoc - 1, writer.getDocStats().maxDoc);
+
+ writer.close();
+ dir.close();
+ }
+
+ public void testSoftDeletes() throws Exception {
+ Path oldIndexDir = createTempDir("dvupdates");
+ TestUtil.unzip(getDataInputStream(dvUpdatesIndex), oldIndexDir);
+ Directory dir = newFSDirectory(oldIndexDir);
+ verifyUsesDefaultCodec(dir, dvUpdatesIndex);
+ IndexWriterConfig conf =
+ new IndexWriterConfig(new MockAnalyzer(random())).setSoftDeletesField("__soft_delete");
+ IndexWriter writer = new IndexWriter(dir, conf);
+ int maxDoc = writer.getDocStats().maxDoc;
+ writer.updateDocValues(new Term("id", "1"), new NumericDocValuesField("__soft_delete", 1));
+
+ if (random().nextBoolean()) {
+ writer.commit();
+ }
+ writer.forceMerge(1);
+ writer.commit();
+ assertEquals(maxDoc - 1, writer.getDocStats().maxDoc);
+ writer.close();
+ dir.close();
+ }
+
+ public void testDocValuesUpdatesWithNewField() throws Exception {
+ Path oldIndexDir = createTempDir("dvupdates");
+ TestUtil.unzip(getDataInputStream(dvUpdatesIndex), oldIndexDir);
+ Directory dir = newFSDirectory(oldIndexDir);
+ verifyUsesDefaultCodec(dir, dvUpdatesIndex);
+
+ // update fields and verify index
+ IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random()));
+ IndexWriter writer = new IndexWriter(dir, conf);
+ // introduce a new field that we later update
+ writer.addDocument(
+ Arrays.asList(
+ new StringField("id", "" + Integer.MAX_VALUE, Field.Store.NO),
+ new NumericDocValuesField("new_numeric", 1),
+ new BinaryDocValuesField("new_binary", toBytes(1))));
+ writer.updateNumericDocValue(new Term("id", "1"), "new_numeric", 1);
+ writer.updateBinaryDocValue(new Term("id", "1"), "new_binary", toBytes(1));
+
+ writer.commit();
+ Runnable assertDV =
+ () -> {
+ boolean found = false;
+ try (DirectoryReader reader = DirectoryReader.open(dir)) {
+ for (LeafReaderContext ctx : reader.leaves()) {
+ LeafReader leafReader = ctx.reader();
+ TermsEnum id = leafReader.terms("id").iterator();
+ if (id.seekExact(new BytesRef("1"))) {
+ PostingsEnum postings = id.postings(null, PostingsEnum.NONE);
+ NumericDocValues numericDocValues = leafReader.getNumericDocValues("new_numeric");
+ BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues("new_binary");
+ int doc;
+ while ((doc = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ found = true;
+ assertTrue(binaryDocValues.advanceExact(doc));
+ assertTrue(numericDocValues.advanceExact(doc));
+ assertEquals(1, numericDocValues.longValue());
+ assertEquals(toBytes(1), binaryDocValues.binaryValue());
+ }
+ }
+ }
+ } catch (IOException e) {
+ throw new AssertionError(e);
+ }
+ assertTrue(found);
+ };
+ assertDV.run();
+ // merge all segments
+ writer.forceMerge(1);
+ writer.commit();
+ assertDV.run();
+ writer.close();
+ dir.close();
+ }
+
+ // LUCENE-5907
+ public void testUpgradeWithNRTReader() throws Exception {
+ for (String name : oldNames) {
+ Directory dir = newDirectory(oldIndexDirs.get(name));
+
+ IndexWriter writer =
+ new IndexWriter(
+ dir, newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.APPEND));
+ writer.addDocument(new Document());
+ DirectoryReader r = DirectoryReader.open(writer);
+ writer.commit();
+ r.close();
+ writer.forceMerge(1);
+ writer.commit();
+ writer.rollback();
+ SegmentInfos.readLatestCommit(dir);
+ dir.close();
+ }
+ }
+
+ // LUCENE-5907
+ public void testUpgradeThenMultipleCommits() throws Exception {
+ for (String name : oldNames) {
+ Directory dir = newDirectory(oldIndexDirs.get(name));
+
+ IndexWriter writer =
+ new IndexWriter(
+ dir, newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.APPEND));
+ writer.addDocument(new Document());
+ writer.commit();
+ writer.addDocument(new Document());
+ writer.commit();
+ writer.close();
+ dir.close();
+ }
+ }
+
+ public void testSortedIndex() throws Exception {
+ for (String name : oldSortedNames) {
+ Path path = createTempDir("sorted");
+ InputStream resource = TestBackwardsCompatibility.class.getResourceAsStream(name + ".zip");
+ assertNotNull("Sorted index index " + name + " not found", resource);
+ TestUtil.unzip(resource, path);
+
+ Directory dir = newFSDirectory(path);
+ DirectoryReader reader = DirectoryReader.open(dir);
+
+ assertEquals(1, reader.leaves().size());
+ Sort sort = reader.leaves().get(0).reader().getMetaData().getSort();
+ assertNotNull(sort);
+ assertEquals("<long: \"dateDV\">!", sort.toString());
+
+ // This will confirm the docs are really sorted
+ TestUtil.checkIndex(dir);
+
+ searchExampleIndex(reader);
+
+ reader.close();
+ dir.close();
+ }
+ }
+
+ private void searchExampleIndex(DirectoryReader reader) throws IOException {
+ IndexSearcher searcher = newSearcher(reader);
+
+ TopDocs topDocs = searcher.search(new FieldExistsQuery("titleTokenized"), 10);
+ assertEquals(50, topDocs.totalHits.value);
+
+ topDocs = searcher.search(new FieldExistsQuery("titleDV"), 10);
+ assertEquals(50, topDocs.totalHits.value);
+
+ topDocs = searcher.search(new TermQuery(new Term("body", "ja")), 10);
+ assertTrue(topDocs.totalHits.value > 0);
+
+ topDocs =
+ searcher.search(
+ IntPoint.newRangeQuery("docid_int", 42, 44),
+ 10,
+ new Sort(new SortField("docid_intDV", SortField.Type.INT)));
+ assertEquals(3, topDocs.totalHits.value);
+ assertEquals(3, topDocs.scoreDocs.length);
+ assertEquals(42, ((FieldDoc) topDocs.scoreDocs[0]).fields[0]);
+ assertEquals(43, ((FieldDoc) topDocs.scoreDocs[1]).fields[0]);
+ assertEquals(44, ((FieldDoc) topDocs.scoreDocs[2]).fields[0]);
+
+ topDocs = searcher.search(new TermQuery(new Term("body", "the")), 5);
+ assertTrue(topDocs.totalHits.value > 0);
+
+ topDocs =
+ searcher.search(
+ new MatchAllDocsQuery(), 5, new Sort(new SortField("dateDV", SortField.Type.LONG)));
+ assertEquals(50, topDocs.totalHits.value);
+ assertEquals(5, topDocs.scoreDocs.length);
+ long firstDate = (Long) ((FieldDoc) topDocs.scoreDocs[0]).fields[0];
+ long lastDate = (Long) ((FieldDoc) topDocs.scoreDocs[4]).fields[0];
+ assertTrue(firstDate <= lastDate);
+ }
+
+ static long getValue(BinaryDocValues bdv) throws IOException {
+ BytesRef term = bdv.binaryValue();
+ int idx = term.offset;
+ byte b = term.bytes[idx++];
+ long value = b & 0x7FL;
+ for (int shift = 7; (b & 0x80L) != 0; shift += 7) {
+ b = term.bytes[idx++];
+ value |= (b & 0x7FL) << shift;
+ }
+ return value;
+ }
+
+ // encodes a long into a BytesRef as VLong so that we get varying number of bytes when we update
+ static BytesRef toBytes(long value) {
+ BytesRef bytes = new BytesRef(10); // negative longs may take 10 bytes
+ while ((value & ~0x7FL) != 0L) {
+ bytes.bytes[bytes.length++] = (byte) ((value & 0x7FL) | 0x80L);
+ value >>>= 7;
+ }
+ bytes.bytes[bytes.length++] = (byte) value;
+ return bytes;
+ }
+
+ public void testFailOpenOldIndex() throws IOException {
+ for (String name : oldNames) {
+ Directory directory = oldIndexDirs.get(name);
+ IndexCommit commit = DirectoryReader.listCommits(directory).get(0);
+ IndexFormatTooOldException ex =
+ expectThrows(
+ IndexFormatTooOldException.class,
+ () -> StandardDirectoryReader.open(commit, Version.LATEST.major, null));
+ assertTrue(
+ ex.getMessage()
+ .contains(
+ "only supports reading from version " + Version.LATEST.major + " upwards."));
+ // now open with allowed min version
+ StandardDirectoryReader.open(commit, Version.MIN_SUPPORTED_MAJOR, null).close();
+ }
+ }
+
+ @Nightly
+ public void testReadNMinusTwoCommit() throws IOException {
+ for (String name : binarySupportedNames) {
+ Path oldIndexDir = createTempDir(name);
+ TestUtil.unzip(getDataInputStream("unsupported." + name + ".zip"), oldIndexDir);
+ try (BaseDirectoryWrapper dir = newFSDirectory(oldIndexDir)) {
+ IndexCommit commit = DirectoryReader.listCommits(dir).get(0);
+ StandardDirectoryReader.open(commit, MIN_BINARY_SUPPORTED_MAJOR, null).close();
+ }
+ }
+ }
+}
diff --git a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java
index 1f76e829676..e21fabe9e5f 100644
--- a/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java
+++ b/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java
@@ -27,7 +27,7 @@ import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.lucene91.Lucene91Codec;
+import org.apache.lucene.codecs.lucene92.Lucene92Codec;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexDeletionPolicy;
@@ -152,7 +152,7 @@ public class CreateIndexTask extends PerfTask {
try {
final PostingsFormat postingsFormatChosen = PostingsFormat.forName(postingsFormat);
iwConf.setCodec(
- new Lucene91Codec() {
+ new Lucene92Codec() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
return postingsFormatChosen;
diff --git a/lucene/core/src/java/module-info.java b/lucene/core/src/java/module-info.java
index f4139e7eaa3..cbb5c3a3147 100644
--- a/lucene/core/src/java/module-info.java
+++ b/lucene/core/src/java/module-info.java
@@ -15,6 +15,9 @@
* limitations under the License.
*/
+import org.apache.lucene.codecs.lucene92.Lucene92Codec;
+import org.apache.lucene.codecs.lucene92.Lucene92HnswVectorsFormat;
+
/** Lucene Core. */
@SuppressWarnings("module") // the test framework is compiled after the core...
module org.apache.lucene.core {
@@ -27,8 +30,8 @@ module org.apache.lucene.core {
exports org.apache.lucene.analysis.tokenattributes;
exports org.apache.lucene.codecs;
exports org.apache.lucene.codecs.compressing;
- exports org.apache.lucene.codecs.lucene91;
exports org.apache.lucene.codecs.lucene90;
+ exports org.apache.lucene.codecs.lucene92;
exports org.apache.lucene.codecs.lucene90.blocktree;
exports org.apache.lucene.codecs.lucene90.compressing;
exports org.apache.lucene.codecs.perfield;
@@ -60,11 +63,11 @@ module org.apache.lucene.core {
provides org.apache.lucene.analysis.TokenizerFactory with
org.apache.lucene.analysis.standard.StandardTokenizerFactory;
provides org.apache.lucene.codecs.Codec with
- org.apache.lucene.codecs.lucene91.Lucene91Codec;
+ Lucene92Codec;
provides org.apache.lucene.codecs.DocValuesFormat with
org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
provides org.apache.lucene.codecs.KnnVectorsFormat with
- org.apache.lucene.codecs.lucene91.Lucene91HnswVectorsFormat;
+ Lucene92HnswVectorsFormat;
provides org.apache.lucene.codecs.PostingsFormat with
org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat;
provides org.apache.lucene.index.SortFieldProvider with
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/Codec.java
index 176cc57cfb5..bb75a5bca40 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/Codec.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/Codec.java
@@ -55,7 +55,7 @@ public abstract class Codec implements NamedSPILoader.NamedSPI {
return LOADER;
}
- static Codec defaultCodec = LOADER.lookup("Lucene91");
+ static Codec defaultCodec = LOADER.lookup("Lucene92");
}
private final String name;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsFormat.java
index 6300c912183..3e1903874c5 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsFormat.java
@@ -85,7 +85,7 @@ public abstract class KnnVectorsFormat implements NamedSPILoader.NamedSPI {
@Override
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) {
throw new UnsupportedOperationException(
- "Attempt to write EMPTY VectorValues: maybe you forgot to use codec=Lucene91");
+ "Attempt to write EMPTY VectorValues: maybe you forgot to use codec=Lucene92");
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91Codec.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene92/Lucene92Codec.java
similarity index 92%
rename from lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91Codec.java
rename to lucene/core/src/java/org/apache/lucene/codecs/lucene92/Lucene92Codec.java
index 2c289a888c1..5264295a902 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91Codec.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene92/Lucene92Codec.java
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.codecs.lucene91;
+package org.apache.lucene.codecs.lucene92;
import java.util.Objects;
import org.apache.lucene.codecs.Codec;
@@ -45,14 +45,14 @@ import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
/**
- * Implements the Lucene 9.1 index format
+ * Implements the Lucene 9.2 index format
*
* <p>If you want to reuse functionality of this codec in another codec, extend {@link FilterCodec}.
*
- * @see org.apache.lucene.codecs.lucene91 package documentation for file format details.
+ * @see org.apache.lucene.codecs.lucene92 package documentation for file format details.
* @lucene.experimental
*/
-public class Lucene91Codec extends Codec {
+public class Lucene92Codec extends Codec {
/** Configuration option for the codec. */
public enum Mode {
@@ -80,7 +80,7 @@ public class Lucene91Codec extends Codec {
new PerFieldPostingsFormat() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
- return Lucene91Codec.this.getPostingsFormatForField(field);
+ return Lucene92Codec.this.getPostingsFormatForField(field);
}
};
@@ -89,7 +89,7 @@ public class Lucene91Codec extends Codec {
new PerFieldDocValuesFormat() {
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
- return Lucene91Codec.this.getDocValuesFormatForField(field);
+ return Lucene92Codec.this.getDocValuesFormatForField(field);
}
};
@@ -98,14 +98,14 @@ public class Lucene91Codec extends Codec {
new PerFieldKnnVectorsFormat() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
- return Lucene91Codec.this.getKnnVectorsFormatForField(field);
+ return Lucene92Codec.this.getKnnVectorsFormatForField(field);
}
};
private final StoredFieldsFormat storedFieldsFormat;
/** Instantiates a new codec. */
- public Lucene91Codec() {
+ public Lucene92Codec() {
this(Mode.BEST_SPEED);
}
@@ -114,13 +114,13 @@ public class Lucene91Codec extends Codec {
*
* @param mode stored fields compression mode to use for newly flushed/merged segments.
*/
- public Lucene91Codec(Mode mode) {
- super("Lucene91");
+ public Lucene92Codec(Mode mode) {
+ super("Lucene92");
this.storedFieldsFormat =
new Lucene90StoredFieldsFormat(Objects.requireNonNull(mode).storedMode);
this.defaultPostingsFormat = new Lucene90PostingsFormat();
this.defaultDVFormat = new Lucene90DocValuesFormat();
- this.defaultKnnVectorsFormat = new Lucene91HnswVectorsFormat();
+ this.defaultKnnVectorsFormat = new Lucene92HnswVectorsFormat();
}
@Override
@@ -196,7 +196,7 @@ public class Lucene91Codec extends Codec {
/**
* Returns the vectors format that should be used for writing new segments of <code>field</code>
*
- * <p>The default implementation always returns "Lucene91".
+ * <p>The default implementation always returns "Lucene92".
*
* <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
* future version of Lucene are only guaranteed to be able to read the default implementation.
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91HnswVectorsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene92/Lucene92HnswVectorsFormat.java
similarity index 73%
rename from lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91HnswVectorsFormat.java
rename to lucene/core/src/java/org/apache/lucene/codecs/lucene92/Lucene92HnswVectorsFormat.java
index 49eaf74c240..d762033e96d 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91HnswVectorsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene92/Lucene92HnswVectorsFormat.java
@@ -15,25 +15,36 @@
* limitations under the License.
*/
-package org.apache.lucene.codecs.lucene91;
+package org.apache.lucene.codecs.lucene92;
import java.io.IOException;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.KnnVectorsReader;
import org.apache.lucene.codecs.KnnVectorsWriter;
+import org.apache.lucene.codecs.lucene90.IndexedDISI;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.hnsw.HnswGraph;
/**
- * Lucene 9.1 vector format, which encodes numeric vector values and an optional associated graph
+ * Lucene 9.2 vector format, which encodes numeric vector values and an optional associated graph
* connecting the documents having values. The graph is used to power HNSW search. The format
* consists of three files:
*
* <h2>.vec (vector data) file</h2>
*
- * <p>This file stores all the floating-point vector data ordered by field, document ordinal, and
- * vector dimension. The floats are stored in little-endian byte order.
+ * <p>For each field:
+ *
+ * <ul>
+ * <li>Floating-point vector data ordered by field, document ordinal, and vector dimension. The
+ * floats are stored in little-endian byte order
+ * <li>DocIds encoded by {@link IndexedDISI#writeBitSet(DocIdSetIterator, IndexOutput, byte)};
+ * note that this applies only in the sparse case
+ * <li>OrdToDoc is encoded by {@link org.apache.lucene.util.packed.DirectMonotonicWriter}; note
+ * that this applies only in the sparse case
+ * </ul>
*
* <h2>.vex (vector index)</h2>
*
@@ -69,24 +80,26 @@ import org.apache.lucene.util.hnsw.HnswGraph;
* <li><b>[int]</b> the number of documents having values for this field
* <li><b>[int8]</b> if equals to -1, dense – all documents have values for a field. If equals to
* 0, sparse – some documents missing values.
- * <li><b>array[int]</b> for sparse case, the docids of documents having vectors, in order
+ * <li>DocIds were encoded by {@link IndexedDISI#writeBitSet(DocIdSetIterator, IndexOutput, byte)}
+ * <li>OrdToDoc is encoded by {@link org.apache.lucene.util.packed.DirectMonotonicWriter}; note
+ * that this applies only in the sparse case
* <li><b>[int]</b> the maximum number of connections (neigbours) that each node can have
* <li><b>[int]</b> number of levels in the graph
* <li>Graph nodes by level. For each level
* <ul>
* <li><b>[int]</b> the number of nodes on this level
* <li><b>array[int]</b> for levels greater than 0 list of nodes on this level, stored as
- * the level 0th nodes ordinals.
+ * the level 0th nodes ordinals.
* </ul>
* </ul>
*
* @lucene.experimental
*/
-public final class Lucene91HnswVectorsFormat extends KnnVectorsFormat {
+public final class Lucene92HnswVectorsFormat extends KnnVectorsFormat {
- static final String META_CODEC_NAME = "Lucene91HnswVectorsFormatMeta";
- static final String VECTOR_DATA_CODEC_NAME = "Lucene91HnswVectorsFormatData";
- static final String VECTOR_INDEX_CODEC_NAME = "Lucene91HnswVectorsFormatIndex";
+ static final String META_CODEC_NAME = "lucene92HnswVectorsFormatMeta";
+ static final String VECTOR_DATA_CODEC_NAME = "lucene92HnswVectorsFormatData";
+ static final String VECTOR_INDEX_CODEC_NAME = "lucene92HnswVectorsFormatIndex";
static final String META_EXTENSION = "vem";
static final String VECTOR_DATA_EXTENSION = "vec";
static final String VECTOR_INDEX_EXTENSION = "vex";
@@ -105,40 +118,40 @@ public final class Lucene91HnswVectorsFormat extends KnnVectorsFormat {
/**
* Controls how many of the nearest neighbor candidates are connected to the new node. Defaults to
- * {@link Lucene91HnswVectorsFormat#DEFAULT_MAX_CONN}. See {@link HnswGraph} for more details.
+ * {@link Lucene92HnswVectorsFormat#DEFAULT_MAX_CONN}. See {@link HnswGraph} for more details.
*/
private final int maxConn;
/**
* The number of candidate neighbors to track while searching the graph for each newly inserted
- * node. Defaults to to {@link Lucene91HnswVectorsFormat#DEFAULT_BEAM_WIDTH}. See {@link
+ * node. Defaults to {@link Lucene92HnswVectorsFormat#DEFAULT_BEAM_WIDTH}. See {@link
* HnswGraph} for details.
*/
private final int beamWidth;
- public Lucene91HnswVectorsFormat() {
+ public Lucene92HnswVectorsFormat() {
this(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH);
}
- public Lucene91HnswVectorsFormat(int maxConn, int beamWidth) {
- super("Lucene91HnswVectorsFormat");
+ public Lucene92HnswVectorsFormat(int maxConn, int beamWidth) {
+ super("lucene92HnswVectorsFormat");
this.maxConn = maxConn;
this.beamWidth = beamWidth;
}
@Override
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
- return new Lucene91HnswVectorsWriter(state, maxConn, beamWidth);
+ return new Lucene92HnswVectorsWriter(state, maxConn, beamWidth);
}
@Override
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
- return new Lucene91HnswVectorsReader(state);
+ return new Lucene92HnswVectorsReader(state);
}
@Override
public String toString() {
- return "Lucene91HnswVectorsFormat(name = Lucene91HnswVectorsFormat, maxConn = "
+ return "lucene92HnswVectorsFormat(name = lucene92HnswVectorsFormat, maxConn = "
+ maxConn
+ ", beamWidth="
+ beamWidth
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91HnswVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene92/Lucene92HnswVectorsReader.java
similarity index 94%
rename from lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91HnswVectorsReader.java
rename to lucene/core/src/java/org/apache/lucene/codecs/lucene92/Lucene92HnswVectorsReader.java
index 1df95f980b9..a03b0b9b32b 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91HnswVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene92/Lucene92HnswVectorsReader.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.lucene.codecs.lucene91;
+package org.apache.lucene.codecs.lucene92;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
@@ -51,14 +51,14 @@ import org.apache.lucene.util.packed.DirectMonotonicReader;
*
* @lucene.experimental
*/
-public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
+public final class Lucene92HnswVectorsReader extends KnnVectorsReader {
private final FieldInfos fieldInfos;
private final Map<String, FieldEntry> fields = new HashMap<>();
private final IndexInput vectorData;
private final IndexInput vectorIndex;
- Lucene91HnswVectorsReader(SegmentReadState state) throws IOException {
+ Lucene92HnswVectorsReader(SegmentReadState state) throws IOException {
this.fieldInfos = state.fieldInfos;
int versionMeta = readMetadata(state);
boolean success = false;
@@ -67,14 +67,14 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
openDataInput(
state,
versionMeta,
- Lucene91HnswVectorsFormat.VECTOR_DATA_EXTENSION,
- Lucene91HnswVectorsFormat.VECTOR_DATA_CODEC_NAME);
+ Lucene92HnswVectorsFormat.VECTOR_DATA_EXTENSION,
+ Lucene92HnswVectorsFormat.VECTOR_DATA_CODEC_NAME);
vectorIndex =
openDataInput(
state,
versionMeta,
- Lucene91HnswVectorsFormat.VECTOR_INDEX_EXTENSION,
- Lucene91HnswVectorsFormat.VECTOR_INDEX_CODEC_NAME);
+ Lucene92HnswVectorsFormat.VECTOR_INDEX_EXTENSION,
+ Lucene92HnswVectorsFormat.VECTOR_INDEX_CODEC_NAME);
success = true;
} finally {
if (success == false) {
@@ -86,7 +86,7 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
private int readMetadata(SegmentReadState state) throws IOException {
String metaFileName =
IndexFileNames.segmentFileName(
- state.segmentInfo.name, state.segmentSuffix, Lucene91HnswVectorsFormat.META_EXTENSION);
+ state.segmentInfo.name, state.segmentSuffix, Lucene92HnswVectorsFormat.META_EXTENSION);
int versionMeta = -1;
try (ChecksumIndexInput meta = state.directory.openChecksumInput(metaFileName, state.context)) {
Throwable priorE = null;
@@ -94,9 +94,9 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
versionMeta =
CodecUtil.checkIndexHeader(
meta,
- Lucene91HnswVectorsFormat.META_CODEC_NAME,
- Lucene91HnswVectorsFormat.VERSION_START,
- Lucene91HnswVectorsFormat.VERSION_CURRENT,
+ Lucene92HnswVectorsFormat.META_CODEC_NAME,
+ Lucene92HnswVectorsFormat.VERSION_START,
+ Lucene92HnswVectorsFormat.VERSION_CURRENT,
state.segmentInfo.getId(),
state.segmentSuffix);
readFields(meta, state.fieldInfos);
@@ -119,8 +119,8 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
CodecUtil.checkIndexHeader(
in,
codecName,
- Lucene91HnswVectorsFormat.VERSION_START,
- Lucene91HnswVectorsFormat.VERSION_CURRENT,
+ Lucene92HnswVectorsFormat.VERSION_START,
+ Lucene92HnswVectorsFormat.VERSION_CURRENT,
state.segmentInfo.getId(),
state.segmentSuffix);
if (versionMeta != versionVectorData) {
@@ -192,7 +192,7 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
@Override
public long ramBytesUsed() {
- long totalBytes = RamUsageEstimator.shallowSizeOfInstance(Lucene91HnswVectorsFormat.class);
+ long totalBytes = RamUsageEstimator.shallowSizeOfInstance(Lucene92HnswVectorsFormat.class);
totalBytes +=
RamUsageEstimator.sizeOfMap(
fields, RamUsageEstimator.shallowSizeOfInstance(FieldEntry.class));
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91HnswVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene92/Lucene92HnswVectorsWriter.java
similarity index 93%
rename from lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91HnswVectorsWriter.java
rename to lucene/core/src/java/org/apache/lucene/codecs/lucene92/Lucene92HnswVectorsWriter.java
index 0653a8938b5..c2fc9b35975 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene91/Lucene91HnswVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene92/Lucene92HnswVectorsWriter.java
@@ -15,9 +15,9 @@
* limitations under the License.
*/
-package org.apache.lucene.codecs.lucene91;
+package org.apache.lucene.codecs.lucene92;
-import static org.apache.lucene.codecs.lucene91.Lucene91HnswVectorsFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
+import static org.apache.lucene.codecs.lucene92.Lucene92HnswVectorsFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
import java.io.IOException;
@@ -49,7 +49,7 @@ import org.apache.lucene.util.packed.DirectMonotonicWriter;
*
* @lucene.experimental
*/
-public final class Lucene91HnswVectorsWriter extends KnnVectorsWriter {
+public final class Lucene92HnswVectorsWriter extends KnnVectorsWriter {
private final SegmentWriteState segmentWriteState;
private final IndexOutput meta, vectorData, vectorIndex;
@@ -59,7 +59,7 @@ public final class Lucene91HnswVectorsWriter extends KnnVectorsWriter {
private final int beamWidth;
private boolean finished;
- Lucene91HnswVectorsWriter(SegmentWriteState state, int maxConn, int beamWidth)
+ Lucene92HnswVectorsWriter(SegmentWriteState state, int maxConn, int beamWidth)
throws IOException {
this.maxConn = maxConn;
this.beamWidth = beamWidth;
@@ -69,19 +69,19 @@ public final class Lucene91HnswVectorsWriter extends KnnVectorsWriter {
String metaFileName =
IndexFileNames.segmentFileName(
- state.segmentInfo.name, state.segmentSuffix, Lucene91HnswVectorsFormat.META_EXTENSION);
+ state.segmentInfo.name, state.segmentSuffix, Lucene92HnswVectorsFormat.META_EXTENSION);
String vectorDataFileName =
IndexFileNames.segmentFileName(
state.segmentInfo.name,
state.segmentSuffix,
- Lucene91HnswVectorsFormat.VECTOR_DATA_EXTENSION);
+ Lucene92HnswVectorsFormat.VECTOR_DATA_EXTENSION);
String indexDataFileName =
IndexFileNames.segmentFileName(
state.segmentInfo.name,
state.segmentSuffix,
- Lucene91HnswVectorsFormat.VECTOR_INDEX_EXTENSION);
+ Lucene92HnswVectorsFormat.VECTOR_INDEX_EXTENSION);
boolean success = false;
try {
@@ -91,20 +91,20 @@ public final class Lucene91HnswVectorsWriter extends KnnVectorsWriter {
CodecUtil.writeIndexHeader(
meta,
- Lucene91HnswVectorsFormat.META_CODEC_NAME,
- Lucene91HnswVectorsFormat.VERSION_CURRENT,
+ Lucene92HnswVectorsFormat.META_CODEC_NAME,
+ Lucene92HnswVectorsFormat.VERSION_CURRENT,
state.segmentInfo.getId(),
state.segmentSuffix);
CodecUtil.writeIndexHeader(
vectorData,
- Lucene91HnswVectorsFormat.VECTOR_DATA_CODEC_NAME,
- Lucene91HnswVectorsFormat.VERSION_CURRENT,
+ Lucene92HnswVectorsFormat.VECTOR_DATA_CODEC_NAME,
+ Lucene92HnswVectorsFormat.VERSION_CURRENT,
state.segmentInfo.getId(),
state.segmentSuffix);
CodecUtil.writeIndexHeader(
vectorIndex,
- Lucene91HnswVectorsFormat.VECTOR_INDEX_CODEC_NAME,
- Lucene91HnswVectorsFormat.VERSION_CURRENT,
+ Lucene92HnswVectorsFormat.VECTOR_INDEX_CODEC_NAME,
+ Lucene92HnswVectorsFormat.VERSION_CURRENT,
state.segmentInfo.getId(),
state.segmentSuffix);
maxDoc = state.segmentInfo.maxDoc();
@@ -143,7 +143,7 @@ public final class Lucene91HnswVectorsWriter extends KnnVectorsWriter {
long vectorIndexOffset = vectorIndex.getFilePointer();
// build the graph using the temporary vector data
- // we use Lucene91HnswVectorsReader.DenseOffHeapVectorValues for the graph construction
+ // we use Lucene92HnswVectorsReader.DenseOffHeapVectorValues for the graph construction
// doesn't need to know docIds
// TODO: separate random access vector values from DocIdSetIterator?
OffHeapVectorValues offHeapVectors =
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene91/OffHeapVectorValues.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene92/OffHeapVectorValues.java
similarity index 97%
rename from lucene/core/src/java/org/apache/lucene/codecs/lucene91/OffHeapVectorValues.java
rename to lucene/core/src/java/org/apache/lucene/codecs/lucene92/OffHeapVectorValues.java
index 2b846db7f5b..199df605fe0 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene91/OffHeapVectorValues.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene92/OffHeapVectorValues.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.lucene.codecs.lucene91;
+package org.apache.lucene.codecs.lucene92;
import java.io.IOException;
import java.nio.ByteBuffer;
@@ -87,7 +87,7 @@ abstract class OffHeapVectorValues extends VectorValues
public abstract int ordToDoc(int ord);
static OffHeapVectorValues load(
- Lucene91HnswVectorsReader.FieldEntry fieldEntry, IndexInput vectorData) throws IOException {
+ Lucene92HnswVectorsReader.FieldEntry fieldEntry, IndexInput vectorData) throws IOException {
if (fieldEntry.docsWithFieldOffset == -2) {
return new EmptyOffHeapVectorValues(fieldEntry.dimension);
}
@@ -164,10 +164,10 @@ abstract class OffHeapVectorValues extends VectorValues
private final IndexedDISI disi;
// dataIn is used to init a new IndexedDISI for #randomAccess()
private final IndexInput dataIn;
- private final Lucene91HnswVectorsReader.FieldEntry fieldEntry;
+ private final Lucene92HnswVectorsReader.FieldEntry fieldEntry;
public SparseOffHeapVectorValues(
- Lucene91HnswVectorsReader.FieldEntry fieldEntry, IndexInput dataIn, IndexInput slice)
+ Lucene92HnswVectorsReader.FieldEntry fieldEntry, IndexInput dataIn, IndexInput slice)
throws IOException {
super(fieldEntry.dimension, fieldEntry.size, slice);
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene91/package-info.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene92/package-info.java
similarity index 98%
rename from lucene/core/src/java/org/apache/lucene/codecs/lucene91/package-info.java
rename to lucene/core/src/java/org/apache/lucene/codecs/lucene92/package-info.java
index 04603320664..f87fd0f2995 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene91/package-info.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene92/package-info.java
@@ -16,7 +16,7 @@
*/
/**
- * Lucene 9.1 file format.
+ * Lucene 9.2 file format.
*
* <h2>Apache Lucene - Index File Formats</h2>
*
@@ -180,7 +180,7 @@
* of files, recording dimensionally indexed fields, to enable fast numeric range filtering
* and large numeric values like BigInteger and BigDecimal (1D) and geographic shape
* intersection (2D, 3D).
- * <li>{@link org.apache.lucene.codecs.lucene91.Lucene91HnswVectorsFormat Vector values}. The
+ * <li>{@link org.apache.lucene.codecs.lucene92.Lucene92HnswVectorsFormat Vector values}. The
* vector format stores numeric vectors in a format optimized for random access and
* computation, supporting high-dimensional nearest-neighbor search.
* </ul>
@@ -310,7 +310,7 @@
* <td>Holds indexed points</td>
* </tr>
* <tr>
- * <td>{@link org.apache.lucene.codecs.lucene91.Lucene91HnswVectorsFormat Vector values}</td>
+ * <td>{@link org.apache.lucene.codecs.lucene92.Lucene92HnswVectorsFormat Vector values}</td>
* <td>.vec, .vem</td>
* <td>Holds indexed vectors; <code>.vec</code> files contain the raw vector data, and
* <code>.vem</code> the vector metadata</td>
@@ -403,6 +403,8 @@
* smaller stored fields.
* <li>In version 9.0, vector-valued fields were added.
* <li>In version 9.1, vector-valued fields were modified to add a graph hierarchy.
+ * <li>In version 9.2, docs of vector-valued fields were moved from .vem to .vec and encoded by
+ IndexedDISI. ordToDoc mappings were added to .vem.
* </ul>
*
* <a id="Limitations"></a>
@@ -417,4 +419,4 @@
* <code>UInt64</code> values, or better yet, {@link org.apache.lucene.store.DataOutput#writeVInt
* VInt} values which have no limit. </div>
*/
-package org.apache.lucene.codecs.lucene91;
+package org.apache.lucene.codecs.lucene92;
diff --git a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
index 6e977e42f1e..cd3ccaa8e17 100644
--- a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
+++ b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.Codec
@@ -13,4 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-org.apache.lucene.codecs.lucene91.Lucene91Codec
+org.apache.lucene.codecs.lucene92.Lucene92Codec
diff --git a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat
index 692145891fb..35bd0c0824a 100644
--- a/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat
+++ b/lucene/core/src/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat
@@ -13,4 +13,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-org.apache.lucene.codecs.lucene91.Lucene91HnswVectorsFormat
+org.apache.lucene.codecs.lucene92.Lucene92HnswVectorsFormat
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90StoredFieldsFormatHighCompression.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90StoredFieldsFormatHighCompression.java
index 6b747ea00a1..fe6c43e2893 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90StoredFieldsFormatHighCompression.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90StoredFieldsFormatHighCompression.java
@@ -18,8 +18,8 @@ package org.apache.lucene.codecs.lucene90;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.codecs.lucene91.Lucene91Codec;
-import org.apache.lucene.codecs.lucene91.Lucene91Codec.Mode;
+import org.apache.lucene.codecs.lucene92.Lucene92Codec;
+import org.apache.lucene.codecs.lucene92.Lucene92Codec.Mode;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader;
@@ -31,7 +31,7 @@ import org.apache.lucene.tests.index.BaseStoredFieldsFormatTestCase;
public class TestLucene90StoredFieldsFormatHighCompression extends BaseStoredFieldsFormatTestCase {
@Override
protected Codec getCodec() {
- return new Lucene91Codec(Mode.BEST_COMPRESSION);
+ return new Lucene92Codec(Mode.BEST_COMPRESSION);
}
/**
@@ -41,7 +41,7 @@ public class TestLucene90StoredFieldsFormatHighCompression extends BaseStoredFie
Directory dir = newDirectory();
for (int i = 0; i < 10; i++) {
IndexWriterConfig iwc = newIndexWriterConfig();
- iwc.setCodec(new Lucene91Codec(RandomPicks.randomFrom(random(), Mode.values())));
+ iwc.setCodec(new Lucene92Codec(RandomPicks.randomFrom(random(), Mode.values())));
IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig());
Document doc = new Document();
doc.add(new StoredField("field1", "value1"));
@@ -70,7 +70,7 @@ public class TestLucene90StoredFieldsFormatHighCompression extends BaseStoredFie
expectThrows(
NullPointerException.class,
() -> {
- new Lucene91Codec(null);
+ new Lucene92Codec(null);
});
expectThrows(
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestKnnGraph.java b/lucene/core/src/test/org/apache/lucene/index/TestKnnGraph.java
index f9c544b2e86..b2bf9cab30c 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestKnnGraph.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestKnnGraph.java
@@ -30,9 +30,9 @@ import java.util.Set;
import java.util.concurrent.CountDownLatch;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.KnnVectorsFormat;
-import org.apache.lucene.codecs.lucene91.Lucene91Codec;
-import org.apache.lucene.codecs.lucene91.Lucene91HnswVectorsFormat;
-import org.apache.lucene.codecs.lucene91.Lucene91HnswVectorsReader;
+import org.apache.lucene.codecs.lucene92.Lucene92Codec;
+import org.apache.lucene.codecs.lucene92.Lucene92HnswVectorsFormat;
+import org.apache.lucene.codecs.lucene92.Lucene92HnswVectorsReader;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -64,7 +64,7 @@ public class TestKnnGraph extends LuceneTestCase {
private static final String KNN_GRAPH_FIELD = "vector";
- private static int maxConn = Lucene91HnswVectorsFormat.DEFAULT_MAX_CONN;
+ private static int maxConn = Lucene92HnswVectorsFormat.DEFAULT_MAX_CONN;
private Codec codec;
private VectorSimilarityFunction similarityFunction;
@@ -77,11 +77,11 @@ public class TestKnnGraph extends LuceneTestCase {
}
codec =
- new Lucene91Codec() {
+ new Lucene92Codec() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
- return new Lucene91HnswVectorsFormat(
- maxConn, Lucene91HnswVectorsFormat.DEFAULT_BEAM_WIDTH);
+ return new Lucene92HnswVectorsFormat(
+ maxConn, Lucene92HnswVectorsFormat.DEFAULT_BEAM_WIDTH);
}
};
@@ -91,7 +91,7 @@ public class TestKnnGraph extends LuceneTestCase {
@After
public void cleanup() {
- maxConn = Lucene91HnswVectorsFormat.DEFAULT_MAX_CONN;
+ maxConn = Lucene92HnswVectorsFormat.DEFAULT_MAX_CONN;
}
/** Basic test of creating documents in a graph */
@@ -238,8 +238,8 @@ public class TestKnnGraph extends LuceneTestCase {
PerFieldKnnVectorsFormat.FieldsReader perFieldReader =
(PerFieldKnnVectorsFormat.FieldsReader)
((CodecReader) getOnlyLeafReader(reader)).getVectorReader();
- Lucene91HnswVectorsReader vectorReader =
- (Lucene91HnswVectorsReader) perFieldReader.getFieldReader(KNN_GRAPH_FIELD);
+ Lucene92HnswVectorsReader vectorReader =
+ (Lucene92HnswVectorsReader) perFieldReader.getFieldReader(KNN_GRAPH_FIELD);
graph = copyGraph(vectorReader.getGraph(KNN_GRAPH_FIELD));
}
}
@@ -437,8 +437,8 @@ public class TestKnnGraph extends LuceneTestCase {
if (perFieldReader == null) {
continue;
}
- Lucene91HnswVectorsReader vectorReader =
- (Lucene91HnswVectorsReader) perFieldReader.getFieldReader(vectorField);
+ Lucene92HnswVectorsReader vectorReader =
+ (Lucene92HnswVectorsReader) perFieldReader.getFieldReader(vectorField);
if (vectorReader == null) {
continue;
}
diff --git a/lucene/core/src/test/org/apache/lucene/util/hnsw/KnnGraphTester.java b/lucene/core/src/test/org/apache/lucene/util/hnsw/KnnGraphTester.java
index 822ef78197c..8b9fe855149 100644
--- a/lucene/core/src/test/org/apache/lucene/util/hnsw/KnnGraphTester.java
+++ b/lucene/core/src/test/org/apache/lucene/util/hnsw/KnnGraphTester.java
@@ -37,9 +37,9 @@ import java.util.Locale;
import java.util.Set;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.KnnVectorsReader;
-import org.apache.lucene.codecs.lucene91.Lucene91Codec;
-import org.apache.lucene.codecs.lucene91.Lucene91HnswVectorsFormat;
-import org.apache.lucene.codecs.lucene91.Lucene91HnswVectorsReader;
+import org.apache.lucene.codecs.lucene92.Lucene92Codec;
+import org.apache.lucene.codecs.lucene92.Lucene92HnswVectorsFormat;
+import org.apache.lucene.codecs.lucene92.Lucene92HnswVectorsReader;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
@@ -251,7 +251,7 @@ public class KnnGraphTester {
KnnVectorsReader vectorsReader =
((PerFieldKnnVectorsFormat.FieldsReader) ((CodecReader) leafReader).getVectorReader())
.getFieldReader(KNN_FIELD);
- HnswGraph knnValues = ((Lucene91HnswVectorsReader) vectorsReader).getGraph(KNN_FIELD);
+ HnswGraph knnValues = ((Lucene92HnswVectorsReader) vectorsReader).getGraph(KNN_FIELD);
System.out.printf("Leaf %d has %d documents\n", context.ord, leafReader.maxDoc());
printGraphFanout(knnValues, leafReader.maxDoc());
}
@@ -579,10 +579,10 @@ public class KnnGraphTester {
private int createIndex(Path docsPath, Path indexPath) throws IOException {
IndexWriterConfig iwc = new IndexWriterConfig().setOpenMode(IndexWriterConfig.OpenMode.CREATE);
iwc.setCodec(
- new Lucene91Codec() {
+ new Lucene92Codec() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
- return new Lucene91HnswVectorsFormat(maxConn, beamWidth);
+ return new Lucene92HnswVectorsFormat(maxConn, beamWidth);
}
});
// iwc.setMergePolicy(NoMergePolicy.INSTANCE);
diff --git a/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java b/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java
index a151d80b9c1..f0068f03077 100644
--- a/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java
+++ b/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java
@@ -25,9 +25,9 @@ import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.codecs.KnnVectorsFormat;
-import org.apache.lucene.codecs.lucene91.Lucene91Codec;
-import org.apache.lucene.codecs.lucene91.Lucene91HnswVectorsFormat;
-import org.apache.lucene.codecs.lucene91.Lucene91HnswVectorsReader;
+import org.apache.lucene.codecs.lucene92.Lucene92Codec;
+import org.apache.lucene.codecs.lucene92.Lucene92HnswVectorsFormat;
+import org.apache.lucene.codecs.lucene92.Lucene92HnswVectorsReader;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.KnnVectorField;
@@ -81,10 +81,10 @@ public class TestHnswGraph extends LuceneTestCase {
IndexWriterConfig iwc =
new IndexWriterConfig()
.setCodec(
- new Lucene91Codec() {
+ new Lucene92Codec() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
- return new Lucene91HnswVectorsFormat(maxConn, beamWidth);
+ return new Lucene92HnswVectorsFormat(maxConn, beamWidth);
}
});
try (IndexWriter iw = new IndexWriter(dir, iwc)) {
@@ -111,7 +111,7 @@ public class TestHnswGraph extends LuceneTestCase {
assertEquals(indexedDoc, ctx.reader().numDocs());
assertVectorsEqual(v3, values);
HnswGraph graphValues =
- ((Lucene91HnswVectorsReader)
+ ((Lucene92HnswVectorsReader)
((PerFieldKnnVectorsFormat.FieldsReader)
((CodecReader) ctx.reader()).getVectorReader())
.getFieldReader("field"))
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
index 0fe44266361..5372ebd2f92 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java
@@ -40,7 +40,7 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.lucene91.Lucene91Codec;
+import org.apache.lucene.codecs.lucene92.Lucene92Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntPoint;
@@ -961,7 +961,7 @@ public class TestSuggestField extends LuceneTestCase {
IndexWriterConfig iwc = newIndexWriterConfig(random(), analyzer);
iwc.setMergePolicy(newLogMergePolicy());
Codec filterCodec =
- new Lucene91Codec() {
+ new Lucene92Codec() {
CompletionPostingsFormat.FSTLoadMode fstLoadMode =
RandomPicks.randomFrom(random(), CompletionPostingsFormat.FSTLoadMode.values());
PostingsFormat postingsFormat = new Completion90PostingsFormat(fstLoadMode);
diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/util/TestRuleSetupAndRestoreClassEnv.java b/lucene/test-framework/src/java/org/apache/lucene/tests/util/TestRuleSetupAndRestoreClassEnv.java
index a7f9a7afdc6..8392640558a 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/tests/util/TestRuleSetupAndRestoreClassEnv.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/tests/util/TestRuleSetupAndRestoreClassEnv.java
@@ -38,7 +38,7 @@ import java.util.TimeZone;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.lucene91.Lucene91Codec;
+import org.apache.lucene.codecs.lucene92.Lucene92Codec;
import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.tests.codecs.asserting.AssertingCodec;
@@ -193,9 +193,9 @@ final class TestRuleSetupAndRestoreClassEnv extends AbstractBeforeAfterRule {
} else if ("Compressing".equals(TEST_CODEC)
|| ("random".equals(TEST_CODEC) && randomVal == 6 && !shouldAvoidCodec("Compressing"))) {
codec = CompressingCodec.randomInstance(random);
- } else if ("Lucene91".equals(TEST_CODEC)
- || ("random".equals(TEST_CODEC) && randomVal == 5 && !shouldAvoidCodec("Lucene91"))) {
- codec = new Lucene91Codec(RandomPicks.randomFrom(random, Lucene91Codec.Mode.values()));
+ } else if ("Lucene92".equals(TEST_CODEC)
+ || ("random".equals(TEST_CODEC) && randomVal == 5 && !shouldAvoidCodec("Lucene92"))) {
+ codec = new Lucene92Codec(RandomPicks.randomFrom(random, Lucene92Codec.Mode.values()));
} else if (!"random".equals(TEST_CODEC)) {
codec = Codec.forName(TEST_CODEC);
} else if ("random".equals(TEST_POSTINGSFORMAT)) {
diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/util/TestUtil.java b/lucene/test-framework/src/java/org/apache/lucene/tests/util/TestUtil.java
index fb085125e8b..e6722972ac2 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/tests/util/TestUtil.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/tests/util/TestUtil.java
@@ -55,8 +55,8 @@ import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.blocktreeords.BlockTreeOrdsPostingsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene90.Lucene90PostingsFormat;
-import org.apache.lucene.codecs.lucene91.Lucene91Codec;
-import org.apache.lucene.codecs.lucene91.Lucene91HnswVectorsFormat;
+import org.apache.lucene.codecs.lucene92.Lucene92Codec;
+import org.apache.lucene.codecs.lucene92.Lucene92HnswVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.apache.lucene.document.BinaryDocValuesField;
@@ -1236,7 +1236,7 @@ public final class TestUtil {
* different than {@link Codec#getDefault()} because that is randomized.
*/
public static Codec getDefaultCodec() {
- return new Lucene91Codec();
+ return new Lucene92Codec();
}
/**
@@ -1322,7 +1322,7 @@ public final class TestUtil {
* Lucene.
*/
public static KnnVectorsFormat getDefaultKnnVectorsFormat() {
- return new Lucene91HnswVectorsFormat();
+ return new Lucene92HnswVectorsFormat();
}
public static boolean anyFilesExceptWriteLock(Directory dir) throws IOException {