You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ju...@apache.org on 2021/04/26 18:26:56 UTC
[lucene] branch main updated: LUCENE-9908: Move VectorValues#search to LeafReader (#104)
This is an automated email from the ASF dual-hosted git repository.
julietibs pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new 3115f85 LUCENE-9908: Move VectorValues#search to LeafReader (#104)
3115f85 is described below
commit 3115f8569765733af7a5765b60ef4a7d894bbbff
Author: Julie Tibshirani <ju...@elastic.co>
AuthorDate: Mon Apr 26 11:26:49 2021 -0700
LUCENE-9908: Move VectorValues#search to LeafReader (#104)
This PR removes `VectorValues#search` in favor of exposing NN search through
`VectorReader#search` and `LeafReader#searchNearestVectors`. It also marks the
vector methods on `LeafReader` as experimental.
---
.../codecs/simpletext/SimpleTextVectorReader.java | 10 +-
.../org/apache/lucene/codecs/VectorFormat.java | 9 +-
.../org/apache/lucene/codecs/VectorReader.java | 17 +++
.../org/apache/lucene/codecs/VectorWriter.java | 6 --
.../codecs/lucene90/Lucene90VectorReader.java | 120 +++++++++++----------
.../java/org/apache/lucene/index/CodecReader.java | 14 +++
.../apache/lucene/index/DocValuesLeafReader.java | 7 ++
.../org/apache/lucene/index/FilterLeafReader.java | 7 ++
.../java/org/apache/lucene/index/LeafReader.java | 20 ++++
.../apache/lucene/index/MergeReaderWrapper.java | 7 ++
.../apache/lucene/index/ParallelLeafReader.java | 9 ++
.../lucene/index/SlowCodecReaderWrapper.java | 6 ++
.../apache/lucene/index/SortingCodecReader.java | 6 ++
.../java/org/apache/lucene/index/VectorValues.java | 25 +----
.../apache/lucene/index/VectorValuesWriter.java | 11 --
.../org/apache/lucene/util/hnsw/HnswGraph.java | 9 +-
.../test/org/apache/lucene/index/TestKnnGraph.java | 2 +-
.../lucene/index/TestSegmentToThreadMapping.java | 6 ++
.../apache/lucene/util/hnsw/KnnGraphTester.java | 2 +-
.../apache/lucene/util/hnsw/MockVectorValues.java | 6 --
.../test/org/apache/lucene/util/hnsw/TestHnsw.java | 6 --
.../search/highlight/TermVectorLeafReader.java | 6 ++
.../apache/lucene/index/memory/MemoryIndex.java | 8 +-
.../java/org/apache/lucene/search/QueryUtils.java | 5 +
24 files changed, 205 insertions(+), 119 deletions(-)
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextVectorReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextVectorReader.java
index 1ef0200..7f31892 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextVectorReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextVectorReader.java
@@ -142,6 +142,11 @@ public class SimpleTextVectorReader extends VectorReader {
}
@Override
+ public TopDocs search(String field, float[] target, int k, int fanout) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
public void checkIntegrity() throws IOException {
IndexInput clone = dataIn.clone();
clone.seek(0);
@@ -334,11 +339,6 @@ public class SimpleTextVectorReader extends VectorReader {
public BytesRef binaryValue(int targetOrd) throws IOException {
throw new UnsupportedOperationException();
}
-
- @Override
- public TopDocs search(float[] target, int k, int fanout) throws IOException {
- throw new UnsupportedOperationException();
- }
}
private int readInt(IndexInput in, BytesRef field) throws IOException {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/VectorFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/VectorFormat.java
index 7cce5b2..44ae27c 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/VectorFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/VectorFormat.java
@@ -21,6 +21,8 @@ import java.io.IOException;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.VectorValues;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TopDocsCollector;
/**
* Encodes/decodes per-document vector and any associated indexing structures required to support
@@ -61,7 +63,12 @@ public abstract class VectorFormat {
}
@Override
- public void close() throws IOException {}
+ public TopDocs search(String field, float[] target, int k, int fanout) {
+ return TopDocsCollector.EMPTY_TOPDOCS;
+ }
+
+ @Override
+ public void close() {}
@Override
public long ramBytesUsed() {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/VectorReader.java b/lucene/core/src/java/org/apache/lucene/codecs/VectorReader.java
index 6b878ca..6808f9a 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/VectorReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/VectorReader.java
@@ -20,6 +20,7 @@ package org.apache.lucene.codecs;
import java.io.Closeable;
import java.io.IOException;
import org.apache.lucene.index.VectorValues;
+import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Accountable;
/** Reads vectors from an index. */
@@ -42,6 +43,22 @@ public abstract class VectorReader implements Closeable, Accountable {
public abstract VectorValues getVectorValues(String field) throws IOException;
/**
+ * Return the k nearest neighbor documents as determined by comparison of their vector values for
+ * this field, to the given vector, by the field's search strategy. If the search strategy is
+ * reversed, lower values indicate nearer vectors, otherwise higher scores indicate nearer
+ * vectors. Unlike relevance scores, vector scores may be negative.
+ *
+ * @param field the vector field to search
+ * @param target the vector-valued query
+ * @param k the number of docs to return
+ * @param fanout control the accuracy/speed tradeoff - larger values give better recall at higher
+ * cost
+ * @return the k nearest neighbor documents, along with their (searchStrategy-specific) scores.
+ */
+ public abstract TopDocs search(String field, float[] target, int k, int fanout)
+ throws IOException;
+
+ /**
* Returns an instance optimized for merging. This instance may only be consumed in the thread
* that called {@link #getMergeInstance()}.
*
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/VectorWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/VectorWriter.java
index b5c9681..547af03 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/VectorWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/VectorWriter.java
@@ -30,7 +30,6 @@ import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.RandomAccessVectorValuesProducer;
import org.apache.lucene.index.VectorValues;
-import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.BytesRef;
/** Writes vectors to an index. */
@@ -246,11 +245,6 @@ public abstract class VectorWriter implements Closeable {
return subs.get(0).values.searchStrategy();
}
- @Override
- public TopDocs search(float[] target, int k, int fanout) throws IOException {
- throw new UnsupportedOperationException();
- }
-
class MergerRandomAccess implements RandomAccessVectorValues {
private final List<RandomAccessVectorValues> raSubs;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorReader.java
index 4dc25cb..2a9452b 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90VectorReader.java
@@ -154,7 +154,36 @@ public final class Lucene90VectorReader extends VectorReader {
if (info == null) {
throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta);
}
- fields.put(info.name, readField(meta));
+
+ FieldEntry fieldEntry = readField(meta);
+ validateFieldEntry(info, fieldEntry);
+ fields.put(info.name, fieldEntry);
+ }
+ }
+
+ private void validateFieldEntry(FieldInfo info, FieldEntry fieldEntry) {
+ int dimension = info.getVectorDimension();
+ if (dimension != fieldEntry.dimension) {
+ throw new IllegalStateException(
+ "Inconsistent vector dimension for field=\""
+ + info.name
+ + "\"; "
+ + dimension
+ + " != "
+ + fieldEntry.dimension);
+ }
+
+ long numBytes = (long) fieldEntry.size() * dimension * Float.BYTES;
+ if (numBytes != fieldEntry.vectorDataLength) {
+ throw new IllegalStateException(
+ "Vector data length "
+ + fieldEntry.vectorDataLength
+ + " not matching size="
+ + fieldEntry.size()
+ + " * dim="
+ + dimension
+ + " * 4 = "
+ + numBytes);
}
}
@@ -199,40 +228,47 @@ public final class Lucene90VectorReader extends VectorReader {
@Override
public VectorValues getVectorValues(String field) throws IOException {
- FieldInfo info = fieldInfos.fieldInfo(field);
- if (info == null) {
+ FieldEntry fieldEntry = fields.get(field);
+ if (fieldEntry == null || fieldEntry.dimension == 0) {
return null;
}
- int dimension = info.getVectorDimension();
- if (dimension == 0) {
- return VectorValues.EMPTY;
- }
+
+ return getOffHeapVectorValues(fieldEntry);
+ }
+
+ @Override
+ public TopDocs search(String field, float[] target, int k, int fanout) throws IOException {
FieldEntry fieldEntry = fields.get(field);
- if (fieldEntry == null) {
- // There is a FieldInfo, but no vectors. Should we have deleted the FieldInfo?
+ if (fieldEntry == null || fieldEntry.dimension == 0) {
return null;
}
- if (dimension != fieldEntry.dimension) {
- throw new IllegalStateException(
- "Inconsistent vector dimension for field=\""
- + field
- + "\"; "
- + dimension
- + " != "
- + fieldEntry.dimension);
- }
- long numBytes = (long) fieldEntry.size() * dimension * Float.BYTES;
- if (numBytes != fieldEntry.vectorDataLength) {
- throw new IllegalStateException(
- "Vector data length "
- + fieldEntry.vectorDataLength
- + " not matching size="
- + fieldEntry.size()
- + " * dim="
- + dimension
- + " * 4 = "
- + numBytes);
+
+ OffHeapVectorValues vectorValues = getOffHeapVectorValues(fieldEntry);
+
+ // use a seed that is fixed for the index so we get reproducible results for the same query
+ final Random random = new Random(checksumSeed);
+ NeighborQueue results =
+ HnswGraph.search(target, k, k + fanout, vectorValues, getGraphValues(fieldEntry), random);
+ int i = 0;
+ ScoreDoc[] scoreDocs = new ScoreDoc[Math.min(results.size(), k)];
+ boolean reversed = fieldEntry.searchStrategy.reversed;
+ while (results.size() > 0) {
+ int node = results.topNode();
+ float score = results.topScore();
+ results.pop();
+ if (reversed) {
+ score = (float) Math.exp(-score / target.length);
+ }
+ scoreDocs[scoreDocs.length - ++i] = new ScoreDoc(fieldEntry.ordToDoc[node], score);
}
+ // always return >= the case where we can assert == is only when there are fewer than topK
+ // vectors in the index
+ return new TopDocs(
+ new TotalHits(results.visitedCount(), TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO),
+ scoreDocs);
+ }
+
+ private OffHeapVectorValues getOffHeapVectorValues(FieldEntry fieldEntry) throws IOException {
IndexInput bytesSlice =
vectorData.slice("vector-data", fieldEntry.vectorDataOffset, fieldEntry.vectorDataLength);
return new OffHeapVectorValues(fieldEntry, bytesSlice);
@@ -409,32 +445,6 @@ public final class Lucene90VectorReader extends VectorReader {
}
@Override
- public TopDocs search(float[] vector, int topK, int fanout) throws IOException {
- // use a seed that is fixed for the index so we get reproducible results for the same query
- final Random random = new Random(checksumSeed);
- NeighborQueue results =
- HnswGraph.search(
- vector, topK, topK + fanout, randomAccess(), getGraphValues(fieldEntry), random);
- int i = 0;
- ScoreDoc[] scoreDocs = new ScoreDoc[Math.min(results.size(), topK)];
- boolean reversed = searchStrategy().reversed;
- while (results.size() > 0) {
- int node = results.topNode();
- float score = results.topScore();
- results.pop();
- if (reversed) {
- score = (float) Math.exp(-score / vector.length);
- }
- scoreDocs[scoreDocs.length - ++i] = new ScoreDoc(fieldEntry.ordToDoc[node], score);
- }
- // always return >= the case where we can assert == is only when there are fewer than topK
- // vectors in the index
- return new TopDocs(
- new TotalHits(results.visitedCount(), TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO),
- scoreDocs);
- }
-
- @Override
public float[] vectorValue(int targetOrd) throws IOException {
dataIn.seek((long) targetOrd * byteSize);
dataIn.readLEFloats(value, 0, value.length);
diff --git a/lucene/core/src/java/org/apache/lucene/index/CodecReader.java b/lucene/core/src/java/org/apache/lucene/index/CodecReader.java
index a64dc17..e46d4bd 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CodecReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CodecReader.java
@@ -25,6 +25,7 @@ import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.VectorReader;
+import org.apache.lucene.search.TopDocs;
/** LeafReader implemented by codec APIs. */
public abstract class CodecReader extends LeafReader {
@@ -219,6 +220,19 @@ public abstract class CodecReader extends LeafReader {
}
@Override
+ public final TopDocs searchNearestVectors(String field, float[] target, int k, int fanout)
+ throws IOException {
+ ensureOpen();
+ FieldInfo fi = getFieldInfos().fieldInfo(field);
+ if (fi == null || fi.getVectorDimension() == 0) {
+ // Field does not exist or does not index vectors
+ return null;
+ }
+
+ return getVectorReader().search(field, target, k, fanout);
+ }
+
+ @Override
protected void doClose() throws IOException {}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocValuesLeafReader.java b/lucene/core/src/java/org/apache/lucene/index/DocValuesLeafReader.java
index a740a6a..02b48e8 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocValuesLeafReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocValuesLeafReader.java
@@ -18,6 +18,7 @@
package org.apache.lucene.index;
import java.io.IOException;
+import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Bits;
abstract class DocValuesLeafReader extends LeafReader {
@@ -52,6 +53,12 @@ abstract class DocValuesLeafReader extends LeafReader {
}
@Override
+ public TopDocs searchNearestVectors(String field, float[] target, int k, int fanout)
+ throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
public final void checkIntegrity() throws IOException {
throw new UnsupportedOperationException();
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java b/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java
index 93e3554..d591ff2 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FilterLeafReader.java
@@ -18,6 +18,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Iterator;
+import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -344,6 +345,12 @@ public abstract class FilterLeafReader extends LeafReader {
}
@Override
+ public TopDocs searchNearestVectors(String field, float[] target, int k, int fanout)
+ throws IOException {
+ return in.searchNearestVectors(field, target, k, fanout);
+ }
+
+ @Override
public Fields getTermVectors(int docID) throws IOException {
ensureOpen();
return in.getTermVectors(docID);
diff --git a/lucene/core/src/java/org/apache/lucene/index/LeafReader.java b/lucene/core/src/java/org/apache/lucene/index/LeafReader.java
index f8b83dd..b5b7343 100644
--- a/lucene/core/src/java/org/apache/lucene/index/LeafReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/LeafReader.java
@@ -17,6 +17,7 @@
package org.apache.lucene.index;
import java.io.IOException;
+import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Bits;
/**
@@ -207,10 +208,29 @@ public abstract class LeafReader extends IndexReader {
/**
* Returns {@link VectorValues} for this field, or null if no {@link VectorValues} were indexed.
* The returned instance should only be used by a single thread.
+ *
+ * @lucene.experimental
*/
public abstract VectorValues getVectorValues(String field) throws IOException;
/**
+ * Return the k nearest neighbor documents as determined by comparison of their vector values for
+ * this field, to the given vector, by the field's search strategy. If the search strategy is
+ * reversed, lower values indicate nearer vectors, otherwise higher scores indicate nearer
+ * vectors. Unlike relevance scores, vector scores may be negative.
+ *
+ * @param field the vector field to search
+ * @param target the vector-valued query
+ * @param k the number of docs to return
+ * @param fanout control the accuracy/speed tradeoff - larger values give better recall at higher
+ * cost
+ * @return the k nearest neighbor documents, along with their (searchStrategy-specific) scores.
+ * @lucene.experimental
+ */
+ public abstract TopDocs searchNearestVectors(String field, float[] target, int k, int fanout)
+ throws IOException;
+
+ /**
* Get the {@link FieldInfos} describing all fields in this reader.
*
* <p>Note: Implementations should cache the FieldInfos instance returned by this method such that
diff --git a/lucene/core/src/java/org/apache/lucene/index/MergeReaderWrapper.java b/lucene/core/src/java/org/apache/lucene/index/MergeReaderWrapper.java
index 8d368fb..8413cff 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MergeReaderWrapper.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MergeReaderWrapper.java
@@ -24,6 +24,7 @@ import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
+import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Bits;
/**
@@ -203,6 +204,12 @@ class MergeReaderWrapper extends LeafReader {
}
@Override
+ public TopDocs searchNearestVectors(String field, float[] target, int k, int fanout)
+ throws IOException {
+ return in.searchNearestVectors(field, target, k, fanout);
+ }
+
+ @Override
public int numDocs() {
return in.numDocs();
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java b/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java
index 1316c3b..ff70da2 100644
--- a/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/ParallelLeafReader.java
@@ -27,6 +27,7 @@ import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Version;
@@ -392,6 +393,14 @@ public class ParallelLeafReader extends LeafReader {
}
@Override
+ public TopDocs searchNearestVectors(String fieldName, float[] target, int k, int fanout)
+ throws IOException {
+ ensureOpen();
+ LeafReader reader = fieldToReader.get(fieldName);
+ return reader == null ? null : reader.searchNearestVectors(fieldName, target, k, fanout);
+ }
+
+ @Override
public void checkIntegrity() throws IOException {
ensureOpen();
for (LeafReader reader : completeReaderSet) {
diff --git a/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java b/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java
index 1030617..bc2450c 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SlowCodecReaderWrapper.java
@@ -27,6 +27,7 @@ import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.VectorReader;
+import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Bits;
/**
@@ -166,6 +167,11 @@ public final class SlowCodecReaderWrapper {
}
@Override
+ public TopDocs search(String field, float[] target, int k, int fanout) throws IOException {
+ return reader.searchNearestVectors(field, target, k, fanout);
+ }
+
+ @Override
public void checkIntegrity() {
// We already checkIntegrity the entire reader up front
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortingCodecReader.java b/lucene/core/src/java/org/apache/lucene/index/SortingCodecReader.java
index 33a1143..baf39f1 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortingCodecReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortingCodecReader.java
@@ -33,6 +33,7 @@ import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.VectorReader;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOSupplier;
@@ -314,6 +315,11 @@ public final class SortingCodecReader extends FilterCodecReader {
}
@Override
+ public TopDocs search(String field, float[] target, int k, int fanout) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
public void close() throws IOException {
delegate.close();
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/VectorValues.java b/lucene/core/src/java/org/apache/lucene/index/VectorValues.java
index caaf6d3..9548b59 100644
--- a/lucene/core/src/java/org/apache/lucene/index/VectorValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/VectorValues.java
@@ -21,8 +21,8 @@ import static org.apache.lucene.util.VectorUtil.dotProduct;
import static org.apache.lucene.util.VectorUtil.squareDistance;
import java.io.IOException;
+import org.apache.lucene.codecs.VectorReader;
import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.BytesRef;
/**
@@ -76,28 +76,14 @@ public abstract class VectorValues extends DocIdSetIterator {
}
/**
- * Return the k nearest neighbor documents as determined by comparison of their vector values for
- * this field, to the given vector, by the field's search strategy. If the search strategy is
- * reversed, lower values indicate nearer vectors, otherwise higher scores indicate nearer
- * vectors. Unlike relevance scores, vector scores may be negative.
- *
- * @param target the vector-valued query
- * @param k the number of docs to return
- * @param fanout control the accuracy/speed tradeoff - larger values give better recall at higher
- * cost
- * @return the k nearest neighbor documents, along with their (searchStrategy-specific) scores.
- */
- public abstract TopDocs search(float[] target, int k, int fanout) throws IOException;
-
- /**
* Search strategy. This is a label describing the method used during indexing and searching of
* the vectors in order to determine the nearest neighbors.
*/
public enum SearchStrategy {
/**
- * No search strategy is provided. Note: {@link VectorValues#search(float[], int, int)} is not
- * supported for fields specifying this strategy.
+ * No search strategy is provided. Note: {@link VectorReader#search(String, float[], int, int)}
+ * is not supported for fields specifying this strategy.
*/
NONE,
@@ -183,11 +169,6 @@ public abstract class VectorValues extends DocIdSetIterator {
}
@Override
- public TopDocs search(float[] target, int k, int fanout) {
- throw new UnsupportedOperationException();
- }
-
- @Override
public int docID() {
throw new IllegalStateException("VectorValues is EMPTY, and not positioned on a doc");
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/VectorValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/VectorValuesWriter.java
index e4c74a7..90c8d00 100644
--- a/lucene/core/src/java/org/apache/lucene/index/VectorValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/VectorValuesWriter.java
@@ -24,7 +24,6 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.codecs.VectorWriter;
import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;
@@ -197,11 +196,6 @@ class VectorValuesWriter {
}
@Override
- public TopDocs search(float[] target, int k, int fanout) {
- throw new UnsupportedOperationException();
- }
-
- @Override
public long cost() {
return size();
}
@@ -342,10 +336,5 @@ class VectorValuesWriter {
public long cost() {
return docsWithFieldIter.cost();
}
-
- @Override
- public TopDocs search(float[] target, int k, int fanout) throws IOException {
- throw new UnsupportedOperationException();
- }
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java
index 2def856..7a9af2a 100644
--- a/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java
+++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/HnswGraph.java
@@ -23,6 +23,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
+import org.apache.lucene.codecs.VectorReader;
import org.apache.lucene.index.KnnGraphValues;
import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.VectorValues;
@@ -46,10 +47,10 @@ import org.apache.lucene.util.SparseFixedBitSet;
* searching the graph for each newly inserted node.
* <li><code>maxConn</code> has the same meaning as <code>M</code> in the later paper; it controls
* how many of the <code>efConst</code> neighbors are connected to the new node
- * <li><code>fanout</code> the fanout parameter of {@link VectorValues#search(float[], int, int)}
- * is used to control the values of <code>numSeed</code> and <code>topK</code> that are passed
- * to this API. Thus <code>fanout</code> is like a combination of <code>ef</code> (search beam
- * width) from the 2016 paper and <code>m</code> from the 2014 paper.
+ * <li><code>fanout</code> the fanout parameter of {@link VectorReader#search(String, float[],
+ * int, int)} is used to control the values of <code>numSeed</code> and <code>topK</code> that
+ * are passed to this API. Thus <code>fanout</code> is like a combination of <code>ef</code>
+ * (search beam width) from the 2016 paper and <code>m</code> from the 2014 paper.
* </ul>
*
* <p>Note: The graph may be searched by multiple threads concurrently, but updates are not
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestKnnGraph.java b/lucene/core/src/test/org/apache/lucene/index/TestKnnGraph.java
index 56a19a3..e466ad9 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestKnnGraph.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestKnnGraph.java
@@ -277,7 +277,7 @@ public class TestKnnGraph extends LuceneTestCase {
private static TopDocs doKnnSearch(IndexReader reader, float[] vector, int k) throws IOException {
TopDocs[] results = new TopDocs[reader.leaves().size()];
for (LeafReaderContext ctx : reader.leaves()) {
- results[ctx.ord] = ctx.reader().getVectorValues(KNN_GRAPH_FIELD).search(vector, k, 10);
+ results[ctx.ord] = ctx.reader().searchNearestVectors(KNN_GRAPH_FIELD, vector, k, 10);
if (ctx.docBase > 0) {
for (ScoreDoc doc : results[ctx.ord].scoreDocs) {
doc.doc += ctx.docBase;
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSegmentToThreadMapping.java b/lucene/core/src/test/org/apache/lucene/index/TestSegmentToThreadMapping.java
index 6a3f28a..54b9e5f 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestSegmentToThreadMapping.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestSegmentToThreadMapping.java
@@ -28,6 +28,7 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.IOUtils;
@@ -110,6 +111,11 @@ public class TestSegmentToThreadMapping extends LuceneTestCase {
}
@Override
+ public TopDocs searchNearestVectors(String field, float[] target, int k, int fanout) {
+ return null;
+ }
+
+ @Override
protected void doClose() {}
@Override
diff --git a/lucene/core/src/test/org/apache/lucene/util/hnsw/KnnGraphTester.java b/lucene/core/src/test/org/apache/lucene/util/hnsw/KnnGraphTester.java
index 94febfb..ba5e759 100644
--- a/lucene/core/src/test/org/apache/lucene/util/hnsw/KnnGraphTester.java
+++ b/lucene/core/src/test/org/apache/lucene/util/hnsw/KnnGraphTester.java
@@ -420,7 +420,7 @@ public class KnnGraphTester {
IndexReader reader, String field, float[] vector, int k, int fanout) throws IOException {
TopDocs[] results = new TopDocs[reader.leaves().size()];
for (LeafReaderContext ctx : reader.leaves()) {
- results[ctx.ord] = ctx.reader().getVectorValues(field).search(vector, k, fanout);
+ results[ctx.ord] = ctx.reader().searchNearestVectors(field, vector, k, fanout);
int docBase = ctx.docBase;
for (ScoreDoc scoreDoc : results[ctx.ord].scoreDocs) {
scoreDoc.doc += docBase;
diff --git a/lucene/core/src/test/org/apache/lucene/util/hnsw/MockVectorValues.java b/lucene/core/src/test/org/apache/lucene/util/hnsw/MockVectorValues.java
index d6cb7ab..3f772b0 100644
--- a/lucene/core/src/test/org/apache/lucene/util/hnsw/MockVectorValues.java
+++ b/lucene/core/src/test/org/apache/lucene/util/hnsw/MockVectorValues.java
@@ -20,7 +20,6 @@ package org.apache.lucene.util.hnsw;
import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.RandomAccessVectorValuesProducer;
import org.apache.lucene.index.VectorValues;
-import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
@@ -100,11 +99,6 @@ class MockVectorValues extends VectorValues
return null;
}
- @Override
- public TopDocs search(float[] target, int k, int fanout) {
- return null;
- }
-
private boolean seek(int target) {
if (target >= 0 && target < values.length && values[target] != null) {
pos = target;
diff --git a/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnsw.java b/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnsw.java
index 26d01d6..e4558a7 100644
--- a/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnsw.java
+++ b/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnsw.java
@@ -39,7 +39,6 @@ import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomAccessVectorValues;
import org.apache.lucene.index.RandomAccessVectorValuesProducer;
import org.apache.lucene.index.VectorValues;
-import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
@@ -347,11 +346,6 @@ public class TestHnsw extends LuceneTestCase {
public BytesRef binaryValue(int ord) {
return null;
}
-
- @Override
- public TopDocs search(float[] target, int k, int fanout) {
- return null;
- }
}
private static float[] unitVector2d(double piRadians) {
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java
index 0b782cf..5d61bca 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TermVectorLeafReader.java
@@ -35,6 +35,7 @@ import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.VectorValues;
+import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Version;
@@ -159,6 +160,11 @@ public class TermVectorLeafReader extends LeafReader {
}
@Override
+ public TopDocs searchNearestVectors(String field, float[] target, int k, int fanout) {
+ return null;
+ }
+
+ @Override
public void checkIntegrity() throws IOException {}
@Override
diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
index 352a06e..24e1ec4 100644
--- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
+++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
@@ -40,6 +40,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector;
+import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.ArrayUtil;
@@ -1354,7 +1355,12 @@ public class MemoryIndex {
@Override
public VectorValues getVectorValues(String fieldName) {
- return VectorValues.EMPTY;
+ return null;
+ }
+
+ @Override
+ public TopDocs searchNearestVectors(String field, float[] target, int k, int fanout) {
+ return null;
}
@Override
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
index f308f47..cbeb9b3 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
@@ -216,6 +216,11 @@ public class QueryUtils {
}
@Override
+ public TopDocs searchNearestVectors(String field, float[] target, int k, int fanout) {
+ return null;
+ }
+
+ @Override
public FieldInfos getFieldInfos() {
return FieldInfos.EMPTY;
}