You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2022/11/23 10:05:05 UTC
[lucene] branch main updated: Enforce VectorValues.cost() is equal to size(). (#11962)
This is an automated email from the ASF dual-hosted git repository.
jpountz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new 802774641ad Enforce VectorValues.cost() is equal to size(). (#11962)
802774641ad is described below
commit 802774641add7d08d1b02a16b5439b81a64f493c
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Wed Nov 23 11:05:00 2022 +0100
Enforce VectorValues.cost() is equal to size(). (#11962)
`VectorValues` have a `cost()` method that reports an approximate number of
documents that have a vector, but also a `size()` method that reports the
accurate number of vectors in the field. Since KNN vectors only support
single-valued fields we should enforce that `cost()` returns the `size()`.
---
lucene/CHANGES.txt | 3 +++
.../lucene90/Lucene90HnswVectorsReader.java | 5 -----
.../lucene91/Lucene91HnswVectorsReader.java | 5 -----
.../backward_codecs/lucene92/OffHeapVectorValues.java | 10 ----------
.../codecs/simpletext/SimpleTextKnnVectorsReader.java | 5 -----
.../java/org/apache/lucene/codecs/KnnVectorsWriter.java | 5 -----
.../apache/lucene/codecs/lucene94/OffHeapVectorValues.java | 10 ----------
.../org/apache/lucene/index/BufferingKnnVectorsWriter.java | 5 -----
.../java/org/apache/lucene/index/FilterVectorValues.java | 5 -----
.../src/java/org/apache/lucene/index/VectorValues.java | 14 ++++++--------
.../test/org/apache/lucene/util/hnsw/MockVectorValues.java | 5 -----
.../test/org/apache/lucene/util/hnsw/TestHnswGraph.java | 5 -----
12 files changed, 9 insertions(+), 68 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 098d9a89749..6d85af8eac0 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -123,6 +123,9 @@ API Changes
necessary and also illegal as it reported a number of dimensions equal to
zero. (Adrien Grand)
+* GITHUB#11962: VectorValues#cost() now delegates to VectorValues#size().
+ (Adrien Grand)
+
New Features
---------------------
* GITHUB#11795: Add ByteWritesTrackingDirectoryWrapper to expose metrics for bytes merged, flushed, and overall
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsReader.java
index 12cca5af7d1..83a3b090bdb 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsReader.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsReader.java
@@ -443,11 +443,6 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
return doc;
}
- @Override
- public long cost() {
- return ordToDoc.length;
- }
-
@Override
public RandomAccessVectorValues copy() {
return new OffHeapVectorValues(dimension, ordToDoc, dataIn.clone());
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java
index 3eaac9e5ecb..d889ff4fbda 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java
@@ -495,11 +495,6 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
return doc;
}
- @Override
- public long cost() {
- return size;
- }
-
@Override
public RandomAccessVectorValues copy() {
return new OffHeapVectorValues(dimension, size, ordToDoc, dataIn.clone());
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene92/OffHeapVectorValues.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene92/OffHeapVectorValues.java
index 0f913c45b7a..88968e42238 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene92/OffHeapVectorValues.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene92/OffHeapVectorValues.java
@@ -59,11 +59,6 @@ abstract class OffHeapVectorValues extends VectorValues implements RandomAccessV
return size;
}
- @Override
- public long cost() {
- return size;
- }
-
@Override
public float[] vectorValue(int targetOrd) throws IOException {
slice.seek((long) targetOrd * byteSize);
@@ -286,11 +281,6 @@ abstract class OffHeapVectorValues extends VectorValues implements RandomAccessV
return doc = NO_MORE_DOCS;
}
- @Override
- public long cost() {
- return 0;
- }
-
@Override
public RandomAccessVectorValues copy() throws IOException {
throw new UnsupportedOperationException();
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsReader.java
index ed47670b4c6..4b29ce942d3 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsReader.java
@@ -333,11 +333,6 @@ public class SimpleTextKnnVectorsReader extends KnnVectorsReader {
return slowAdvance(target);
}
- @Override
- public long cost() {
- return size();
- }
-
private void readAllVectors() throws IOException {
for (float[] value : values) {
readVector(value);
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java
index e15c6d8bc07..e45057ef37d 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java
@@ -179,11 +179,6 @@ public abstract class KnnVectorsWriter implements Accountable, Closeable {
return size;
}
- @Override
- public long cost() {
- return size;
- }
-
@Override
public int dimension() {
return subs.get(0).values.dimension();
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene94/OffHeapVectorValues.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene94/OffHeapVectorValues.java
index f0084ea1af4..23df75fa022 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene94/OffHeapVectorValues.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene94/OffHeapVectorValues.java
@@ -59,11 +59,6 @@ abstract class OffHeapVectorValues extends VectorValues implements RandomAccessV
return size;
}
- @Override
- public long cost() {
- return size;
- }
-
@Override
public float[] vectorValue(int targetOrd) throws IOException {
slice.seek((long) targetOrd * byteSize);
@@ -295,11 +290,6 @@ abstract class OffHeapVectorValues extends VectorValues implements RandomAccessV
return doc = NO_MORE_DOCS;
}
- @Override
- public long cost() {
- return 0;
- }
-
@Override
public RandomAccessVectorValues copy() throws IOException {
throw new UnsupportedOperationException();
diff --git a/lucene/core/src/java/org/apache/lucene/index/BufferingKnnVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/index/BufferingKnnVectorsWriter.java
index a1003281b72..3ff02da9810 100644
--- a/lucene/core/src/java/org/apache/lucene/index/BufferingKnnVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/BufferingKnnVectorsWriter.java
@@ -280,10 +280,5 @@ public abstract class BufferingKnnVectorsWriter extends KnnVectorsWriter {
public int advance(int target) {
throw new UnsupportedOperationException();
}
-
- @Override
- public long cost() {
- return docsWithFieldIter.cost();
- }
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterVectorValues.java b/lucene/core/src/java/org/apache/lucene/index/FilterVectorValues.java
index 32dfc37a0f1..6ab47be9753 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FilterVectorValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FilterVectorValues.java
@@ -48,11 +48,6 @@ public abstract class FilterVectorValues extends VectorValues {
return in.advance(target);
}
- @Override
- public long cost() {
- return in.cost();
- }
-
@Override
public int dimension() {
return in.dimension();
diff --git a/lucene/core/src/java/org/apache/lucene/index/VectorValues.java b/lucene/core/src/java/org/apache/lucene/index/VectorValues.java
index 2e1357567c9..21945e888e9 100644
--- a/lucene/core/src/java/org/apache/lucene/index/VectorValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/VectorValues.java
@@ -39,14 +39,17 @@ public abstract class VectorValues extends DocIdSetIterator {
public abstract int dimension();
/**
- * TODO: should we use cost() for this? We rely on its always being exactly the number of
- * documents having a value for this field, which is not guaranteed by the cost() contract, but in
- * all the implementations so far they are the same.
+ * Return the number of vectors for this field.
*
* @return the number of vectors returned by this iterator
*/
public abstract int size();
+ @Override
+ public final long cost() {
+ return size();
+ }
+
/**
* Return the vector value for the current document ID. It is illegal to call this method when the
* iterator is not positioned: before advancing, or after failing to advance. The returned array
@@ -127,10 +130,5 @@ public abstract class VectorValues extends DocIdSetIterator {
public int advance(int target) throws IOException {
throw new UnsupportedOperationException();
}
-
- @Override
- public long cost() {
- return size();
- }
}
}
diff --git a/lucene/core/src/test/org/apache/lucene/util/hnsw/MockVectorValues.java b/lucene/core/src/test/org/apache/lucene/util/hnsw/MockVectorValues.java
index 99404df4d11..2ae264fe8ce 100644
--- a/lucene/core/src/test/org/apache/lucene/util/hnsw/MockVectorValues.java
+++ b/lucene/core/src/test/org/apache/lucene/util/hnsw/MockVectorValues.java
@@ -122,9 +122,4 @@ class MockVectorValues extends VectorValues implements RandomAccessVectorValues
}
return NO_MORE_DOCS;
}
-
- @Override
- public long cost() {
- return size();
- }
}
diff --git a/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java b/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java
index 16d1996820a..1fe585da8cb 100644
--- a/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java
+++ b/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java
@@ -767,11 +767,6 @@ public class TestHnswGraph extends LuceneTestCase {
return doc;
}
- @Override
- public long cost() {
- return size;
- }
-
@Override
public float[] vectorValue(int ord) {
return unitVector2d(ord / (double) size, value);