You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2022/11/23 10:05:05 UTC

[lucene] branch main updated: Enforce VectorValues.cost() is equal to size(). (#11962)

This is an automated email from the ASF dual-hosted git repository.

jpountz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new 802774641ad Enforce VectorValues.cost() is equal to size(). (#11962)
802774641ad is described below

commit 802774641add7d08d1b02a16b5439b81a64f493c
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Wed Nov 23 11:05:00 2022 +0100

    Enforce VectorValues.cost() is equal to size(). (#11962)
    
    `VectorValues` have a `cost()` method that reports an approximate number of
    documents that have a vector, but also a `size()` method that reports the
    accurate number of vectors in the field. Since KNN vectors only support
    single-valued fields we should enforce that `cost()` returns the `size()`.
---
 lucene/CHANGES.txt                                         |  3 +++
 .../lucene90/Lucene90HnswVectorsReader.java                |  5 -----
 .../lucene91/Lucene91HnswVectorsReader.java                |  5 -----
 .../backward_codecs/lucene92/OffHeapVectorValues.java      | 10 ----------
 .../codecs/simpletext/SimpleTextKnnVectorsReader.java      |  5 -----
 .../java/org/apache/lucene/codecs/KnnVectorsWriter.java    |  5 -----
 .../apache/lucene/codecs/lucene94/OffHeapVectorValues.java | 10 ----------
 .../org/apache/lucene/index/BufferingKnnVectorsWriter.java |  5 -----
 .../java/org/apache/lucene/index/FilterVectorValues.java   |  5 -----
 .../src/java/org/apache/lucene/index/VectorValues.java     | 14 ++++++--------
 .../test/org/apache/lucene/util/hnsw/MockVectorValues.java |  5 -----
 .../test/org/apache/lucene/util/hnsw/TestHnswGraph.java    |  5 -----
 12 files changed, 9 insertions(+), 68 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 098d9a89749..6d85af8eac0 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -123,6 +123,9 @@ API Changes
   necessary and also illegal as it reported a number of dimensions equal to
   zero. (Adrien Grand)
 
+* GITHUB#11962: VectorValues#cost() now delegates to VectorValues#size().
+  (Adrien Grand)
+
 New Features
 ---------------------
 * GITHUB#11795: Add ByteWritesTrackingDirectoryWrapper to expose metrics for bytes merged, flushed, and overall
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsReader.java
index 12cca5af7d1..83a3b090bdb 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsReader.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene90/Lucene90HnswVectorsReader.java
@@ -443,11 +443,6 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
       return doc;
     }
 
-    @Override
-    public long cost() {
-      return ordToDoc.length;
-    }
-
     @Override
     public RandomAccessVectorValues copy() {
       return new OffHeapVectorValues(dimension, ordToDoc, dataIn.clone());
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java
index 3eaac9e5ecb..d889ff4fbda 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene91/Lucene91HnswVectorsReader.java
@@ -495,11 +495,6 @@ public final class Lucene91HnswVectorsReader extends KnnVectorsReader {
       return doc;
     }
 
-    @Override
-    public long cost() {
-      return size;
-    }
-
     @Override
     public RandomAccessVectorValues copy() {
       return new OffHeapVectorValues(dimension, size, ordToDoc, dataIn.clone());
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene92/OffHeapVectorValues.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene92/OffHeapVectorValues.java
index 0f913c45b7a..88968e42238 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene92/OffHeapVectorValues.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene92/OffHeapVectorValues.java
@@ -59,11 +59,6 @@ abstract class OffHeapVectorValues extends VectorValues implements RandomAccessV
     return size;
   }
 
-  @Override
-  public long cost() {
-    return size;
-  }
-
   @Override
   public float[] vectorValue(int targetOrd) throws IOException {
     slice.seek((long) targetOrd * byteSize);
@@ -286,11 +281,6 @@ abstract class OffHeapVectorValues extends VectorValues implements RandomAccessV
       return doc = NO_MORE_DOCS;
     }
 
-    @Override
-    public long cost() {
-      return 0;
-    }
-
     @Override
     public RandomAccessVectorValues copy() throws IOException {
       throw new UnsupportedOperationException();
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsReader.java
index ed47670b4c6..4b29ce942d3 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextKnnVectorsReader.java
@@ -333,11 +333,6 @@ public class SimpleTextKnnVectorsReader extends KnnVectorsReader {
       return slowAdvance(target);
     }
 
-    @Override
-    public long cost() {
-      return size();
-    }
-
     private void readAllVectors() throws IOException {
       for (float[] value : values) {
         readVector(value);
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java
index e15c6d8bc07..e45057ef37d 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsWriter.java
@@ -179,11 +179,6 @@ public abstract class KnnVectorsWriter implements Accountable, Closeable {
       return size;
     }
 
-    @Override
-    public long cost() {
-      return size;
-    }
-
     @Override
     public int dimension() {
       return subs.get(0).values.dimension();
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene94/OffHeapVectorValues.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene94/OffHeapVectorValues.java
index f0084ea1af4..23df75fa022 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene94/OffHeapVectorValues.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene94/OffHeapVectorValues.java
@@ -59,11 +59,6 @@ abstract class OffHeapVectorValues extends VectorValues implements RandomAccessV
     return size;
   }
 
-  @Override
-  public long cost() {
-    return size;
-  }
-
   @Override
   public float[] vectorValue(int targetOrd) throws IOException {
     slice.seek((long) targetOrd * byteSize);
@@ -295,11 +290,6 @@ abstract class OffHeapVectorValues extends VectorValues implements RandomAccessV
       return doc = NO_MORE_DOCS;
     }
 
-    @Override
-    public long cost() {
-      return 0;
-    }
-
     @Override
     public RandomAccessVectorValues copy() throws IOException {
       throw new UnsupportedOperationException();
diff --git a/lucene/core/src/java/org/apache/lucene/index/BufferingKnnVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/index/BufferingKnnVectorsWriter.java
index a1003281b72..3ff02da9810 100644
--- a/lucene/core/src/java/org/apache/lucene/index/BufferingKnnVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/BufferingKnnVectorsWriter.java
@@ -280,10 +280,5 @@ public abstract class BufferingKnnVectorsWriter extends KnnVectorsWriter {
     public int advance(int target) {
       throw new UnsupportedOperationException();
     }
-
-    @Override
-    public long cost() {
-      return docsWithFieldIter.cost();
-    }
   }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterVectorValues.java b/lucene/core/src/java/org/apache/lucene/index/FilterVectorValues.java
index 32dfc37a0f1..6ab47be9753 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FilterVectorValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FilterVectorValues.java
@@ -48,11 +48,6 @@ public abstract class FilterVectorValues extends VectorValues {
     return in.advance(target);
   }
 
-  @Override
-  public long cost() {
-    return in.cost();
-  }
-
   @Override
   public int dimension() {
     return in.dimension();
diff --git a/lucene/core/src/java/org/apache/lucene/index/VectorValues.java b/lucene/core/src/java/org/apache/lucene/index/VectorValues.java
index 2e1357567c9..21945e888e9 100644
--- a/lucene/core/src/java/org/apache/lucene/index/VectorValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/VectorValues.java
@@ -39,14 +39,17 @@ public abstract class VectorValues extends DocIdSetIterator {
   public abstract int dimension();
 
   /**
-   * TODO: should we use cost() for this? We rely on its always being exactly the number of
-   * documents having a value for this field, which is not guaranteed by the cost() contract, but in
-   * all the implementations so far they are the same.
+   * Return the number of vectors for this field.
    *
    * @return the number of vectors returned by this iterator
    */
   public abstract int size();
 
+  @Override
+  public final long cost() {
+    return size();
+  }
+
   /**
    * Return the vector value for the current document ID. It is illegal to call this method when the
    * iterator is not positioned: before advancing, or after failing to advance. The returned array
@@ -127,10 +130,5 @@ public abstract class VectorValues extends DocIdSetIterator {
     public int advance(int target) throws IOException {
       throw new UnsupportedOperationException();
     }
-
-    @Override
-    public long cost() {
-      return size();
-    }
   }
 }
diff --git a/lucene/core/src/test/org/apache/lucene/util/hnsw/MockVectorValues.java b/lucene/core/src/test/org/apache/lucene/util/hnsw/MockVectorValues.java
index 99404df4d11..2ae264fe8ce 100644
--- a/lucene/core/src/test/org/apache/lucene/util/hnsw/MockVectorValues.java
+++ b/lucene/core/src/test/org/apache/lucene/util/hnsw/MockVectorValues.java
@@ -122,9 +122,4 @@ class MockVectorValues extends VectorValues implements RandomAccessVectorValues
     }
     return NO_MORE_DOCS;
   }
-
-  @Override
-  public long cost() {
-    return size();
-  }
 }
diff --git a/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java b/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java
index 16d1996820a..1fe585da8cb 100644
--- a/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java
+++ b/lucene/core/src/test/org/apache/lucene/util/hnsw/TestHnswGraph.java
@@ -767,11 +767,6 @@ public class TestHnswGraph extends LuceneTestCase {
       return doc;
     }
 
-    @Override
-    public long cost() {
-      return size;
-    }
-
     @Override
     public float[] vectorValue(int ord) {
       return unitVector2d(ord / (double) size, value);