You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2022/01/06 08:27:22 UTC

[lucene] 01/02: LUCENE-10354: Clarify contract of codec APIs with missing/disabled fields. (#583)

This is an automated email from the ASF dual-hosted git repository.

jpountz pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git

commit 6c1b9f74e855b32c40be892abcd9c143e35bc30b
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Wed Jan 5 18:47:35 2022 +0100

    LUCENE-10354: Clarify contract of codec APIs with missing/disabled fields. (#583)
---
 .../org/apache/lucene/codecs/DocValuesProducer.java  | 18 +++++++++++++-----
 .../org/apache/lucene/codecs/KnnVectorsReader.java   | 10 +++++++++-
 .../java/org/apache/lucene/codecs/NormsProducer.java |  3 ++-
 .../java/org/apache/lucene/codecs/PointsReader.java  |  6 +++++-
 .../codecs/lucene90/Lucene90HnswVectorsReader.java   |  8 +-------
 .../java/org/apache/lucene/index/CodecReader.java    | 12 +++++-------
 .../codecs/asserting/AssertingKnnVectorsFormat.java  | 20 +++++++++++++-------
 .../codecs/asserting/AssertingPointsFormat.java      | 12 +++++++++---
 8 files changed, 57 insertions(+), 32 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java
index 96d381a..2731bcc 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesProducer.java
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs;
 import java.io.Closeable;
 import java.io.IOException;
 import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SortedDocValues;
@@ -37,31 +38,38 @@ public abstract class DocValuesProducer implements Closeable {
 
   /**
    * Returns {@link NumericDocValues} for this field. The returned instance need not be thread-safe:
-   * it will only be used by a single thread.
+   * it will only be used by a single thread. The behavior is undefined if the doc values type of
+   * the given field is not {@link DocValuesType#NUMERIC}. The return value is never {@code null}.
    */
   public abstract NumericDocValues getNumeric(FieldInfo field) throws IOException;
 
   /**
    * Returns {@link BinaryDocValues} for this field. The returned instance need not be thread-safe:
-   * it will only be used by a single thread.
+   * it will only be used by a single thread. The behavior is undefined if the doc values type of
+   * the given field is not {@link DocValuesType#BINARY}. The return value is never {@code null}.
    */
   public abstract BinaryDocValues getBinary(FieldInfo field) throws IOException;
 
   /**
    * Returns {@link SortedDocValues} for this field. The returned instance need not be thread-safe:
-   * it will only be used by a single thread.
+   * it will only be used by a single thread. The behavior is undefined if the doc values type of
+   * the given field is not {@link DocValuesType#SORTED}. The return value is never {@code null}.
    */
   public abstract SortedDocValues getSorted(FieldInfo field) throws IOException;
 
   /**
    * Returns {@link SortedNumericDocValues} for this field. The returned instance need not be
-   * thread-safe: it will only be used by a single thread.
+   * thread-safe: it will only be used by a single thread. The behavior is undefined if the doc
+   * values type of the given field is not {@link DocValuesType#SORTED_NUMERIC}. The return value is
+   * never {@code null}.
    */
   public abstract SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException;
 
   /**
    * Returns {@link SortedSetDocValues} for this field. The returned instance need not be
-   * thread-safe: it will only be used by a single thread.
+   * thread-safe: it will only be used by a single thread. The behavior is undefined if the doc
+   * values type of the given field is not {@link DocValuesType#SORTED_SET}. The return value is
+   * never {@code null}.
    */
   public abstract SortedSetDocValues getSortedSet(FieldInfo field) throws IOException;
 
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java
index d89e5c7..6407559 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs;
 
 import java.io.Closeable;
 import java.io.IOException;
+import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.VectorValues;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.util.Accountable;
@@ -40,7 +41,11 @@ public abstract class KnnVectorsReader implements Closeable, Accountable {
    */
   public abstract void checkIntegrity() throws IOException;
 
-  /** Returns the {@link VectorValues} for the given {@code field} */
+  /**
+   * Returns the {@link VectorValues} for the given {@code field}. The behavior is undefined if the
+   * given field doesn't have KNN vectors enabled on its {@link FieldInfo}. The return value is
+   * never {@code null}.
+   */
   public abstract VectorValues getVectorValues(String field) throws IOException;
 
   /**
@@ -53,6 +58,9 @@ public abstract class KnnVectorsReader implements Closeable, Accountable {
    * true k closest neighbors. For large values of k (for example when k is close to the total
    * number of documents), the search may also retrieve fewer than k documents.
    *
+   * <p>The behavior is undefined if the given field doesn't have KNN vectors enabled on its {@link
+   * FieldInfo}. The return value is never {@code null}.
+   *
    * @param field the vector field to search
    * @param target the vector-valued query
    * @param k the number of docs to return
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/NormsProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/NormsProducer.java
index 6df7d7e..064e841 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/NormsProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/NormsProducer.java
@@ -33,7 +33,8 @@ public abstract class NormsProducer implements Closeable {
 
   /**
    * Returns {@link NumericDocValues} for this field. The returned instance need not be thread-safe:
-   * it will only be used by a single thread.
+   * it will only be used by a single thread. The behavior is undefined if the given field doesn't
+   * have norms enabled on its {@link FieldInfo}. The return value is never {@code null}.
    */
   public abstract NumericDocValues getNorms(FieldInfo field) throws IOException;
 
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/PointsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/PointsReader.java
index 241c6dc..cc85dc0 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/PointsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/PointsReader.java
@@ -18,6 +18,7 @@ package org.apache.lucene.codecs;
 
 import java.io.Closeable;
 import java.io.IOException;
+import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.PointValues;
 
 /**
@@ -40,7 +41,10 @@ public abstract class PointsReader implements Closeable {
    */
   public abstract void checkIntegrity() throws IOException;
 
-  /** Return {@link PointValues} for the given {@code field}. */
+  /**
+   * Return {@link PointValues} for the given {@code field}. The behavior is undefined if the given
+   * field doesn't have points enabled on its {@link FieldInfo}.
+   */
   public abstract PointValues getValues(String field) throws IOException;
 
   /**
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java
index b0ac8a9..2ae1bc4 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90HnswVectorsReader.java
@@ -226,19 +226,13 @@ public final class Lucene90HnswVectorsReader extends KnnVectorsReader {
   @Override
   public VectorValues getVectorValues(String field) throws IOException {
     FieldEntry fieldEntry = fields.get(field);
-    if (fieldEntry == null || fieldEntry.dimension == 0) {
-      return null;
-    }
-
     return getOffHeapVectorValues(fieldEntry);
   }
 
   @Override
   public TopDocs search(String field, float[] target, int k, Bits acceptDocs) throws IOException {
     FieldEntry fieldEntry = fields.get(field);
-    if (fieldEntry == null || fieldEntry.dimension == 0) {
-      return null;
-    }
+
     if (fieldEntry.size() == 0) {
       return new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), new ScoreDoc[0]);
     }
diff --git a/lucene/core/src/java/org/apache/lucene/index/CodecReader.java b/lucene/core/src/java/org/apache/lucene/index/CodecReader.java
index 064cdbf..9c05098 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CodecReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CodecReader.java
@@ -105,15 +105,13 @@ public abstract class CodecReader extends LeafReader {
 
   @Override
   public final Terms terms(String field) throws IOException {
-    // ensureOpen(); no; getPostingsReader calls this
-    FieldsProducer fieldsProducer = getPostingsReader();
-    if (fieldsProducer == null) {
+    ensureOpen();
+    FieldInfo fi = getFieldInfos().fieldInfo(field);
+    if (fi == null || fi.getIndexOptions() == IndexOptions.NONE) {
+      // Field does not exist or does not index postings
       return null;
     }
-    // We could check the FieldInfo IndexOptions but there's no point since
-    //   PostingsReader will simply return null for fields that don't exist or that have no terms
-    // index.
-    return fieldsProducer.terms(field);
+    return getPostingsReader().terms(field);
   }
 
   // returns the FieldInfo that corresponds to the given field and type, or
diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingKnnVectorsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingKnnVectorsFormat.java
index 6b17d36..55b17d5 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingKnnVectorsFormat.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingKnnVectorsFormat.java
@@ -22,6 +22,7 @@ import org.apache.lucene.codecs.KnnVectorsFormat;
 import org.apache.lucene.codecs.KnnVectorsReader;
 import org.apache.lucene.codecs.KnnVectorsWriter;
 import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.VectorValues;
@@ -45,7 +46,7 @@ public class AssertingKnnVectorsFormat extends KnnVectorsFormat {
 
   @Override
   public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
-    return new AssertingKnnVectorsReader(delegate.fieldsReader(state));
+    return new AssertingKnnVectorsReader(delegate.fieldsReader(state), state.fieldInfos);
   }
 
   static class AssertingKnnVectorsWriter extends KnnVectorsWriter {
@@ -76,10 +77,12 @@ public class AssertingKnnVectorsFormat extends KnnVectorsFormat {
 
   static class AssertingKnnVectorsReader extends KnnVectorsReader {
     final KnnVectorsReader delegate;
+    final FieldInfos fis;
 
-    AssertingKnnVectorsReader(KnnVectorsReader delegate) {
+    AssertingKnnVectorsReader(KnnVectorsReader delegate, FieldInfos fis) {
       assert delegate != null;
       this.delegate = delegate;
+      this.fis = fis;
     }
 
     @Override
@@ -89,17 +92,20 @@ public class AssertingKnnVectorsFormat extends KnnVectorsFormat {
 
     @Override
     public VectorValues getVectorValues(String field) throws IOException {
+      FieldInfo fi = fis.fieldInfo(field);
+      assert fi != null && fi.getVectorDimension() > 0;
       VectorValues values = delegate.getVectorValues(field);
-      if (values != null) {
-        assert values.docID() == -1;
-        assert values.size() >= 0;
-        assert values.dimension() > 0;
-      }
+      assert values != null;
+      assert values.docID() == -1;
+      assert values.size() >= 0;
+      assert values.dimension() > 0;
       return values;
     }
 
     @Override
     public TopDocs search(String field, float[] target, int k, Bits acceptDocs) throws IOException {
+      FieldInfo fi = fis.fieldInfo(field);
+      assert fi != null && fi.getVectorDimension() > 0;
       TopDocs hits = delegate.search(field, target, k, acceptDocs);
       assert hits != null;
       assert hits.scoreDocs.length <= k;
diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingPointsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingPointsFormat.java
index 2b68a59..eada1e8 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingPointsFormat.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingPointsFormat.java
@@ -21,6 +21,7 @@ import org.apache.lucene.codecs.PointsFormat;
 import org.apache.lucene.codecs.PointsReader;
 import org.apache.lucene.codecs.PointsWriter;
 import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.MergeState;
 import org.apache.lucene.index.PointValues;
 import org.apache.lucene.index.SegmentReadState;
@@ -54,18 +55,21 @@ public final class AssertingPointsFormat extends PointsFormat {
 
   @Override
   public PointsReader fieldsReader(SegmentReadState state) throws IOException {
-    return new AssertingPointsReader(state.segmentInfo.maxDoc(), in.fieldsReader(state), false);
+    return new AssertingPointsReader(
+        state.segmentInfo.maxDoc(), in.fieldsReader(state), state.fieldInfos, false);
   }
 
   static class AssertingPointsReader extends PointsReader {
     private final PointsReader in;
     private final int maxDoc;
+    private final FieldInfos fis;
     private final boolean merging;
     private final Thread creationThread;
 
-    AssertingPointsReader(int maxDoc, PointsReader in, boolean merging) {
+    AssertingPointsReader(int maxDoc, PointsReader in, FieldInfos fis, boolean merging) {
       this.in = in;
       this.maxDoc = maxDoc;
+      this.fis = fis;
       this.merging = merging;
       this.creationThread = Thread.currentThread();
       // do a few simple checks on init
@@ -80,6 +84,8 @@ public final class AssertingPointsFormat extends PointsFormat {
 
     @Override
     public PointValues getValues(String field) throws IOException {
+      FieldInfo fi = fis.fieldInfo(field);
+      assert fi != null && fi.getPointDimensionCount() > 0;
       if (merging) {
         AssertingCodec.assertThread("PointsReader", creationThread);
       }
@@ -97,7 +103,7 @@ public final class AssertingPointsFormat extends PointsFormat {
 
     @Override
     public PointsReader getMergeInstance() {
-      return new AssertingPointsReader(maxDoc, in.getMergeInstance(), true);
+      return new AssertingPointsReader(maxDoc, in.getMergeInstance(), fis, true);
     }
 
     @Override