You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2022/05/02 14:51:36 UTC

[lucene] 01/02: LUCENE-10188: Give SortedSetDocValues a docValueCount() (#663)

This is an automated email from the ASF dual-hosted git repository.

mikemccand pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git

commit 782a835405b018fbb447accc495d6fd97e0b0c1d
Author: spike.liu <sp...@outlook.com>
AuthorDate: Mon May 2 22:41:12 2022 +0800

    LUCENE-10188: Give SortedSetDocValues a docValueCount() (#663)
    
    Co-authored-by: vlc刘诚 <ch...@trip.com>
---
 .../backward_codecs/lucene80/Lucene80DocValuesProducer.java    | 10 ++++++++++
 .../lucene/codecs/simpletext/SimpleTextDocValuesReader.java    |  5 +++++
 .../src/java/org/apache/lucene/codecs/DocValuesConsumer.java   |  5 +++++
 .../lucene/codecs/lucene90/Lucene90DocValuesProducer.java      |  5 +++++
 .../java/org/apache/lucene/index/FilterSortedSetDocValues.java |  5 +++++
 .../core/src/java/org/apache/lucene/index/MultiDocValues.java  |  5 +++++
 .../org/apache/lucene/index/SingletonSortedSetDocValues.java   |  5 +++++
 .../src/java/org/apache/lucene/index/SortedSetDocValues.java   |  6 +++++-
 .../java/org/apache/lucene/index/SortedSetDocValuesWriter.java | 10 ++++++++++
 .../org/apache/lucene/search/join/GenericTermsCollector.java   |  5 +++++
 .../src/java/org/apache/lucene/index/memory/MemoryIndex.java   |  5 +++++
 .../org/apache/lucene/tests/index/AssertingLeafReader.java     |  5 +++++
 12 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesProducer.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesProducer.java
index 118b91a3011..ad047456e14 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesProducer.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesProducer.java
@@ -1603,6 +1603,11 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
           }
           return ords.get(start++);
         }
+
+        @Override
+        public long docValueCount() { // width of the [start, end) ord window
+          return end - start; // NOTE(review): start is post-incremented by ords.get(start++) just above, so this shrinks as ords are consumed - confirm it is meant to be the per-document total
+        }
       };
     } else {
       // sparse
@@ -1663,6 +1668,11 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
             return ords.get(start++);
           }
         }
+
+        @Override
+        public long docValueCount() { // width of the [start, end) ord window
+          return end - start; // NOTE(review): start is post-incremented by ords.get(start++) just above, so this shrinks as ords are consumed - confirm it is meant to be the per-document total
+        }
       };
     }
   }
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java
index a9b0fd5943e..c5a7c8bca4f 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesReader.java
@@ -727,6 +727,11 @@ class SimpleTextDocValuesReader extends DocValuesProducer {
         }
       }
 
+      @Override
+      public long docValueCount() { // reports the length of the currentOrds array
+        return currentOrds.length; // presumably the ords parsed for the current document - where currentOrds is filled is not visible in this hunk
+      }
+
       @Override
       public BytesRef lookupOrd(long ord) throws IOException {
         if (ord < 0 || ord >= field.numValues) {
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
index 132feccebcd..407cba0bb62 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java
@@ -947,6 +947,11 @@ public abstract class DocValuesConsumer implements Closeable {
                 return currentSub.map.get(subOrd);
               }
 
+              @Override
+              public long docValueCount() { // pass-through: the count is taken from currentSub unchanged
+                return currentSub.values.docValueCount(); // same currentSub whose map translates ords in the method above
+              }
+
               @Override
               public long cost() {
                 return finalCost;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java
index f1647d26273..5989334de38 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java
@@ -1448,6 +1448,11 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
         return ords.nextValue();
       }
 
+      @Override
+      public long docValueCount() { // exposes the count field as-is
+        return count; // where count is assigned is outside this hunk - verify it is set per document
+      }
+
       @Override
       public boolean advanceExact(int target) throws IOException {
         set = false;
diff --git a/lucene/core/src/java/org/apache/lucene/index/FilterSortedSetDocValues.java b/lucene/core/src/java/org/apache/lucene/index/FilterSortedSetDocValues.java
index 9a3706fe4f6..cf970d666c5 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FilterSortedSetDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FilterSortedSetDocValues.java
@@ -43,6 +43,11 @@ public class FilterSortedSetDocValues extends SortedSetDocValues {
     return in.nextOrd();
   }
 
+  @Override
+  public long docValueCount() { // pure delegation, like the nextOrd()/lookupOrd() overrides around it
+    return in.docValueCount(); // in is the wrapped instance
+  }
+
   @Override
   public BytesRef lookupOrd(long ord) throws IOException {
     return in.lookupOrd(ord);
diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
index a5981425997..b085dd775a9 100644
--- a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java
@@ -927,6 +927,11 @@ public class MultiDocValues {
       }
     }
 
+    @Override
+    public long docValueCount() { // forwards to whichever instance currentValues refers to
+      return currentValues.docValueCount(); // how currentValues is selected is outside this hunk
+    }
+
     @Override
     public BytesRef lookupOrd(long ord) throws IOException {
       int subIndex = mapping.getFirstSegmentNumber(ord);
diff --git a/lucene/core/src/java/org/apache/lucene/index/SingletonSortedSetDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SingletonSortedSetDocValues.java
index 5fcb2136e69..754f3fc231d 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SingletonSortedSetDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SingletonSortedSetDocValues.java
@@ -57,6 +57,11 @@ final class SingletonSortedSetDocValues extends SortedSetDocValues {
     return v;
   }
 
+  @Override
+  public long docValueCount() { // constant: this implementation always reports exactly one value
+    return 1;
+  }
+
   @Override
   public int nextDoc() throws IOException {
     int docID = in.nextDoc();
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
index fad7f52af38..a44b8b0b560 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
@@ -44,7 +44,11 @@ public abstract class SortedSetDocValues extends DocValuesIterator {
    */
   public abstract long nextOrd() throws IOException;
 
-  // TODO: should we have a docValueCount, like SortedNumeric?
+  /**
+   * Retrieves the number of values for the current document. This must always be greater than zero.
+   * It is illegal to call this method after {@link #advanceExact(int)} returned {@code false}.
+   */
+  public abstract long docValueCount();
 
   /**
    * Retrieves the value for the specified ordinal. The returned {@link BytesRef} may be re-used
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
index 6ffc7a18750..eaba6e5bc30 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
@@ -310,6 +310,11 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
       }
     }
 
+    @Override
+    public long docValueCount() { // exposes the ordCount field as-is
+      return ordCount; // where ordCount is assigned is outside this hunk - verify it is refreshed per document
+    }
+
     @Override
     public long cost() {
       return docsWithField.cost();
@@ -391,6 +396,11 @@ class SortedSetDocValuesWriter extends DocValuesWriter<SortedSetDocValues> {
       }
     }
 
+    @Override
+    public long docValueCount() { // reports the size of the ords.ords container
+      return ords.ords.size(); // NOTE(review): if ords.ords stores ords for more than the current document, this over-counts - confirm against the container's layout
+    }
+
     @Override
     public long cost() {
       return in.cost();
diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GenericTermsCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/GenericTermsCollector.java
index e674a1d1578..383cce1cdb9 100644
--- a/lucene/join/src/java/org/apache/lucene/search/join/GenericTermsCollector.java
+++ b/lucene/join/src/java/org/apache/lucene/search/join/GenericTermsCollector.java
@@ -93,6 +93,11 @@ interface GenericTermsCollector extends Collector {
           return target.nextOrd();
         }
 
+        @Override
+        public long docValueCount() { // pure delegation, same as the nextOrd() override above
+          return target.docValueCount(); // target is the wrapped instance
+        }
+
         @Override
         public BytesRef lookupOrd(long ord) throws IOException {
           final BytesRef val = target.lookupOrd(ord);
diff --git a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
index 7171d20ca8b..482cb5fee41 100644
--- a/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
+++ b/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
@@ -1154,6 +1154,11 @@ public class MemoryIndex {
         return ord++;
       }
 
+      @Override
+      public long docValueCount() { // reports the size of values, the same structure lookupOrd() reads from
+        return values.size(); // assumes every entry in values belongs to the single document being served - TODO confirm
+      }
+
       @Override
       public BytesRef lookupOrd(long ord) throws IOException {
         return values.get(bytesIds[(int) ord], scratch);
diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/AssertingLeafReader.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/AssertingLeafReader.java
index a3f8b9650a1..b464cb7cdc8 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/AssertingLeafReader.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/AssertingLeafReader.java
@@ -1064,6 +1064,11 @@ public class AssertingLeafReader extends FilterLeafReader {
       return ord;
     }
 
+    @Override
+    public long docValueCount() { // delegates to the wrapped in without additional checks
+      return in.docValueCount(); // NOTE(review): lookupOrd() below calls assertThread(...) first; this method does not - confirm whether the omission is intentional
+    }
+
     @Override
     public BytesRef lookupOrd(long ord) throws IOException {
       assertThread("Sorted set doc values", creationThread);