You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by lu...@apache.org on 2022/06/15 04:58:40 UTC

[lucene] branch branch_9x updated: LUCENE-10598: (backport) SortedSetDocValues#docValueCount() should be always greater than zero (#957)

This is an automated email from the ASF dual-hosted git repository.

luxugang pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new 90b5d5383f1 LUCENE-10598: (backport) SortedSetDocValues#docValueCount() should be always greater than zero (#957)
90b5d5383f1 is described below

commit 90b5d5383f1ced8d567dc02462ac7632a5e5949d
Author: Lu Xugang <q1...@Gmail.com>
AuthorDate: Wed Jun 15 12:58:36 2022 +0800

    LUCENE-10598: (backport) SortedSetDocValues#docValueCount() should be always greater than zero (#957)
    
    * LUCENE-10598: SortedSetDocValues#docValueCount() should be always greater than zero (#934)
    
    * LUCENE-10598: Use count to record docValueCount, similar to what SortedNumericDocValues does (#942)
    
    * Fix docValueCount() on Lucene70 sorted set doc values.
---
 lucene/CHANGES.txt                                 |  2 ++
 .../lucene70/Lucene70DocValuesProducer.java        | 29 +++++++++++++++-------
 .../lucene80/Lucene80DocValuesProducer.java        | 12 ++++++---
 .../codecs/lucene90/Lucene90DocValuesProducer.java |  2 +-
 .../java/org/apache/lucene/index/CheckIndex.java   | 28 +++++++++++++++++++++
 .../apache/lucene/index/SortedSetDocValues.java    |  5 ++--
 .../lucene90/TestLucene90DocValuesFormat.java      |  2 ++
 .../apache/lucene/index/TestMultiDocValues.java    |  1 +
 .../lucene/index/TestSortingCodecReader.java       |  1 +
 .../lucene/index/memory/TestMemoryIndex.java       |  1 +
 10 files changed, 67 insertions(+), 16 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 80c375a43e8..d9e9ac8b691 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -99,6 +99,8 @@ Bug Fixes
 
 * LUCENE-10605: Fix error in 32bit jvm object alignment gap calculation (Sun Wuqiang)
 
+* LUCENE-10598: SortedSetDocValues#docValueCount() should be always greater than zero. (Lu Xugang)
+
 Other
 ---------------------
 
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70DocValuesProducer.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70DocValuesProducer.java
index b71fe2b073e..a0b53316e21 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70DocValuesProducer.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70DocValuesProducer.java
@@ -1367,8 +1367,8 @@ final class Lucene70DocValuesProducer extends DocValuesProducer {
       return new BaseSortedSetDocValues(entry, data) {
 
         int doc = -1;
-        long start;
-        long end;
+        long start, end;
+        long count;
 
         @Override
         public int nextDoc() throws IOException {
@@ -1392,6 +1392,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer {
           }
           start = addresses.get(target);
           end = addresses.get(target + 1L);
+          count = (end - start);
           return doc = target;
         }
 
@@ -1399,6 +1400,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer {
         public boolean advanceExact(int target) throws IOException {
           start = addresses.get(target);
           end = addresses.get(target + 1L);
+          count = (end - start);
           doc = target;
           return true;
         }
@@ -1413,7 +1415,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer {
 
         @Override
         public long docValueCount() {
-          return end - start;
+          return count;
         }
       };
     } else {
@@ -1426,6 +1428,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer {
         boolean set;
         long start;
         long end = 0;
+        long count;
 
         @Override
         public int nextDoc() throws IOException {
@@ -1455,15 +1458,22 @@ final class Lucene70DocValuesProducer extends DocValuesProducer {
           return disi.advanceExact(target);
         }
 
-        @Override
-        public long nextOrd() throws IOException {
+        private boolean set() {
           if (set == false) {
             final int index = disi.index();
-            final long start = addresses.get(index);
-            this.start = start + 1;
+            start = addresses.get(index);
             end = addresses.get(index + 1L);
+            count = end - start;
             set = true;
-            return ords.get(start);
+            return true;
+          }
+          return false;
+        }
+
+        @Override
+        public long nextOrd() throws IOException {
+          if (set()) {
+            return ords.get(start++);
           } else if (start == end) {
             return NO_MORE_ORDS;
           } else {
@@ -1473,7 +1483,8 @@ final class Lucene70DocValuesProducer extends DocValuesProducer {
 
         @Override
         public long docValueCount() {
-          return end - start;
+          set();
+          return count;
         }
       };
     }
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesProducer.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesProducer.java
index 32a898db09d..60c6b36fbc6 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesProducer.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesProducer.java
@@ -1560,8 +1560,8 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
       return new BaseSortedSetDocValues(entry, data) {
 
         int doc = -1;
-        long start;
-        long end;
+        long start, end;
+        long count;
 
         @Override
         public int nextDoc() throws IOException {
@@ -1585,6 +1585,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
           }
           start = addresses.get(target);
           end = addresses.get(target + 1L);
+          count = (end - start);
           return doc = target;
         }
 
@@ -1592,6 +1593,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
         public boolean advanceExact(int target) throws IOException {
           start = addresses.get(target);
           end = addresses.get(target + 1L);
+          count = (end - start);
           doc = target;
           return true;
         }
@@ -1606,7 +1608,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
 
         @Override
         public long docValueCount() {
-          return end - start;
+          return count;
         }
       };
     } else {
@@ -1624,6 +1626,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
         boolean set;
         long start;
         long end = 0;
+        long count;
 
         @Override
         public int nextDoc() throws IOException {
@@ -1658,6 +1661,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
             final int index = disi.index();
             start = addresses.get(index);
             end = addresses.get(index + 1L);
+            count = end - start;
             set = true;
             return true;
           }
@@ -1678,7 +1682,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
         @Override
         public long docValueCount() {
           set();
-          return end - start;
+          return count;
         }
       };
     }
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java
index cb5a27aa033..2bb71aa32aa 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java
@@ -1454,7 +1454,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
 
       @Override
       public long docValueCount() {
-        return count;
+        return ords.docValueCount();
       }
 
       @Override
diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
index e6bb6413112..81e9eb2cd48 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
@@ -3337,13 +3337,34 @@ public final class CheckIndex implements Closeable {
     LongBitSet seenOrds = new LongBitSet(dv.getValueCount());
     long maxOrd2 = -1;
     for (int docID = dv.nextDoc(); docID != NO_MORE_DOCS; docID = dv.nextDoc()) {
+      long count = dv.docValueCount();
+      if (count == 0) {
+        throw new CheckIndexException(
+            "sortedset dv for field: "
+                + fieldName
+                + " returned docValueCount=0 for docID="
+                + docID);
+      }
       if (dv2.advanceExact(docID) == false) {
         throw new CheckIndexException("advanceExact did not find matching doc ID: " + docID);
       }
+      long count2 = dv2.docValueCount();
+      if (count != count2) {
+        throw new CheckIndexException(
+            "advanceExact reports different value count: " + count + " != " + count2);
+      }
       long lastOrd = -1;
       long ord;
       int ordCount = 0;
       while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+        if (count != dv.docValueCount()) {
+          throw new CheckIndexException(
+              "value count changed from "
+                  + count
+                  + " to "
+                  + dv.docValueCount()
+                  + " during iterating over all values");
+        }
         long ord2 = dv2.nextOrd();
         if (ord != ord2) {
           throw new CheckIndexException(
@@ -3361,6 +3382,13 @@ public final class CheckIndex implements Closeable {
         seenOrds.set(ord);
         ordCount++;
       }
+      if (dv.docValueCount() != dv2.docValueCount()) {
+        throw new CheckIndexException(
+            "dv and dv2 report different values count after iterating over all values: "
+                + dv.docValueCount()
+                + " != "
+                + dv2.docValueCount());
+      }
       if (ordCount == 0) {
         throw new CheckIndexException(
             "dv for field: " + fieldName + " returned docID=" + docID + " yet has no ordinals");
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
index a44b8b0b560..4ebd938551e 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
@@ -45,8 +45,9 @@ public abstract class SortedSetDocValues extends DocValuesIterator {
   public abstract long nextOrd() throws IOException;
 
   /**
-   * Retrieves the number of values for the current document. This must always be greater than zero.
-   * It is illegal to call this method after {@link #advanceExact(int)} returned {@code false}.
+   * Retrieves the number of unique ords for the current document. This must always be greater than
+   * zero. It is illegal to call this method after {@link #advanceExact(int)} returned {@code
+   * false}.
    */
   public abstract long docValueCount();
 
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90DocValuesFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90DocValuesFormat.java
index 2a11fefe0af..ca8fbb14e5b 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90DocValuesFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90DocValuesFormat.java
@@ -265,6 +265,7 @@ public class TestLucene90DocValuesFormat extends BaseCompressingDocValuesFormatT
             assertTrue(valueSet.contains(sortedNumeric.nextValue()));
           }
           assertEquals(i, sortedSet.nextDoc());
+          assertEquals(valueSet.size(), sortedSet.docValueCount());
           int sortedSetCount = 0;
           while (true) {
             long ord = sortedSet.nextOrd();
@@ -488,6 +489,7 @@ public class TestLucene90DocValuesFormat extends BaseCompressingDocValuesFormatT
       for (int i = 0; i < maxDoc; ++i) {
         assertEquals(i, values.nextDoc());
         final int numValues = in.readVInt();
+        assertEquals(numValues, values.docValueCount());
 
         for (int j = 0; j < numValues; ++j) {
           b.setLength(in.readVInt());
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java b/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java
index 018d7592bf1..9672fdac034 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java
@@ -279,6 +279,7 @@ public class TestMultiDocValues extends LuceneTestCase {
         if (docID == NO_MORE_DOCS) {
           break;
         }
+        assertEquals(single.docValueCount(), multi.docValueCount());
 
         ArrayList<Long> expectedList = new ArrayList<>();
         long ord;
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSortingCodecReader.java b/lucene/core/src/test/org/apache/lucene/index/TestSortingCodecReader.java
index 09a8e9ee36e..42a898d87b4 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestSortingCodecReader.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestSortingCodecReader.java
@@ -230,6 +230,7 @@ public class TestSortingCodecReader extends LuceneTestCase {
               assertEquals(
                   new BytesRef(ids.longValue() + ""),
                   sorted_set_dv.lookupOrd(sorted_set_dv.nextOrd()));
+              assertEquals(1, sorted_set_dv.docValueCount());
               assertEquals(1, sorted_numeric_dv.docValueCount());
               assertEquals(ids.longValue(), sorted_numeric_dv.nextValue());
 
diff --git a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
index 4ce329a62ea..206c66b2516 100644
--- a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
+++ b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
@@ -297,6 +297,7 @@ public class TestMemoryIndex extends LuceneTestCase {
     SortedSetDocValues sortedSetDocValues = leafReader.getSortedSetDocValues("sorted_set");
     assertEquals(3, sortedSetDocValues.getValueCount());
     assertEquals(0, sortedSetDocValues.nextDoc());
+    assertEquals(3, sortedSetDocValues.docValueCount());
     assertEquals(0L, sortedSetDocValues.nextOrd());
     assertEquals(1L, sortedSetDocValues.nextOrd());
     assertEquals(2L, sortedSetDocValues.nextOrd());