You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by lu...@apache.org on 2022/06/15 04:58:40 UTC
[lucene] branch branch_9x updated: LUCENE-10598: (backport) SortedSetDocValues#docValueCount() should be always greater than zero (#957)
This is an automated email from the ASF dual-hosted git repository.
luxugang pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new 90b5d5383f1 LUCENE-10598: (backport) SortedSetDocValues#docValueCount() should be always greater than zero (#957)
90b5d5383f1 is described below
commit 90b5d5383f1ced8d567dc02462ac7632a5e5949d
Author: Lu Xugang <q1...@Gmail.com>
AuthorDate: Wed Jun 15 12:58:36 2022 +0800
LUCENE-10598: (backport) SortedSetDocValues#docValueCount() should be always greater than zero (#957)
* LUCENE-10598: SortedSetDocValues#docValueCount() should be always greater than zero (#934)
* LUCENE-10598: Use count to record docValueCount, similar to what SortedNumericDocValues does (#942)
* Fix docValueCount() on Lucene70 sorted set doc values.
---
lucene/CHANGES.txt | 2 ++
.../lucene70/Lucene70DocValuesProducer.java | 29 +++++++++++++++-------
.../lucene80/Lucene80DocValuesProducer.java | 12 ++++++---
.../codecs/lucene90/Lucene90DocValuesProducer.java | 2 +-
.../java/org/apache/lucene/index/CheckIndex.java | 28 +++++++++++++++++++++
.../apache/lucene/index/SortedSetDocValues.java | 5 ++--
.../lucene90/TestLucene90DocValuesFormat.java | 2 ++
.../apache/lucene/index/TestMultiDocValues.java | 1 +
.../lucene/index/TestSortingCodecReader.java | 1 +
.../lucene/index/memory/TestMemoryIndex.java | 1 +
10 files changed, 67 insertions(+), 16 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 80c375a43e8..d9e9ac8b691 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -99,6 +99,8 @@ Bug Fixes
* LUCENE-10605: Fix error in 32bit jvm object alignment gap calculation (Sun Wuqiang)
+* LUCENE-10598: SortedSetDocValues#docValueCount() should be always greater than zero. (Lu Xugang)
+
Other
---------------------
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70DocValuesProducer.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70DocValuesProducer.java
index b71fe2b073e..a0b53316e21 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70DocValuesProducer.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene70/Lucene70DocValuesProducer.java
@@ -1367,8 +1367,8 @@ final class Lucene70DocValuesProducer extends DocValuesProducer {
return new BaseSortedSetDocValues(entry, data) {
int doc = -1;
- long start;
- long end;
+ long start, end;
+ long count;
@Override
public int nextDoc() throws IOException {
@@ -1392,6 +1392,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer {
}
start = addresses.get(target);
end = addresses.get(target + 1L);
+ count = (end - start);
return doc = target;
}
@@ -1399,6 +1400,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer {
public boolean advanceExact(int target) throws IOException {
start = addresses.get(target);
end = addresses.get(target + 1L);
+ count = (end - start);
doc = target;
return true;
}
@@ -1413,7 +1415,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer {
@Override
public long docValueCount() {
- return end - start;
+ return count;
}
};
} else {
@@ -1426,6 +1428,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer {
boolean set;
long start;
long end = 0;
+ long count;
@Override
public int nextDoc() throws IOException {
@@ -1455,15 +1458,22 @@ final class Lucene70DocValuesProducer extends DocValuesProducer {
return disi.advanceExact(target);
}
- @Override
- public long nextOrd() throws IOException {
+ private boolean set() {
if (set == false) {
final int index = disi.index();
- final long start = addresses.get(index);
- this.start = start + 1;
+ start = addresses.get(index);
end = addresses.get(index + 1L);
+ count = end - start;
set = true;
- return ords.get(start);
+ return true;
+ }
+ return false;
+ }
+
+ @Override
+ public long nextOrd() throws IOException {
+ if (set()) {
+ return ords.get(start++);
} else if (start == end) {
return NO_MORE_ORDS;
} else {
@@ -1473,7 +1483,8 @@ final class Lucene70DocValuesProducer extends DocValuesProducer {
@Override
public long docValueCount() {
- return end - start;
+ set();
+ return count;
}
};
}
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesProducer.java b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesProducer.java
index 32a898db09d..60c6b36fbc6 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesProducer.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/lucene80/Lucene80DocValuesProducer.java
@@ -1560,8 +1560,8 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
return new BaseSortedSetDocValues(entry, data) {
int doc = -1;
- long start;
- long end;
+ long start, end;
+ long count;
@Override
public int nextDoc() throws IOException {
@@ -1585,6 +1585,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
}
start = addresses.get(target);
end = addresses.get(target + 1L);
+ count = (end - start);
return doc = target;
}
@@ -1592,6 +1593,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
public boolean advanceExact(int target) throws IOException {
start = addresses.get(target);
end = addresses.get(target + 1L);
+ count = (end - start);
doc = target;
return true;
}
@@ -1606,7 +1608,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
@Override
public long docValueCount() {
- return end - start;
+ return count;
}
};
} else {
@@ -1624,6 +1626,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
boolean set;
long start;
long end = 0;
+ long count;
@Override
public int nextDoc() throws IOException {
@@ -1658,6 +1661,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
final int index = disi.index();
start = addresses.get(index);
end = addresses.get(index + 1L);
+ count = end - start;
set = true;
return true;
}
@@ -1678,7 +1682,7 @@ final class Lucene80DocValuesProducer extends DocValuesProducer {
@Override
public long docValueCount() {
set();
- return end - start;
+ return count;
}
};
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java
index cb5a27aa033..2bb71aa32aa 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java
@@ -1454,7 +1454,7 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
@Override
public long docValueCount() {
- return count;
+ return ords.docValueCount();
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
index e6bb6413112..81e9eb2cd48 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
@@ -3337,13 +3337,34 @@ public final class CheckIndex implements Closeable {
LongBitSet seenOrds = new LongBitSet(dv.getValueCount());
long maxOrd2 = -1;
for (int docID = dv.nextDoc(); docID != NO_MORE_DOCS; docID = dv.nextDoc()) {
+ long count = dv.docValueCount();
+ if (count == 0) {
+ throw new CheckIndexException(
+ "sortedset dv for field: "
+ + fieldName
+ + " returned docValueCount=0 for docID="
+ + docID);
+ }
if (dv2.advanceExact(docID) == false) {
throw new CheckIndexException("advanceExact did not find matching doc ID: " + docID);
}
+ long count2 = dv2.docValueCount();
+ if (count != count2) {
+ throw new CheckIndexException(
+ "advanceExact reports different value count: " + count + " != " + count2);
+ }
long lastOrd = -1;
long ord;
int ordCount = 0;
while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+ if (count != dv.docValueCount()) {
+ throw new CheckIndexException(
+ "value count changed from "
+ + count
+ + " to "
+ + dv.docValueCount()
+ + " during iterating over all values");
+ }
long ord2 = dv2.nextOrd();
if (ord != ord2) {
throw new CheckIndexException(
@@ -3361,6 +3382,13 @@ public final class CheckIndex implements Closeable {
seenOrds.set(ord);
ordCount++;
}
+ if (dv.docValueCount() != dv2.docValueCount()) {
+ throw new CheckIndexException(
+ "dv and dv2 report different values count after iterating over all values: "
+ + dv.docValueCount()
+ + " != "
+ + dv2.docValueCount());
+ }
if (ordCount == 0) {
throw new CheckIndexException(
"dv for field: " + fieldName + " returned docID=" + docID + " yet has no ordinals");
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
index a44b8b0b560..4ebd938551e 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
@@ -45,8 +45,9 @@ public abstract class SortedSetDocValues extends DocValuesIterator {
public abstract long nextOrd() throws IOException;
/**
- * Retrieves the number of values for the current document. This must always be greater than zero.
- * It is illegal to call this method after {@link #advanceExact(int)} returned {@code false}.
+ * Retrieves the number of unique ords for the current document. This must always be greater than
+ * zero. It is illegal to call this method after {@link #advanceExact(int)} returned {@code
+ * false}.
*/
public abstract long docValueCount();
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90DocValuesFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90DocValuesFormat.java
index 2a11fefe0af..ca8fbb14e5b 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90DocValuesFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90DocValuesFormat.java
@@ -265,6 +265,7 @@ public class TestLucene90DocValuesFormat extends BaseCompressingDocValuesFormatT
assertTrue(valueSet.contains(sortedNumeric.nextValue()));
}
assertEquals(i, sortedSet.nextDoc());
+ assertEquals(valueSet.size(), sortedSet.docValueCount());
int sortedSetCount = 0;
while (true) {
long ord = sortedSet.nextOrd();
@@ -488,6 +489,7 @@ public class TestLucene90DocValuesFormat extends BaseCompressingDocValuesFormatT
for (int i = 0; i < maxDoc; ++i) {
assertEquals(i, values.nextDoc());
final int numValues = in.readVInt();
+ assertEquals(numValues, values.docValueCount());
for (int j = 0; j < numValues; ++j) {
b.setLength(in.readVInt());
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java b/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java
index 018d7592bf1..9672fdac034 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestMultiDocValues.java
@@ -279,6 +279,7 @@ public class TestMultiDocValues extends LuceneTestCase {
if (docID == NO_MORE_DOCS) {
break;
}
+ assertEquals(single.docValueCount(), multi.docValueCount());
ArrayList<Long> expectedList = new ArrayList<>();
long ord;
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestSortingCodecReader.java b/lucene/core/src/test/org/apache/lucene/index/TestSortingCodecReader.java
index 09a8e9ee36e..42a898d87b4 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestSortingCodecReader.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestSortingCodecReader.java
@@ -230,6 +230,7 @@ public class TestSortingCodecReader extends LuceneTestCase {
assertEquals(
new BytesRef(ids.longValue() + ""),
sorted_set_dv.lookupOrd(sorted_set_dv.nextOrd()));
+ assertEquals(1, sorted_set_dv.docValueCount());
assertEquals(1, sorted_numeric_dv.docValueCount());
assertEquals(ids.longValue(), sorted_numeric_dv.nextValue());
diff --git a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
index 4ce329a62ea..206c66b2516 100644
--- a/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
+++ b/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
@@ -297,6 +297,7 @@ public class TestMemoryIndex extends LuceneTestCase {
SortedSetDocValues sortedSetDocValues = leafReader.getSortedSetDocValues("sorted_set");
assertEquals(3, sortedSetDocValues.getValueCount());
assertEquals(0, sortedSetDocValues.nextDoc());
+ assertEquals(3, sortedSetDocValues.docValueCount());
assertEquals(0L, sortedSetDocValues.nextOrd());
assertEquals(1L, sortedSetDocValues.nextOrd());
assertEquals(2L, sortedSetDocValues.nextOrd());