You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2021/08/26 07:44:59 UTC
[lucene] branch main updated: LUCENE-9613, LUCENE-10067: Further specialize ordinals. (#260)
This is an automated email from the ASF dual-hosted git repository.
jpountz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/main by this push:
new 2d7590a LUCENE-9613, LUCENE-10067: Further specialize ordinals. (#260)
2d7590a is described below
commit 2d7590a3555c5afb205bc781cd227d0c9e3d47a3
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Thu Aug 26 09:44:24 2021 +0200
LUCENE-9613, LUCENE-10067: Further specialize ordinals. (#260)
---
.../codecs/lucene90/Lucene90DocValuesConsumer.java | 24 ++++--
.../codecs/lucene90/Lucene90DocValuesProducer.java | 98 ++++++++++++++++++++++
2 files changed, 117 insertions(+), 5 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java
index e914698..6a22eff 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java
@@ -139,7 +139,8 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
return DocValues.singleton(valuesProducer.getNumeric(field));
}
- });
+ },
+ false);
}
private static class MinMaxTracker {
@@ -177,13 +178,14 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
}
}
- private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
+ private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer, boolean ords)
+ throws IOException {
SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
int numDocsWithValue = 0;
MinMaxTracker minMax = new MinMaxTracker();
MinMaxTracker blockMinMax = new MinMaxTracker();
long gcd = 0;
- Set<Long> uniqueValues = new HashSet<>();
+ Set<Long> uniqueValues = ords ? null : new HashSet<>();
for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
for (int i = 0, count = values.docValueCount(); i < count; ++i) {
long v = values.nextValue();
@@ -216,6 +218,17 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
minMax.finish();
blockMinMax.finish();
+ if (ords && minMax.numValues > 0) {
+ if (minMax.min != 0) {
+ throw new IllegalStateException(
+ "The min value for ordinals should always be 0, got " + minMax.min);
+ }
+ if (minMax.max != 0 && gcd != 1) {
+ throw new IllegalStateException(
+ "GCD compression should never be used on ordinals, found gcd=" + gcd);
+ }
+ }
+
final long numValues = minMax.numValues;
long min = minMax.min;
final long max = minMax.max;
@@ -508,7 +521,8 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
};
return DocValues.singleton(sortedOrds);
}
- });
+ },
+ true);
addTermsDict(DocValues.singleton(valuesProducer.getSorted(field)));
}
@@ -669,7 +683,7 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
private void doAddSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer)
throws IOException {
- long[] stats = writeValues(field, valuesProducer);
+ long[] stats = writeValues(field, valuesProducer, false);
int numDocsWithField = Math.toIntExact(stats[0]);
long numValues = stats[1];
assert numValues >= numDocsWithField;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java
index 5a21582..eb58502 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java
@@ -780,6 +780,104 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
}
private SortedDocValues getSorted(SortedEntry entry) throws IOException {
+ // Specialize the common case for ordinals: single block of packed integers.
+ final NumericEntry ordsEntry = entry.ordsEntry;
+ if (ordsEntry.blockShift < 0 // single block
+ && ordsEntry.bitsPerValue > 0) { // more than 1 value
+
+ if (ordsEntry.gcd != 1 || ordsEntry.minValue != 0 || ordsEntry.table != null) {
+ throw new IllegalStateException("Ordinals shouldn't use GCD, offset or table compression");
+ }
+
+ final RandomAccessInput slice =
+ data.randomAccessSlice(ordsEntry.valuesOffset, ordsEntry.valuesLength);
+ final LongValues values = DirectReader.getInstance(slice, ordsEntry.bitsPerValue);
+
+ if (ordsEntry.docsWithFieldOffset == -1) { // dense
+ return new BaseSortedDocValues(entry, data) {
+
+ private final int maxDoc = Lucene90DocValuesProducer.this.maxDoc;
+ private int doc = -1;
+
+ @Override
+ public int ordValue() throws IOException {
+ return (int) values.get(doc);
+ }
+
+ @Override
+ public boolean advanceExact(int target) throws IOException {
+ doc = target;
+ return true;
+ }
+
+ @Override
+ public int docID() {
+ return doc;
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ return advance(doc + 1);
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ if (target >= maxDoc) {
+ return doc = NO_MORE_DOCS;
+ }
+ return doc = target;
+ }
+
+ @Override
+ public long cost() {
+ return maxDoc;
+ }
+ };
+ } else if (ordsEntry.docsWithFieldOffset >= 0) { // sparse but non-empty
+ final IndexedDISI disi =
+ new IndexedDISI(
+ data,
+ ordsEntry.docsWithFieldOffset,
+ ordsEntry.docsWithFieldLength,
+ ordsEntry.jumpTableEntryCount,
+ ordsEntry.denseRankPower,
+ ordsEntry.numValues);
+
+ return new BaseSortedDocValues(entry, data) {
+
+ @Override
+ public int ordValue() throws IOException {
+ return (int) values.get(disi.index());
+ }
+
+ @Override
+ public boolean advanceExact(int target) throws IOException {
+ return disi.advanceExact(target);
+ }
+
+ @Override
+ public int docID() {
+ return disi.docID();
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ return disi.nextDoc();
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ return disi.advance(target);
+ }
+
+ @Override
+ public long cost() {
+ return disi.cost();
+ }
+ };
+ }
+ }
+
final NumericDocValues ords = getNumeric(entry.ordsEntry);
return new BaseSortedDocValues(entry, data) {