You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2021/08/26 07:44:59 UTC

[lucene] branch main updated: LUCENE-9613, LUCENE-10067: Further specialize ordinals. (#260)

This is an automated email from the ASF dual-hosted git repository.

jpountz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new 2d7590a  LUCENE-9613, LUCENE-10067: Further specialize ordinals. (#260)
2d7590a is described below

commit 2d7590a3555c5afb205bc781cd227d0c9e3d47a3
Author: Adrien Grand <jp...@gmail.com>
AuthorDate: Thu Aug 26 09:44:24 2021 +0200

    LUCENE-9613, LUCENE-10067: Further specialize ordinals. (#260)
---
 .../codecs/lucene90/Lucene90DocValuesConsumer.java | 24 ++++--
 .../codecs/lucene90/Lucene90DocValuesProducer.java | 98 ++++++++++++++++++++++
 2 files changed, 117 insertions(+), 5 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java
index e914698..6a22eff 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesConsumer.java
@@ -139,7 +139,8 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
           public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
             return DocValues.singleton(valuesProducer.getNumeric(field));
           }
-        });
+        },
+        false);
   }
 
   private static class MinMaxTracker {
@@ -177,13 +178,14 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
     }
   }
 
-  private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
+  private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer, boolean ords)
+      throws IOException {
     SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
     int numDocsWithValue = 0;
     MinMaxTracker minMax = new MinMaxTracker();
     MinMaxTracker blockMinMax = new MinMaxTracker();
     long gcd = 0;
-    Set<Long> uniqueValues = new HashSet<>();
+    Set<Long> uniqueValues = ords ? null : new HashSet<>();
     for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
       for (int i = 0, count = values.docValueCount(); i < count; ++i) {
         long v = values.nextValue();
@@ -216,6 +218,17 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
     minMax.finish();
     blockMinMax.finish();
 
+    if (ords && minMax.numValues > 0) {
+      if (minMax.min != 0) {
+        throw new IllegalStateException(
+            "The min value for ordinals should always be 0, got " + minMax.min);
+      }
+      if (minMax.max != 0 && gcd != 1) {
+        throw new IllegalStateException(
+            "GCD compression should never be used on ordinals, found gcd=" + gcd);
+      }
+    }
+
     final long numValues = minMax.numValues;
     long min = minMax.min;
     final long max = minMax.max;
@@ -508,7 +521,8 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
                 };
             return DocValues.singleton(sortedOrds);
           }
-        });
+        },
+        true);
     addTermsDict(DocValues.singleton(valuesProducer.getSorted(field)));
   }
 
@@ -669,7 +683,7 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
 
   private void doAddSortedNumericField(FieldInfo field, DocValuesProducer valuesProducer)
       throws IOException {
-    long[] stats = writeValues(field, valuesProducer);
+    long[] stats = writeValues(field, valuesProducer, false);
     int numDocsWithField = Math.toIntExact(stats[0]);
     long numValues = stats[1];
     assert numValues >= numDocsWithField;
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java
index 5a21582..eb58502 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java
@@ -780,6 +780,104 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
   }
 
   private SortedDocValues getSorted(SortedEntry entry) throws IOException {
+    // Specialize the common case for ordinals: single block of packed integers.
+    final NumericEntry ordsEntry = entry.ordsEntry;
+    if (ordsEntry.blockShift < 0 // single block
+        && ordsEntry.bitsPerValue > 0) { // more than 1 value
+
+      if (ordsEntry.gcd != 1 || ordsEntry.minValue != 0 || ordsEntry.table != null) {
+        throw new IllegalStateException("Ordinals shouldn't use GCD, offset or table compression");
+      }
+
+      final RandomAccessInput slice =
+          data.randomAccessSlice(ordsEntry.valuesOffset, ordsEntry.valuesLength);
+      final LongValues values = DirectReader.getInstance(slice, ordsEntry.bitsPerValue);
+
+      if (ordsEntry.docsWithFieldOffset == -1) { // dense
+        return new BaseSortedDocValues(entry, data) {
+
+          private final int maxDoc = Lucene90DocValuesProducer.this.maxDoc;
+          private int doc = -1;
+
+          @Override
+          public int ordValue() throws IOException {
+            return (int) values.get(doc);
+          }
+
+          @Override
+          public boolean advanceExact(int target) throws IOException {
+            doc = target;
+            return true;
+          }
+
+          @Override
+          public int docID() {
+            return doc;
+          }
+
+          @Override
+          public int nextDoc() throws IOException {
+            return advance(doc + 1);
+          }
+
+          @Override
+          public int advance(int target) throws IOException {
+            if (target >= maxDoc) {
+              return doc = NO_MORE_DOCS;
+            }
+            return doc = target;
+          }
+
+          @Override
+          public long cost() {
+            return maxDoc;
+          }
+        };
+      } else if (ordsEntry.docsWithFieldOffset >= 0) { // sparse but non-empty
+        final IndexedDISI disi =
+            new IndexedDISI(
+                data,
+                ordsEntry.docsWithFieldOffset,
+                ordsEntry.docsWithFieldLength,
+                ordsEntry.jumpTableEntryCount,
+                ordsEntry.denseRankPower,
+                ordsEntry.numValues);
+
+        return new BaseSortedDocValues(entry, data) {
+
+          @Override
+          public int ordValue() throws IOException {
+            return (int) values.get(disi.index());
+          }
+
+          @Override
+          public boolean advanceExact(int target) throws IOException {
+            return disi.advanceExact(target);
+          }
+
+          @Override
+          public int docID() {
+            return disi.docID();
+          }
+
+          @Override
+          public int nextDoc() throws IOException {
+            return disi.nextDoc();
+          }
+
+          @Override
+          public int advance(int target) throws IOException {
+            return disi.advance(target);
+          }
+
+          @Override
+          public long cost() {
+            return disi.cost();
+          }
+        };
+      }
+    }
+
     final NumericDocValues ords = getNumeric(entry.ordsEntry);
     return new BaseSortedDocValues(entry, data) {