You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2018/12/10 17:09:20 UTC

lucene-solr:branch_7x: LUCENE-8598: Improve field updates packed values

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_7x f8a897799 -> afa61bd98


LUCENE-8598: Improve field updates packed values

DocValuesFieldUpdates uses the compact setting for packed ints, which causes
dramatic slowdowns when the updates are finished and sorted. Moving to the default
accepted overhead ratio yields up to 4x improvements in applying updates. This change
also improves the packing of numeric values since we know the value range in advance and
can choose a different packing scheme in such a case.
Overall, this change yields a good performance improvement: 99% of the time spent applying
DV field updates is spent in the sort method, so speeding up the sort essentially makes
applying the updates 4x faster.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/afa61bd9
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/afa61bd9
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/afa61bd9

Branch: refs/heads/branch_7x
Commit: afa61bd989cc88abed442870593e0226f14237a3
Parents: f8a8977
Author: Simon Willnauer <si...@apache.org>
Authored: Sun Dec 9 19:13:20 2018 +0100
Committer: Simon Willnauer <si...@apache.org>
Committed: Mon Dec 10 18:09:09 2018 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  5 +++-
 .../lucene/index/DocValuesFieldUpdates.java     |  2 +-
 .../apache/lucene/index/FieldUpdatesBuffer.java | 21 +++++++++++++
 .../lucene/index/FrozenBufferedUpdates.java     |  3 +-
 .../index/NumericDocValuesFieldUpdates.java     | 31 ++++++++++++++------
 .../util/packed/AbstractPagedMutable.java       |  3 +-
 .../lucene/index/TestFieldUpdatesBuffer.java    | 17 ++++++++++-
 7 files changed, 67 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/afa61bd9/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 1ad7a6c..d4d8061 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -46,7 +46,10 @@ Optimizations
 
 * LUCENE-8590: BufferedUpdates now uses an optimized storage for buffering docvalues updates that
   can safe up to 80% of the heap used compared to the previous implementation and uses non-object
-  based datastructures. (Simon Willnauer, Mike McCandless, Shai Erera, Adrien Grant)
+  based datastructures. (Simon Willnauer, Mike McCandless, Shai Erera, Adrien Grand)
+
+* LUCENE-8598: Moved to the default accepted overhead ratio for packet ints in DocValuesFieldUpdats
+  yields an up-to 4x performance improvement when applying doc values updates. (Simon Willnauer, Adrien Grand)
 
 Other
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/afa61bd9/lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java b/lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java
index 7c2c5b1..9ab3b7c 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java
@@ -251,7 +251,7 @@ abstract class DocValuesFieldUpdates implements Accountable {
     }
     this.type = type;
     bitsPerValue = PackedInts.bitsRequired(maxDoc - 1) + SHIFT;
-    docs = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.COMPACT);
+    docs = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.DEFAULT);
   }
 
   final boolean getFinished() {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/afa61bd9/lucene/core/src/java/org/apache/lucene/index/FieldUpdatesBuffer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/FieldUpdatesBuffer.java b/lucene/core/src/java/org/apache/lucene/index/FieldUpdatesBuffer.java
index 31a91bb..d9db980 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FieldUpdatesBuffer.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FieldUpdatesBuffer.java
@@ -55,6 +55,8 @@ final class FieldUpdatesBuffer {
   private int[] docsUpTo;
   private long[] numericValues; // this will be null if we are buffering binaries
   private FixedBitSet hasValues;
+  private long maxNumeric = Long.MIN_VALUE;
+  private long minNumeric = Long.MAX_VALUE;
   private String[] fields;
   private final boolean isNumeric;
 
@@ -82,6 +84,7 @@ final class FieldUpdatesBuffer {
     this(bytesUsed, initialValue, docUpTo, true);
     if (initialValue.hasValue()) {
       numericValues = new long[] {initialValue.getValue()};
+      maxNumeric = minNumeric = initialValue.getValue();
     } else {
       numericValues = new long[] {0};
     }
@@ -95,6 +98,22 @@ final class FieldUpdatesBuffer {
     }
   }
 
+  long getMaxNumeric() {
+    assert isNumeric;
+    if (minNumeric == Long.MAX_VALUE && maxNumeric == Long.MIN_VALUE) {
+      return 0; // we don't have any value;
+    }
+    return maxNumeric;
+  }
+
+  long getMinNumeric() {
+    assert isNumeric;
+    if (minNumeric == Long.MAX_VALUE && maxNumeric == Long.MIN_VALUE) {
+      return 0; // we don't have any value
+    }
+    return minNumeric;
+  }
+
   void add(String field, int docUpTo, int ord, boolean hasValue) {
     if (fields[0].equals(field) == false || fields.length != 1 ) {
       if (fields.length <= ord) {
@@ -144,6 +163,8 @@ final class FieldUpdatesBuffer {
     final int ord = append(term);
     String field = term.field;
     add(field, docUpTo, ord, true);
+    minNumeric = Math.min(minNumeric, value);
+    maxNumeric = Math.max(maxNumeric, value);
     if (numericValues[0] != value || numericValues.length != 1) {
       if (numericValues.length <= ord) {
         long[] array = ArrayUtil.grow(numericValues, ord+1);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/afa61bd9/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java b/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java
index 5cda75f..9174f03 100644
--- a/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java
+++ b/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java
@@ -541,7 +541,8 @@ final class FrozenBufferedUpdates {
                     .SingleValueNumericDocValuesFieldUpdates(delGen, updateField, segState.reader.maxDoc(),
                     value.getNumericValue(0));
               } else {
-                dvUpdates = new NumericDocValuesFieldUpdates(delGen, updateField, segState.reader.maxDoc());
+                dvUpdates = new NumericDocValuesFieldUpdates(delGen, updateField, value.getMinNumeric(),
+                    value.getMaxNumeric(), segState.reader.maxDoc());
               }
             } else {
               dvUpdates = new BinaryDocValuesFieldUpdates(delGen, updateField, segState.reader.maxDoc());

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/afa61bd9/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesFieldUpdates.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesFieldUpdates.java b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesFieldUpdates.java
index ebc196b..550a86a 100644
--- a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesFieldUpdates.java
+++ b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesFieldUpdates.java
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.packed.AbstractPagedMutable;
 import org.apache.lucene.util.packed.PackedInts;
 import org.apache.lucene.util.packed.PagedGrowableWriter;
 import org.apache.lucene.util.packed.PagedMutable;
@@ -31,15 +32,16 @@ import org.apache.lucene.util.packed.PagedMutable;
  * @lucene.experimental
  */
 final class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
-
   // TODO: can't this just be NumericDocValues now?  avoid boxing the long value...
   final static class Iterator extends DocValuesFieldUpdates.AbstractIterator {
-    private final PagedGrowableWriter values;
+    private final AbstractPagedMutable values;
+    private final long minValue;
     private long value;
 
-    Iterator(int size, PagedGrowableWriter values, PagedMutable docs, long delGen) {
+    Iterator(int size, long minValue, AbstractPagedMutable values, PagedMutable docs, long delGen) {
       super(size, docs, delGen);
       this.values = values;
+      this.minValue = minValue;
     }
     @Override
     long longValue() {
@@ -53,14 +55,25 @@ final class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
 
     @Override
     protected void set(long idx) {
-      value = values.get(idx);
+      value = values.get(idx) + minValue;
     }
   }
-  private PagedGrowableWriter values;
+  private AbstractPagedMutable values;
+  private final long minValue;
+
+  NumericDocValuesFieldUpdates(long delGen, String field, int maxDoc) {
+    super(maxDoc, delGen, field, DocValuesType.NUMERIC);
+    // we don't know the min/max range so we use the growable writer here to adjust as we go.
+    values = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.DEFAULT);
+    minValue = 0;
+  }
 
-  public NumericDocValuesFieldUpdates(long delGen, String field, int maxDoc) {
+  NumericDocValuesFieldUpdates(long delGen, String field, long minValue, long maxValue, int maxDoc) {
     super(maxDoc, delGen, field, DocValuesType.NUMERIC);
-    values = new PagedGrowableWriter(1, PAGE_SIZE, 1, PackedInts.FAST);
+    assert minValue <= maxValue : "minValue must be <= maxValue [" + minValue + " > " + maxValue + "]";
+    int bitsPerValue = PackedInts.unsignedBitsRequired(maxValue - minValue);
+    values = new PagedMutable(1, PAGE_SIZE, bitsPerValue, PackedInts.DEFAULT);
+    this.minValue = minValue;
   }
   @Override
   void add(int doc, BytesRef value) {
@@ -75,7 +88,7 @@ final class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
   @Override
   synchronized void add(int doc, long value) {
     int add = add(doc);
-    values.set(add, value);
+    values.set(add, value-minValue);
   }
 
   @Override
@@ -101,7 +114,7 @@ final class NumericDocValuesFieldUpdates extends DocValuesFieldUpdates {
   @Override
   Iterator iterator() {
     ensureFinished();
-    return new Iterator(size, values, docs, delGen);
+    return new Iterator(size, minValue, values, docs, delGen);
   }
   
   @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/afa61bd9/lucene/core/src/java/org/apache/lucene/util/packed/AbstractPagedMutable.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/AbstractPagedMutable.java b/lucene/core/src/java/org/apache/lucene/util/packed/AbstractPagedMutable.java
index e73863b..536f129 100644
--- a/lucene/core/src/java/org/apache/lucene/util/packed/AbstractPagedMutable.java
+++ b/lucene/core/src/java/org/apache/lucene/util/packed/AbstractPagedMutable.java
@@ -29,7 +29,7 @@ import org.apache.lucene.util.RamUsageEstimator;
  * Base implementation for {@link PagedMutable} and {@link PagedGrowableWriter}.
  * @lucene.internal
  */
-abstract class AbstractPagedMutable<T extends AbstractPagedMutable<T>> extends LongValues implements Accountable {
+public abstract class AbstractPagedMutable<T extends AbstractPagedMutable<T>> extends LongValues implements Accountable {
 
   static final int MIN_BLOCK_SIZE = 1 << 6;
   static final int MAX_BLOCK_SIZE = 1 << 30;
@@ -161,5 +161,4 @@ abstract class AbstractPagedMutable<T extends AbstractPagedMutable<T>> extends L
   public final String toString() {
     return getClass().getSimpleName() + "(size=" + size() + ",pageSize=" + pageSize() + ")";
   }
-
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/afa61bd9/lucene/core/src/test/org/apache/lucene/index/TestFieldUpdatesBuffer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestFieldUpdatesBuffer.java b/lucene/core/src/test/org/apache/lucene/index/TestFieldUpdatesBuffer.java
index 832c7cc..d6270ee 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestFieldUpdatesBuffer.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestFieldUpdatesBuffer.java
@@ -30,7 +30,7 @@ import org.apache.lucene.util.TestUtil;
 
 public class TestFieldUpdatesBuffer extends LuceneTestCase {
 
-  public void testBascis() throws IOException {
+  public void testBasics() throws IOException {
     Counter counter = Counter.newCounter();
     DocValuesUpdate.NumericDocValuesUpdate update =
         new DocValuesUpdate.NumericDocValuesUpdate(new Term("id", "1"), "age", 6);
@@ -44,6 +44,8 @@ public class TestFieldUpdatesBuffer extends LuceneTestCase {
     buffer.addUpdate(new Term("id", "8"), 12, 16);
     assertFalse(buffer.hasSingleValue());
     assertTrue(buffer.isNumeric());
+    assertEquals(13, buffer.getMaxNumeric());
+    assertEquals(6, buffer.getMinNumeric());
     FieldUpdatesBuffer.BufferedUpdateIterator iterator = buffer.iterator();
     FieldUpdatesBuffer.BufferedUpdate value = iterator.next();
     assertNotNull(value);
@@ -217,8 +219,12 @@ public class TestFieldUpdatesBuffer extends LuceneTestCase {
     FieldUpdatesBuffer.BufferedUpdate value;
 
     int count = 0;
+    long min = Long.MAX_VALUE;
+    long max = Long.MIN_VALUE;
     while ((value = iterator.next()) != null) {
       long v = buffer.getNumericValue(count);
+      min = Math.min(min, v);
+      max = Math.max(max, v);
       randomUpdate = updates.get(count++);
       assertEquals(randomUpdate.term.bytes.utf8ToString(), value.termValue.utf8ToString());
       assertEquals(randomUpdate.term.field, value.termField);
@@ -232,7 +238,16 @@ public class TestFieldUpdatesBuffer extends LuceneTestCase {
       }
       assertEquals(randomUpdate.docIDUpto, value.docUpTo);
     }
+    assertEquals(max, buffer.getMaxNumeric());
+    assertEquals(min, buffer.getMinNumeric());
     assertEquals(count, updates.size());
   }
 
+  public void testNoNumericValue() {
+    DocValuesUpdate.NumericDocValuesUpdate update =
+        new DocValuesUpdate.NumericDocValuesUpdate(new Term("id", "1"), "age", null);
+    FieldUpdatesBuffer buffer = new FieldUpdatesBuffer(Counter.newCounter(), update, update.docIDUpto);
+    assertEquals(0, buffer.getMinNumeric());
+    assertEquals(0, buffer.getMaxNumeric());
+  }
 }