You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2018/12/18 16:45:37 UTC

lucene-solr:master: LUCENE-8600: Use a faster sort in DocValuesFieldUpdates.

Repository: lucene-solr
Updated Branches:
  refs/heads/master d185ba99d -> dcd4a288b


LUCENE-8600: Use a faster sort in DocValuesFieldUpdates.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/dcd4a288
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/dcd4a288
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/dcd4a288

Branch: refs/heads/master
Commit: dcd4a288b4e19c854f5f8430691061badfdc92b6
Parents: d185ba9
Author: Adrien Grand <jp...@gmail.com>
Authored: Mon Dec 17 15:21:17 2018 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Tue Dec 18 17:45:17 2018 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  4 ++
 .../lucene/index/DocValuesFieldUpdates.java     | 63 ++++++++++++++++----
 2 files changed, 54 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/dcd4a288/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 09f9011..bbb550b 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -286,6 +286,10 @@ Optimizations
 * LUCENE-8599: Use sparse bitset to store docs in SingleValueDocValuesFieldUpdates. 
   (Simon Willnauer, Adrien Grand)
 
+* LUCENE-8600: Doc-value updates get applied faster by sorting with quicksort,
+  rather than an in-place mergesort, which needs to perform fewer swaps.
+  (Adrien Grand)
+
 Other
 
 * LUCENE-8573: BKDWriter now uses FutureArrays#mismatch to compute shared prefixes.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/dcd4a288/lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java b/lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java
index 464a7f2..883cf52 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java
@@ -21,7 +21,7 @@ import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.BitSet;
 import org.apache.lucene.util.BitSetIterator;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.InPlaceMergeSorter;
+import org.apache.lucene.util.IntroSorter;
 import org.apache.lucene.util.PriorityQueue;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.SparseFixedBitSet;
@@ -289,20 +289,57 @@ abstract class DocValuesFieldUpdates implements Accountable {
     if (size < docs.size()) {
       resize(size);
     }
-    new InPlaceMergeSorter() {
-      @Override
-      protected void swap(int i, int j) {
-        DocValuesFieldUpdates.this.swap(i, j);
+    if (size > 0) {
+      // We need a stable sort but InPlaceMergeSorter performs lots of swaps
+      // which hurts performance due to all the packed ints we are using.
+      // Another option would be TimSorter, but it needs additional API (copy to
+      // temp storage, compare with item in temp storage, etc.) so we instead
+      // use quicksort and record ords of each update to guarantee stability.
+      final PackedInts.Mutable ords = PackedInts.getMutable(size, PackedInts.bitsRequired(size - 1), PackedInts.DEFAULT);
+      for (int i = 0; i < size; ++i) {
+        ords.set(i, i);
       }
+      new IntroSorter() {
+        @Override
+        protected void swap(int i, int j) {
+          final long tmpOrd = ords.get(i);
+          ords.set(i, ords.get(j));
+          ords.set(j, tmpOrd);
 
-      @Override
-      protected int compare(int i, int j) {
-        // increasing docID order:
-        // NOTE: we can have ties here, when the same docID was updated in the same segment, in which case we rely on sort being
-        // stable and preserving original order so the last update to that docID wins
-        return Long.compare(docs.get(i)>>>1, docs.get(j)>>>1);
-      }
-    }.sort(0, size);
+          DocValuesFieldUpdates.this.swap(i, j);
+        }
+
+        @Override
+        protected int compare(int i, int j) {
+          // increasing docID order:
+          // NOTE: we can have ties here, when the same docID was updated in the same segment, in which case we rely on sort being
+          // stable and preserving original order so the last update to that docID wins
+          int cmp = Long.compare(docs.get(i)>>>1, docs.get(j)>>>1);
+          if (cmp == 0) {
+            cmp = (int) (ords.get(i) - ords.get(j));
+          }
+          return cmp;
+        }
+
+        long pivotDoc;
+        int pivotOrd;
+
+        @Override
+        protected void setPivot(int i) {
+          pivotDoc = docs.get(i) >>> 1;
+          pivotOrd = (int) ords.get(i);
+        }
+
+        @Override
+        protected int comparePivot(int j) {
+          int cmp = Long.compare(pivotDoc, docs.get(j) >>> 1);
+          if (cmp == 0) {
+            cmp = pivotOrd - (int) ords.get(j);
+          }
+          return cmp;
+        }
+      }.sort(0, size);
+    }
   }
 
   /** Returns true if this instance contains any updates. */