You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2018/12/18 16:45:37 UTC
lucene-solr:master: LUCENE-8600: Use a faster sort in
DocValuesFieldUpdates.
Repository: lucene-solr
Updated Branches:
refs/heads/master d185ba99d -> dcd4a288b
LUCENE-8600: Use a faster sort in DocValuesFieldUpdates.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/dcd4a288
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/dcd4a288
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/dcd4a288
Branch: refs/heads/master
Commit: dcd4a288b4e19c854f5f8430691061badfdc92b6
Parents: d185ba9
Author: Adrien Grand <jp...@gmail.com>
Authored: Mon Dec 17 15:21:17 2018 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Tue Dec 18 17:45:17 2018 +0100
----------------------------------------------------------------------
lucene/CHANGES.txt | 4 ++
.../lucene/index/DocValuesFieldUpdates.java | 63 ++++++++++++++++----
2 files changed, 54 insertions(+), 13 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/dcd4a288/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 09f9011..bbb550b 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -286,6 +286,10 @@ Optimizations
* LUCENE-8599: Use sparse bitset to store docs in SingleValueDocValuesFieldUpdates.
(Simon Willnauer, Adrien Grand)
+* LUCENE-8600: Doc-value updates get applied faster by sorting with quicksort,
+ rather than an in-place mergesort, which needs to perform fewer swaps.
+ (Adrien Grand)
+
Other
* LUCENE-8573: BKDWriter now uses FutureArrays#mismatch to compute shared prefixes.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/dcd4a288/lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java b/lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java
index 464a7f2..883cf52 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocValuesFieldUpdates.java
@@ -21,7 +21,7 @@ import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.InPlaceMergeSorter;
+import org.apache.lucene.util.IntroSorter;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.SparseFixedBitSet;
@@ -289,20 +289,57 @@ abstract class DocValuesFieldUpdates implements Accountable {
if (size < docs.size()) {
resize(size);
}
- new InPlaceMergeSorter() {
- @Override
- protected void swap(int i, int j) {
- DocValuesFieldUpdates.this.swap(i, j);
+ if (size > 0) {
+ // We need a stable sort but InPlaceMergeSorter performs lots of swaps
+ // which hurts performance due to all the packed ints we are using.
+ // Another option would be TimSorter, but it needs additional API (copy to
+ // temp storage, compare with item in temp storage, etc.) so we instead
+ // use quicksort and record ords of each update to guarantee stability.
+ final PackedInts.Mutable ords = PackedInts.getMutable(size, PackedInts.bitsRequired(size - 1), PackedInts.DEFAULT);
+ for (int i = 0; i < size; ++i) {
+ ords.set(i, i);
}
+ new IntroSorter() {
+ @Override
+ protected void swap(int i, int j) {
+ final long tmpOrd = ords.get(i);
+ ords.set(i, ords.get(j));
+ ords.set(j, tmpOrd);
- @Override
- protected int compare(int i, int j) {
- // increasing docID order:
- // NOTE: we can have ties here, when the same docID was updated in the same segment, in which case we rely on sort being
- // stable and preserving original order so the last update to that docID wins
- return Long.compare(docs.get(i)>>>1, docs.get(j)>>>1);
- }
- }.sort(0, size);
+ DocValuesFieldUpdates.this.swap(i, j);
+ }
+
+ @Override
+ protected int compare(int i, int j) {
+ // increasing docID order:
+ // NOTE: we can have ties here, when the same docID was updated in the same segment, in which case we rely on sort being
+ // stable and preserving original order so the last update to that docID wins
+ int cmp = Long.compare(docs.get(i)>>>1, docs.get(j)>>>1);
+ if (cmp == 0) {
+ cmp = (int) (ords.get(i) - ords.get(j));
+ }
+ return cmp;
+ }
+
+ long pivotDoc;
+ int pivotOrd;
+
+ @Override
+ protected void setPivot(int i) {
+ pivotDoc = docs.get(i) >>> 1;
+ pivotOrd = (int) ords.get(i);
+ }
+
+ @Override
+ protected int comparePivot(int j) {
+ int cmp = Long.compare(pivotDoc, docs.get(j) >>> 1);
+ if (cmp == 0) {
+ cmp = pivotOrd - (int) ords.get(j);
+ }
+ return cmp;
+ }
+ }.sort(0, size);
+ }
}
/** Returns true if this instance contains any updates. */