You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2016/04/28 18:30:06 UTC

[2/2] lucene-solr:branch_6x: LUCENE-7261: Speed up LSBRadixSorter.

LUCENE-7261: Speed up LSBRadixSorter.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/8ca6f665
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/8ca6f665
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/8ca6f665

Branch: refs/heads/branch_6x
Commit: 8ca6f6651ede19bfaee9051e9b87927685cb9be0
Parents: 6c45977
Author: Adrien Grand <jp...@gmail.com>
Authored: Thu Apr 28 18:20:41 2016 +0200
Committer: Adrien Grand <jp...@gmail.com>
Committed: Thu Apr 28 18:26:58 2016 +0200

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  3 ++
 .../org/apache/lucene/util/DocIdSetBuilder.java |  3 +-
 .../org/apache/lucene/util/LSBRadixSorter.java  | 36 +++++++++-----------
 .../apache/lucene/util/TestLSBRadixSorter.java  | 29 ++++++++++------
 .../org/apache/solr/search/DocSetBuilder.java   |  5 ++-
 5 files changed, 43 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8ca6f665/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index d66dfc6..ca5e577 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -71,6 +71,9 @@ Optimizations
 * LUCENE-7237: LRUQueryCache now prefers returning an uncached Scorer than
   waiting on a lock. (Adrien Grand)
 
+* LUCENE-7261: Speed up LSBRadixSorter (which is used by TermsQuery, multi-term
+  queries and point queries). (Adrien Grand)
+
 Bug Fixes
 
 * LUCENE-7127: Fix corner case bugs in GeoPointDistanceQuery. (Robert Muir)

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8ca6f665/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java b/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
index 58d36cb..f35e584 100644
--- a/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
+++ b/lucene/core/src/java/org/apache/lucene/util/DocIdSetBuilder.java
@@ -20,6 +20,7 @@ import java.io.IOException;
 
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.packed.PackedInts;
 
 /**
  * A builder of {@link DocIdSet}s.  At first it uses a sparse structure to gather
@@ -174,7 +175,7 @@ public final class DocIdSetBuilder {
         return new BitDocIdSet(bitSet);
       } else {
         LSBRadixSorter sorter = new LSBRadixSorter();
-        sorter.sort(buffer, 0, bufferSize);
+        sorter.sort(PackedInts.bitsRequired(maxDoc - 1), buffer, bufferSize);
         final int l = dedup(buffer, bufferSize);
         assert l <= bufferSize;
         buffer = ArrayUtil.grow(buffer, l + 1);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8ca6f665/lucene/core/src/java/org/apache/lucene/util/LSBRadixSorter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/LSBRadixSorter.java b/lucene/core/src/java/org/apache/lucene/util/LSBRadixSorter.java
index 22f95b6..4ac6ed1 100644
--- a/lucene/core/src/java/org/apache/lucene/util/LSBRadixSorter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/LSBRadixSorter.java
@@ -31,9 +31,9 @@ public final class LSBRadixSorter {
   private final int[] histogram = new int[HISTOGRAM_SIZE];
   private int[] buffer = new int[0];
 
-  private static void buildHistogram(int[] array, int off, int len, int[] histogram, int shift) {
+  private static void buildHistogram(int[] array, int len, int[] histogram, int shift) {
     for (int i = 0; i < len; ++i) {
-      final int b = (array[off + i] >>> shift) & 0xFF;
+      final int b = (array[i] >>> shift) & 0xFF;
       histogram[b] += 1;
     }
   }
@@ -47,22 +47,22 @@ public final class LSBRadixSorter {
     }
   }
 
-  private static void reorder(int[] array, int off, int len, int[] histogram, int shift, int[] dest, int destOff) {
+  private static void reorder(int[] array, int len, int[] histogram, int shift, int[] dest) {
     for (int i = 0; i < len; ++i) {
-      final int v = array[off + i];
+      final int v = array[i];
       final int b = (v >>> shift) & 0xFF;
-      dest[destOff + histogram[b]++] = v;
+      dest[histogram[b]++] = v;
     }
   }
 
-  private static boolean sort(int[] array, int off, int len, int[] histogram, int shift, int[] dest, int destOff) {
+  private static boolean sort(int[] array, int len, int[] histogram, int shift, int[] dest) {
     Arrays.fill(histogram, 0);
-    buildHistogram(array, off, len, histogram, shift);
+    buildHistogram(array, len, histogram, shift);
     if (histogram[0] == len) {
       return false;
     }
     sumHistogram(histogram);
-    reorder(array, off, len, histogram, shift, dest, destOff);
+    reorder(array, len, histogram, shift, dest);
     return true;
   }
 
@@ -80,34 +80,32 @@ public final class LSBRadixSorter {
     }
   }
 
-  public void sort(final int[] array, int off, int len) {
+  /** Sort {@code array[0:len]} in place.
+   * @param numBits how many bits are required to store any of the values in
+   *                {@code array[0:len]}. Pass {@code 32} if unknown. */
+  public void sort(int numBits, final int[] array, int len) {
     if (len < INSERTION_SORT_THRESHOLD) {
-      insertionSort(array, off, len);
+      insertionSort(array, 0, len);
       return;
     }
 
     buffer = ArrayUtil.grow(buffer, len);
 
     int[] arr = array;
-    int arrOff = off;
 
     int[] buf = buffer;
-    int bufOff = 0;
-    
-    for (int shift = 0; shift <= 24; shift += 8) {
-      if (sort(arr, arrOff, len, histogram, shift, buf, bufOff)) {
+
+    for (int shift = 0; shift < numBits; shift += 8) {
+      if (sort(arr, len, histogram, shift, buf)) {
         // swap arrays
         int[] tmp = arr;
-        int tmpOff = arrOff;
         arr = buf;
-        arrOff = bufOff;
         buf = tmp;
-        bufOff = tmpOff;
       }
     }
 
     if (array == buf) {
-      System.arraycopy(arr, arrOff, array, off, len);
+      System.arraycopy(arr, 0, array, 0, len);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8ca6f665/lucene/core/src/test/org/apache/lucene/util/TestLSBRadixSorter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestLSBRadixSorter.java b/lucene/core/src/test/org/apache/lucene/util/TestLSBRadixSorter.java
index 020bc50..ba8bd02 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestLSBRadixSorter.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestLSBRadixSorter.java
@@ -19,28 +19,38 @@ package org.apache.lucene.util;
 
 import java.util.Arrays;
 
+import org.apache.lucene.util.packed.PackedInts;
+
 public class TestLSBRadixSorter extends LuceneTestCase {
 
   public void test(LSBRadixSorter sorter, int maxLen) {
     for (int iter = 0; iter < 10; ++iter) {
-      int off = random().nextInt(10);
       final int len = TestUtil.nextInt(random(), 0, maxLen);
-      int[] arr = new int[off + len + random().nextInt(10)];
+      int[] arr = new int[len + random().nextInt(10)];
       final int numBits = random().nextInt(31);
       final int maxValue = (1 << numBits) - 1;
       for (int i = 0; i < arr.length; ++i) {
         arr[i] = TestUtil.nextInt(random(), 0, maxValue);
       }
-      test(sorter, arr, off, len);
+      test(sorter, arr, len);
     }
   }
 
-  public void test(LSBRadixSorter sorter, int[] arr, int off, int len) {
-    final int[] expected = Arrays.copyOfRange(arr, off, off + len);
+  public void test(LSBRadixSorter sorter, int[] arr, int len) {
+    final int[] expected = Arrays.copyOf(arr, len);
     Arrays.sort(expected);
 
-    sorter.sort(arr, off, len);
-    final int[] actual = Arrays.copyOfRange(arr, off, off + len);
+    int numBits = 0;
+    for (int i = 0; i < len; ++i) {
+      numBits = Math.max(numBits, PackedInts.bitsRequired(arr[i]));
+    }
+
+    if (random().nextBoolean()) {
+      numBits = TestUtil.nextInt(random(), numBits, 32);
+    }
+
+    sorter.sort(numBits, arr, len);
+    final int[] actual = Arrays.copyOf(arr, len);
     assertArrayEquals(expected, actual);
   }
 
@@ -73,9 +83,8 @@ public class TestLSBRadixSorter extends LuceneTestCase {
         a += random().nextInt(10);
         arr[i] = a;
       }
-      final int off = random().nextInt(arr.length);
-      final int len = TestUtil.nextInt(random(), 0, arr.length - off);
-      test(sorter, arr, off, len);
+      final int len = TestUtil.nextInt(random(), 0, arr.length);
+      test(sorter, arr, len);
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8ca6f665/solr/core/src/java/org/apache/solr/search/DocSetBuilder.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/DocSetBuilder.java b/solr/core/src/java/org/apache/solr/search/DocSetBuilder.java
index a5ca024..bd180c7 100644
--- a/solr/core/src/java/org/apache/solr/search/DocSetBuilder.java
+++ b/solr/core/src/java/org/apache/solr/search/DocSetBuilder.java
@@ -21,11 +21,10 @@ import java.io.IOException;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.LSBRadixSorter;
-import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.packed.PackedInts;
 
 /**
  * Adapted from DocIdSetBuilder to build DocSets
@@ -188,7 +187,7 @@ public final class DocSetBuilder {
       // TODO - if this set will be cached, should we make it smaller if it's below DocSetUtil.smallSetSize?
     } else {
       LSBRadixSorter sorter = new LSBRadixSorter();
-      sorter.sort(buffer, 0, pos);
+      sorter.sort(PackedInts.bitsRequired(maxDoc - 1), buffer, pos);
       final int l = dedup(buffer, pos, filter);
       assert l <= pos;
       return new SortedIntDocSet(buffer, l);  // TODO: have option to not shrink in the future if it will be a temporary set