You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2015/02/26 11:28:34 UTC
svn commit: r1662410 - in /lucene/dev/trunk/lucene: CHANGES.txt
core/src/java/org/apache/lucene/util/TimSorter.java
core/src/test/org/apache/lucene/util/TestTimSorter.java
Author: jpountz
Date: Thu Feb 26 10:28:33 2015
New Revision: 1662410
URL: http://svn.apache.org/r1662410
Log:
LUCENE-6293: Fixed TimSorter bug.
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/TimSorter.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestTimSorter.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1662410&r1=1662409&r2=1662410&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Feb 26 10:28:33 2015
@@ -59,6 +59,8 @@ Bug Fixes
* LUCENE-6242: Ram usage estimation was incorrect for SparseFixedBitSet when
object alignment was different from 8. (Uwe Schindler, Adrien Grand)
+* LUCENE-6293: Fixed TimSorter bug. (Adrien Grand)
+
Optimizations
* LUCENE-6183, LUCENE-5647: Avoid recompressing stored fields
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/TimSorter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/TimSorter.java?rev=1662410&r1=1662409&r2=1662410&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/TimSorter.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/TimSorter.java Thu Feb 26 10:28:33 2015
@@ -40,7 +40,7 @@ public abstract class TimSorter extends
static final int MINRUN = 32;
static final int THRESHOLD = 64;
- static final int STACKSIZE = 40; // depends on MINRUN
+ static final int STACKSIZE = 49; // depends on MINRUN
static final int MIN_GALLOP = 7;
final int maxTempSlots;
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestTimSorter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestTimSorter.java?rev=1662410&r1=1662409&r2=1662410&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestTimSorter.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestTimSorter.java Thu Feb 26 10:28:33 2015
@@ -17,6 +17,13 @@ package org.apache.lucene.util;
* limitations under the License.
*/
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.lucene.util.packed.PackedInts;
+
+import com.carrotsearch.randomizedtesting.generators.RandomInts;
+
public class TestTimSorter extends BaseSortTestCase {
public TestTimSorter() {
@@ -28,4 +35,140 @@ public class TestTimSorter extends BaseS
return new ArrayTimSorter<>(arr, ArrayUtil.<Entry>naturalComparator(), TestUtil.nextInt(random(), 0, arr.length));
}
+ public void testWorstCaseStackSize() {
+ // we need large arrays to be able to reproduce this bug
+ final int length;
+ if (TEST_NIGHTLY) {
+ length = RandomInts.randomIntBetween(random(), 140000000, Integer.MAX_VALUE);
+ } else {
+ length = RandomInts.randomIntBetween(random(), 140000000, 200000000);
+ }
+ final PackedInts.Mutable arr = generateWorstCaseArray(length);
+ new TimSorter(0) {
+
+ @Override
+ protected void swap(int i, int j) {
+ final long tmp = arr.get(i);
+ arr.set(i, arr.get(j));
+ arr.set(j, tmp);
+ }
+
+ @Override
+ protected int compare(int i, int j) {
+ return Long.compare(arr.get(i), arr.get(j));
+ }
+
+ @Override
+ protected void save(int i, int len) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ protected void restore(int i, int j) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ protected void copy(int src, int dest) {
+ arr.set(dest, arr.get(src));
+ }
+
+ @Override
+ protected int compareSaved(int i, int j) {
+ throw new UnsupportedOperationException();
+ }
+ }.sort(0, length);
+ }
+
+ /** Create an array for the given list of runs. */
+ private static PackedInts.Mutable createArray(int length, List<Integer> runs) {
+ PackedInts.Mutable array = PackedInts.getMutable(length, 1, 0);
+ int endRun = -1;
+ for (long len : runs) {
+ array.set(endRun += len, 1);
+ }
+ array.set(length - 1, 0);
+ return array;
+ }
+
+ /** Create an array that triggers a worst-case sequence of run lens. */
+ public static PackedInts.Mutable generateWorstCaseArray(int length) {
+ final int minRun = TimSorter.minRun(length);
+ final List<Integer> runs = runsWorstCase(length, minRun);
+ return createArray(length, runs);
+ }
+
+ //
+ // Code below is borrowed from https://github.com/abstools/java-timsort-bug/blob/master/TestTimSort.java
+ //
+
+ /**
+ * Fills <code>runs</code> with a sequence of run lengths of the form<br>
+ * Y_n x_{n,1} x_{n,2} ... x_{n,l_n} <br>
+ * Y_{n-1} x_{n-1,1} x_{n-1,2} ... x_{n-1,l_{n-1}} <br>
+ * ... <br>
+ * Y_1 x_{1,1} x_{1,2} ... x_{1,l_1}<br>
+ * The Y_i's are chosen to satisfy the invariant throughout execution,
+ * but the x_{i,j}'s are merged (by <code>TimSort.mergeCollapse</code>)
+ * into an X_i that violates the invariant.
+ */
+ private static List<Integer> runsWorstCase(int length, int minRun) {
+ List<Integer> runs = new LinkedList<>();
+
+ int runningTotal = 0, Y=minRun+4, X=minRun;
+
+ while((long) runningTotal+Y+X <= length) {
+ runningTotal += X + Y;
+ generateWrongElem(X, minRun, runs);
+ runs.add(0,Y);
+
+ // X_{i+1} = Y_i + x_{i,1} + 1, since runs.get(1) = x_{i,1}
+ X = Y + runs.get(1) + 1;
+
+ // Y_{i+1} = X_{i+1} + Y_i + 1
+ Y += X + 1;
+ }
+
+ if(runningTotal + X <= length) {
+ runningTotal += X;
+ generateWrongElem(X, minRun, runs);
+ }
+
+ runs.add(length-runningTotal);
+ return runs;
+ }
+
+ /**
+ * Adds a sequence x_1, ..., x_n of run lengths to <code>runs</code> such that:<br>
+ * 1. X = x_1 + ... + x_n <br>
+ * 2. x_j >= minRun for all j <br>
+ * 3. x_1 + ... + x_{j-2} < x_j < x_1 + ... + x_{j-1} for all j <br>
+ * These conditions guarantee that TimSort merges all x_j's one by one
+ * (resulting in X) using only merges on the second-to-last element.
+ * @param X The sum of the sequence that should be added to runs.
+ */
+ private static void generateWrongElem(int X, int minRun, List<Integer> runs) {
+ for(int newTotal; X >= 2*minRun+1; X = newTotal) {
+ //Default strategy
+ newTotal = X/2 + 1;
+
+ //Specialized strategies
+ if(3*minRun+3 <= X && X <= 4*minRun+1) {
+ // add x_1=MIN+1, x_2=MIN, x_3=X-newTotal to runs
+ newTotal = 2*minRun+1;
+ } else if(5*minRun+5 <= X && X <= 6*minRun+5) {
+ // add x_1=MIN+1, x_2=MIN, x_3=MIN+2, x_4=X-newTotal to runs
+ newTotal = 3*minRun+3;
+ } else if(8*minRun+9 <= X && X <= 10*minRun+9) {
+ // add x_1=MIN+1, x_2=MIN, x_3=MIN+2, x_4=2MIN+2, x_5=X-newTotal to runs
+ newTotal = 5*minRun+5;
+ } else if(13*minRun+15 <= X && X <= 16*minRun+17) {
+ // add x_1=MIN+1, x_2=MIN, x_3=MIN+2, x_4=2MIN+2, x_5=3MIN+4, x_6=X-newTotal to runs
+ newTotal = 8*minRun+9;
+ }
+ runs.add(0, X-newTotal);
+ }
+ runs.add(0, X);
+ }
+
}