You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2015/02/26 11:28:34 UTC

svn commit: r1662410 - in /lucene/dev/trunk/lucene: CHANGES.txt core/src/java/org/apache/lucene/util/TimSorter.java core/src/test/org/apache/lucene/util/TestTimSorter.java

Author: jpountz
Date: Thu Feb 26 10:28:33 2015
New Revision: 1662410

URL: http://svn.apache.org/r1662410
Log:
LUCENE-6293: Fixed TimSorter bug.

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/TimSorter.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestTimSorter.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1662410&r1=1662409&r2=1662410&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Feb 26 10:28:33 2015
@@ -59,6 +59,8 @@ Bug Fixes
 * LUCENE-6242: Ram usage estimation was incorrect for SparseFixedBitSet when
   object alignment was different from 8. (Uwe Schindler, Adrien Grand)
 
+* LUCENE-6293: Fixed TimSorter bug. (Adrien Grand)
+
 Optimizations
 
 * LUCENE-6183, LUCENE-5647: Avoid recompressing stored fields

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/TimSorter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/TimSorter.java?rev=1662410&r1=1662409&r2=1662410&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/TimSorter.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/TimSorter.java Thu Feb 26 10:28:33 2015
@@ -40,7 +40,7 @@ public abstract class TimSorter extends
 
   static final int MINRUN = 32;
   static final int THRESHOLD = 64;
-  static final int STACKSIZE = 40; // depends on MINRUN
+  static final int STACKSIZE = 49; // depends on MINRUN
   static final int MIN_GALLOP = 7;
 
   final int maxTempSlots;

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestTimSorter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestTimSorter.java?rev=1662410&r1=1662409&r2=1662410&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestTimSorter.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestTimSorter.java Thu Feb 26 10:28:33 2015
@@ -17,6 +17,13 @@ package org.apache.lucene.util;
  * limitations under the License.
  */
 
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.lucene.util.packed.PackedInts;
+
+import com.carrotsearch.randomizedtesting.generators.RandomInts;
+
 public class TestTimSorter extends BaseSortTestCase {
 
   public TestTimSorter() {
@@ -28,4 +35,140 @@ public class TestTimSorter extends BaseS
     return new ArrayTimSorter<>(arr, ArrayUtil.<Entry>naturalComparator(), TestUtil.nextInt(random(), 0, arr.length));
   }
 
+  public void testWorstCaseStackSize() {
+    // we need large arrays to be able to reproduce this bug
+    final int length;
+    if (TEST_NIGHTLY) {
+      length = RandomInts.randomIntBetween(random(), 140000000, Integer.MAX_VALUE);
+    } else {
+      length = RandomInts.randomIntBetween(random(), 140000000, 200000000);
+    }
+    final PackedInts.Mutable arr = generateWorstCaseArray(length);
+    new TimSorter(0) {
+
+      @Override
+      protected void swap(int i, int j) {
+        final long tmp = arr.get(i);
+        arr.set(i, arr.get(j));
+        arr.set(j, tmp);
+      }
+
+      @Override
+      protected int compare(int i, int j) {
+        return Long.compare(arr.get(i), arr.get(j));
+      }
+
+      @Override
+      protected void save(int i, int len) {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      protected void restore(int i, int j) {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      protected void copy(int src, int dest) {
+        arr.set(dest, arr.get(src));
+      }
+
+      @Override
+      protected int compareSaved(int i, int j) {
+        throw new UnsupportedOperationException();
+      }
+    }.sort(0, length);
+  }
+
+  /** Create an array for the given list of runs. */
+  private static PackedInts.Mutable createArray(int length, List<Integer> runs) {
+    PackedInts.Mutable array = PackedInts.getMutable(length, 1, 0);
+    int endRun = -1;
+    for (long len : runs) {
+      array.set(endRun += len, 1);
+    }
+    array.set(length - 1, 0);
+    return array;
+  }
+
+  /** Create an array that triggers a worst-case sequence of run lens. */
+  public static PackedInts.Mutable generateWorstCaseArray(int length) {
+    final int minRun = TimSorter.minRun(length);
+    final List<Integer> runs = runsWorstCase(length, minRun);
+    return createArray(length, runs);
+  }
+
+  //
+  // Code below is borrowed from https://github.com/abstools/java-timsort-bug/blob/master/TestTimSort.java
+  //
+
+  /**
+   * Fills <code>runs</code> with a sequence of run lengths of the form<br>
+   * Y_n     x_{n,1}   x_{n,2}   ... x_{n,l_n} <br>
+   * Y_{n-1} x_{n-1,1} x_{n-1,2} ... x_{n-1,l_{n-1}} <br>
+   * ... <br>
+   * Y_1     x_{1,1}   x_{1,2}   ... x_{1,l_1}<br>
+   * The Y_i's are chosen to satisfy the invariant throughout execution,
+   * but the x_{i,j}'s are merged (by <code>TimSort.mergeCollapse</code>)
+   * into an X_i that violates the invariant.
+   */
+  private static List<Integer> runsWorstCase(int length, int minRun) {
+    List<Integer> runs = new LinkedList<>();
+
+    int runningTotal = 0, Y=minRun+4, X=minRun;
+
+    while((long) runningTotal+Y+X <= length) {
+      runningTotal += X + Y;
+      generateWrongElem(X, minRun, runs);
+      runs.add(0,Y);
+
+      // X_{i+1} = Y_i + x_{i,1} + 1, since runs.get(1) = x_{i,1}
+      X = Y + runs.get(1) + 1;
+
+      // Y_{i+1} = X_{i+1} + Y_i + 1
+      Y += X + 1;
+    }
+
+    if(runningTotal + X <= length) {
+      runningTotal += X;
+      generateWrongElem(X, minRun, runs);
+    }
+
+    runs.add(length-runningTotal);
+    return runs;
+  }
+
+  /**
+   * Adds a sequence x_1, ..., x_n of run lengths to <code>runs</code> such that:<br>
+   * 1. X = x_1 + ... + x_n <br>
+   * 2. x_j >= minRun for all j <br>
+   * 3. x_1 + ... + x_{j-2}  <  x_j  <  x_1 + ... + x_{j-1} for all j <br>
+   * These conditions guarantee that TimSort merges all x_j's one by one
+   * (resulting in X) using only merges on the second-to-last element.
+   * @param X  The sum of the sequence that should be added to runs.
+   */
+  private static void generateWrongElem(int X, int minRun, List<Integer> runs) {
+    for(int newTotal; X >= 2*minRun+1; X = newTotal) {
+      //Default strategy
+      newTotal = X/2 + 1;
+
+      //Specialized strategies
+      if(3*minRun+3 <= X && X <= 4*minRun+1) {
+        // add x_1=MIN+1, x_2=MIN, x_3=X-newTotal  to runs
+        newTotal = 2*minRun+1;
+      } else if(5*minRun+5 <= X && X <= 6*minRun+5) {
+        // add x_1=MIN+1, x_2=MIN, x_3=MIN+2, x_4=X-newTotal  to runs
+        newTotal = 3*minRun+3;
+      } else if(8*minRun+9 <= X && X <= 10*minRun+9) {
+        // add x_1=MIN+1, x_2=MIN, x_3=MIN+2, x_4=2MIN+2, x_5=X-newTotal  to runs
+        newTotal = 5*minRun+5;
+      } else if(13*minRun+15 <= X && X <= 16*minRun+17) {
+        // add x_1=MIN+1, x_2=MIN, x_3=MIN+2, x_4=2MIN+2, x_5=3MIN+4, x_6=X-newTotal  to runs
+        newTotal = 8*minRun+9;
+      }
+      runs.add(0, X-newTotal);
+    }
+    runs.add(0, X);
+  }
+
 }