You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2011/05/02 18:06:06 UTC

svn commit: r1098639 - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/backwards/ lucene/src/java/org/apache/lucene/search/ lucene/src/java/org/apache/lucene/util/ lucene/src/test/org/apache/lucene/util/ solr/

Author: uschindler
Date: Mon May  2 16:06:05 2011
New Revision: 1098639

URL: http://svn.apache.org/viewvc?rev=1098639&view=rev
Log:
LUCENE-3054: PhraseQuery can in some cases stack overflow in SorterTemplate.quickSort(). This fix also adds an optimization to PhraseQuery as term with lower doc freq will also have less positions

Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/lucene/CHANGES.txt
    lucene/dev/branches/branch_3x/lucene/backwards/   (props changed)
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/util/SorterTemplate.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java
    lucene/dev/branches/branch_3x/solr/   (props changed)

Modified: lucene/dev/branches/branch_3x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/CHANGES.txt?rev=1098639&r1=1098638&r2=1098639&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/CHANGES.txt Mon May  2 16:06:05 2011
@@ -38,6 +38,11 @@ Bug fixes
   very special use cases of the TokenStream-API, most users would not
   have recognized it.  (Uwe Schindler, Robert Muir)
 
+* LUCENE-3054: PhraseQuery can in some cases stack overflow in
+  SorterTemplate.quickSort(). This fix also adds an optimization to
+  PhraseQuery as term with lower doc freq will also have less positions.
+  (Uwe Schindler, Robert Muir, Otis Gospodnetic)
+
 Test Cases
 
 * LUCENE-3002: added 'tests.iter.min' to control 'tests.iter' by allowing to 

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java?rev=1098639&r1=1098638&r2=1098639&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java Mon May  2 16:06:05 2011
@@ -197,7 +197,7 @@ public class MultiPhraseQuery extends Qu
             return null;
         }
 
-        postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(p, docFreq, positions.get(pos).intValue());
+        postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(p, docFreq, positions.get(pos).intValue(), terms[0]);
       }
 
       // sort by increasing docFreq order

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/PhraseQuery.java?rev=1098639&r1=1098638&r2=1098639&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/PhraseQuery.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/PhraseQuery.java Mon May  2 16:06:05 2011
@@ -122,16 +122,48 @@ public class PhraseQuery extends Query {
     final TermPositions postings;
     final int docFreq;
     final int position;
+    final Term term;
 
-    public PostingsAndFreq(TermPositions postings, int docFreq, int position) {
+    public PostingsAndFreq(TermPositions postings, int docFreq, int position, Term term) {
       this.postings = postings;
       this.docFreq = docFreq;
       this.position = position;
+      this.term = term;
     }
 
     public int compareTo(PostingsAndFreq other) {
+      if (docFreq == other.docFreq) {
+        if (position == other.position) {
+          return term.compareTo(other.term);
+        }
+        return position - other.position;
+      }
       return docFreq - other.docFreq;
     }
+
+    @Override
+    public int hashCode() {
+      final int prime = 31;
+      int result = 1;
+      result = prime * result + docFreq;
+      result = prime * result + position;
+      result = prime * result + ((term == null) ? 0 : term.hashCode());
+      return result;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      if (this == obj) return true;
+      if (obj == null) return false;
+      if (getClass() != obj.getClass()) return false;
+      PostingsAndFreq other = (PostingsAndFreq) obj;
+      if (docFreq != other.docFreq) return false;
+      if (position != other.position) return false;
+      if (term == null) {
+        if (other.term != null) return false;
+      } else if (!term.equals(other.term)) return false;
+      return true;
+    }
   }
 
   private class PhraseWeight extends Weight {
@@ -183,7 +215,7 @@ public class PhraseQuery extends Query {
         TermPositions p = reader.termPositions(t);
         if (p == null)
           return null;
-        postingsFreqs[i] = new PostingsAndFreq(p, reader.docFreq(t), positions.get(i).intValue());
+        postingsFreqs[i] = new PostingsAndFreq(p, reader.docFreq(t), positions.get(i).intValue(), t);
       }
 
       // sort by increasing docFreq order

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/util/SorterTemplate.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/util/SorterTemplate.java?rev=1098639&r1=1098638&r2=1098639&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/util/SorterTemplate.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/util/SorterTemplate.java Mon May  2 16:06:05 2011
@@ -30,6 +30,7 @@ package org.apache.lucene.util;
 public abstract class SorterTemplate {
 
   private static final int MERGESORT_THRESHOLD = 12;
+  private static final int MERGE_TO_QUICKSORT_THRESHOLD = 40;
   private static final int QUICKSORT_THRESHOLD = 7;
 
   /** Implement this method, that swaps slots {@code i} and {@code j} in your data */
@@ -63,6 +64,10 @@ public abstract class SorterTemplate {
   /** Sorts via in-place, but unstable, QuickSort algorithm.
    * For small collections falls back to {@link #insertionSort(int,int)}. */
   public final void quickSort(int lo, int hi) {
+    quickSort(lo, hi, MERGE_TO_QUICKSORT_THRESHOLD);
+  }
+  
+  private void quickSort(int lo, int hi, int maxDepth) {
     final int diff = hi - lo;
     if (diff <= QUICKSORT_THRESHOLD) {
       insertionSort(lo, hi);
@@ -101,8 +106,16 @@ public abstract class SorterTemplate {
       }
     }
 
-    quickSort(lo, left);
-    quickSort(left + 1, hi);
+    // fall back to merge sort when recursion depth gets too big
+    if (maxDepth == 0) {
+      // for testing: new Exception("Hit recursion depth limit").printStackTrace();
+      mergeSort(lo, left);
+      mergeSort(left + 1, hi);
+    } else {
+      --maxDepth;
+      quickSort(lo, left, maxDepth);
+      quickSort(left + 1, hi, maxDepth);
+    }
   }
   
   /** Sorts via stable in-place MergeSort algorithm

Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java?rev=1098639&r1=1098638&r2=1098639&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java Mon May  2 16:06:05 2011
@@ -131,6 +131,24 @@ public class TestArrayUtil extends Lucen
     }
   }
   
+  private Integer[] createSparseRandomArray(int maxSize) {
+    final Integer[] a = new Integer[random.nextInt(maxSize) + 1];
+    for (int i = 0; i < a.length; i++) {
+      a[i] = Integer.valueOf(random.nextInt(2));
+    }
+    return a;
+  }
+  
+  // This is a test for LUCENE-3054 (which fails without the merge sort fall back with stack overflow in most cases)
+  public void testQuickToMergeSortFallback() {
+    for (int i = 0, c = 500 * RANDOM_MULTIPLIER; i < c; i++) {
+      Integer[] a1 = createSparseRandomArray(40000), a2 = a1.clone();
+      ArrayUtil.quickSort(a1);
+      Arrays.sort(a2);
+      assertArrayEquals(a2, a1);
+    }
+  }
+  
   public void testMergeSort() {
     for (int i = 0, c = 500 * RANDOM_MULTIPLIER; i < c; i++) {
       Integer[] a1 = createRandomArray(1000), a2 = a1.clone();