You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2011/05/02 18:06:06 UTC
svn commit: r1098639 - in /lucene/dev/branches/branch_3x: ./ lucene/
lucene/backwards/ lucene/src/java/org/apache/lucene/search/
lucene/src/java/org/apache/lucene/util/
lucene/src/test/org/apache/lucene/util/ solr/
Author: uschindler
Date: Mon May 2 16:06:05 2011
New Revision: 1098639
URL: http://svn.apache.org/viewvc?rev=1098639&view=rev
Log:
LUCENE-3054: PhraseQuery can in some cases stack overflow in SorterTemplate.quickSort(). This fix also adds an optimization to PhraseQuery as term with lower doc freq will also have less positions
Modified:
lucene/dev/branches/branch_3x/ (props changed)
lucene/dev/branches/branch_3x/lucene/ (props changed)
lucene/dev/branches/branch_3x/lucene/CHANGES.txt
lucene/dev/branches/branch_3x/lucene/backwards/ (props changed)
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/util/SorterTemplate.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java
lucene/dev/branches/branch_3x/solr/ (props changed)
Modified: lucene/dev/branches/branch_3x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/CHANGES.txt?rev=1098639&r1=1098638&r2=1098639&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/CHANGES.txt Mon May 2 16:06:05 2011
@@ -38,6 +38,11 @@ Bug fixes
very special use cases of the TokenStream-API, most users would not
have recognized it. (Uwe Schindler, Robert Muir)
+* LUCENE-3054: PhraseQuery can in some cases stack overflow in
+ SorterTemplate.quickSort(). This fix also adds an optimization to
+ PhraseQuery as term with lower doc freq will also have less positions.
+ (Uwe Schindler, Robert Muir, Otis Gospodnetic)
+
Test Cases
* LUCENE-3002: added 'tests.iter.min' to control 'tests.iter' by allowing to
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java?rev=1098639&r1=1098638&r2=1098639&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/MultiPhraseQuery.java Mon May 2 16:06:05 2011
@@ -197,7 +197,7 @@ public class MultiPhraseQuery extends Qu
return null;
}
- postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(p, docFreq, positions.get(pos).intValue());
+ postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(p, docFreq, positions.get(pos).intValue(), terms[0]);
}
// sort by increasing docFreq order
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/PhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/PhraseQuery.java?rev=1098639&r1=1098638&r2=1098639&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/PhraseQuery.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/PhraseQuery.java Mon May 2 16:06:05 2011
@@ -122,16 +122,48 @@ public class PhraseQuery extends Query {
final TermPositions postings;
final int docFreq;
final int position;
+ final Term term;
- public PostingsAndFreq(TermPositions postings, int docFreq, int position) {
+ public PostingsAndFreq(TermPositions postings, int docFreq, int position, Term term) {
this.postings = postings;
this.docFreq = docFreq;
this.position = position;
+ this.term = term;
}
public int compareTo(PostingsAndFreq other) {
+ if (docFreq == other.docFreq) {
+ if (position == other.position) {
+ return term.compareTo(other.term);
+ }
+ return position - other.position;
+ }
return docFreq - other.docFreq;
}
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + docFreq;
+ result = prime * result + position;
+ result = prime * result + ((term == null) ? 0 : term.hashCode());
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) return true;
+ if (obj == null) return false;
+ if (getClass() != obj.getClass()) return false;
+ PostingsAndFreq other = (PostingsAndFreq) obj;
+ if (docFreq != other.docFreq) return false;
+ if (position != other.position) return false;
+ if (term == null) {
+ if (other.term != null) return false;
+ } else if (!term.equals(other.term)) return false;
+ return true;
+ }
}
private class PhraseWeight extends Weight {
@@ -183,7 +215,7 @@ public class PhraseQuery extends Query {
TermPositions p = reader.termPositions(t);
if (p == null)
return null;
- postingsFreqs[i] = new PostingsAndFreq(p, reader.docFreq(t), positions.get(i).intValue());
+ postingsFreqs[i] = new PostingsAndFreq(p, reader.docFreq(t), positions.get(i).intValue(), t);
}
// sort by increasing docFreq order
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/util/SorterTemplate.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/util/SorterTemplate.java?rev=1098639&r1=1098638&r2=1098639&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/util/SorterTemplate.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/util/SorterTemplate.java Mon May 2 16:06:05 2011
@@ -30,6 +30,7 @@ package org.apache.lucene.util;
public abstract class SorterTemplate {
private static final int MERGESORT_THRESHOLD = 12;
+ private static final int MERGE_TO_QUICKSORT_THRESHOLD = 40;
private static final int QUICKSORT_THRESHOLD = 7;
/** Implement this method, that swaps slots {@code i} and {@code j} in your data */
@@ -63,6 +64,10 @@ public abstract class SorterTemplate {
/** Sorts via in-place, but unstable, QuickSort algorithm.
* For small collections falls back to {@link #insertionSort(int,int)}. */
public final void quickSort(int lo, int hi) {
+ quickSort(lo, hi, MERGE_TO_QUICKSORT_THRESHOLD);
+ }
+
+ private void quickSort(int lo, int hi, int maxDepth) {
final int diff = hi - lo;
if (diff <= QUICKSORT_THRESHOLD) {
insertionSort(lo, hi);
@@ -101,8 +106,16 @@ public abstract class SorterTemplate {
}
}
- quickSort(lo, left);
- quickSort(left + 1, hi);
+ // fall back to merge sort when recursion depth gets too big
+ if (maxDepth == 0) {
+ // for testing: new Exception("Hit recursion depth limit").printStackTrace();
+ mergeSort(lo, left);
+ mergeSort(left + 1, hi);
+ } else {
+ --maxDepth;
+ quickSort(lo, left, maxDepth);
+ quickSort(left + 1, hi, maxDepth);
+ }
}
/** Sorts via stable in-place MergeSort algorithm
Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java?rev=1098639&r1=1098638&r2=1098639&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/util/TestArrayUtil.java Mon May 2 16:06:05 2011
@@ -131,6 +131,24 @@ public class TestArrayUtil extends Lucen
}
}
+ private Integer[] createSparseRandomArray(int maxSize) {
+ final Integer[] a = new Integer[random.nextInt(maxSize) + 1];
+ for (int i = 0; i < a.length; i++) {
+ a[i] = Integer.valueOf(random.nextInt(2));
+ }
+ return a;
+ }
+
+ // This is a test for LUCENE-3054 (which fails without the merge sort fall back with stack overflow in most cases)
+ public void testQuickToMergeSortFallback() {
+ for (int i = 0, c = 500 * RANDOM_MULTIPLIER; i < c; i++) {
+ Integer[] a1 = createSparseRandomArray(40000), a2 = a1.clone();
+ ArrayUtil.quickSort(a1);
+ Arrays.sort(a2);
+ assertArrayEquals(a2, a1);
+ }
+ }
+
public void testMergeSort() {
for (int i = 0, c = 500 * RANDOM_MULTIPLIER; i < c; i++) {
Integer[] a1 = createRandomArray(1000), a2 = a1.clone();