You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/04/10 18:44:48 UTC
svn commit: r1466558 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/core/ lucene/core/src/java/org/apache/lucene/search/
Author: rmuir
Date: Wed Apr 10 16:44:47 2013
New Revision: 1466558
URL: http://svn.apache.org/r1466558
Log:
LUCENE-4923: remove minShouldMatch/speed up DisjunctionSumScorer
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_4x/lucene/core/ (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1466558&r1=1466557&r2=1466558&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Wed Apr 10 16:44:47 2013
@@ -173,6 +173,9 @@ Optimizations
* LUCENE-4889: UnicodeUtil.codePointCount implementation replaced with a
non-array-lookup version. (Dawid Weiss)
+* LUCENE-4923: Speed up BooleanQuerys processing of in-order disjunctions.
+ (Robert Muir)
+
API Changes
* LUCENE-4844: removed TaxonomyReader.getParent(), you should use
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java?rev=1466558&r1=1466557&r2=1466558&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java Wed Apr 10 16:44:47 2013
@@ -346,11 +346,21 @@ public class BooleanQuery extends Query
return null;
}
+ // simple conjunction
if (optional.size() == 0 && prohibited.size() == 0) {
float coord = disableCoord ? 1.0f : coord(required.size(), maxCoord);
return new ConjunctionScorer(this, required.toArray(new Scorer[required.size()]), coord);
}
+ // simple disjunction
+ if (required.size() == 0 && prohibited.size() == 0 && minNrShouldMatch <= 1 && optional.size() > 1) {
+ float coord[] = new float[optional.size()+1];
+ for (int i = 0; i < coord.length; i++) {
+ coord[i] = disableCoord ? 1.0f : coord(i, maxCoord);
+ }
+ return new DisjunctionSumScorer(this, optional.toArray(new Scorer[optional.size()]), coord);
+ }
+
// Return a BooleanScorer2
return new BooleanScorer2(this, disableCoord, minNrShouldMatch, required, prohibited, optional, maxCoord);
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java?rev=1466558&r1=1466557&r2=1466558&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java Wed Apr 10 16:44:47 2013
@@ -159,38 +159,19 @@ class BooleanScorer2 extends Scorer {
// each scorer from the list counted as a single matcher
if (minNrShouldMatch > 1) {
return new MinShouldMatchSumScorer(weight, scorers, minNrShouldMatch) {
- private int lastScoredDoc = -1;
- // Save the score of lastScoredDoc, so that we don't compute it more than
- // once in score().
- private float lastDocScore = Float.NaN;
- @Override public float score() throws IOException {
- int doc = docID();
- if (doc >= lastScoredDoc) {
- if (doc > lastScoredDoc) {
- lastDocScore = super.score();
- lastScoredDoc = doc;
- }
- coordinator.nrMatchers += super.nrMatchers;
- }
- return lastDocScore;
+ @Override
+ public float score() throws IOException {
+ coordinator.nrMatchers += super.nrMatchers;
+ return super.score();
}
};
} else {
- return new DisjunctionSumScorer(weight, scorers) {
- private int lastScoredDoc = -1;
- // Save the score of lastScoredDoc, so that we don't compute it more than
- // once in score().
- private float lastDocScore = Float.NaN;
- @Override public float score() throws IOException {
- int doc = docID();
- if (doc >= lastScoredDoc) {
- if (doc > lastScoredDoc) {
- lastDocScore = super.score();
- lastScoredDoc = doc;
- }
- coordinator.nrMatchers += super.nrMatchers;
- }
- return lastDocScore;
+ // we pass null for coord[] since we coordinate ourselves and override score()
+ return new DisjunctionSumScorer(weight, scorers.toArray(new Scorer[scorers.size()]), null) {
+ @Override
+ public float score() throws IOException {
+ coordinator.nrMatchers += super.nrMatchers;
+ return (float) super.score;
}
};
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java?rev=1466558&r1=1466557&r2=1466558&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java Wed Apr 10 16:44:47 2013
@@ -17,84 +17,58 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import java.util.List;
import java.io.IOException;
/** A Scorer for OR like queries, counterpart of <code>ConjunctionScorer</code>.
* This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers.
*/
class DisjunctionSumScorer extends DisjunctionScorer {
- /** The minimum number of scorers that should match. */
- private final int minimumNrMatchers;
-
/** The document number of the current match. */
private int doc = -1;
/** The number of subscorers that provide the current match. */
protected int nrMatchers = -1;
- private double score = Float.NaN;
+ protected double score = Float.NaN;
+ private final float[] coord;
/** Construct a <code>DisjunctionScorer</code>.
* @param weight The weight to be used.
- * @param subScorers A collection of at least two subscorers.
- * @param minimumNrMatchers The positive minimum number of subscorers that should
- * match to match this query.
- * <br>When <code>minimumNrMatchers</code> is bigger than
- * the number of <code>subScorers</code>,
- * no matches will be produced.
- * <br>When minimumNrMatchers equals the number of subScorers,
- * it more efficient to use <code>ConjunctionScorer</code>.
+ * @param subScorers Array of at least two subscorers.
+ * @param coord Table of coordination factors
*/
- public DisjunctionSumScorer(Weight weight, List<Scorer> subScorers, int minimumNrMatchers) throws IOException {
- super(weight, subScorers.toArray(new Scorer[subScorers.size()]), subScorers.size());
+ DisjunctionSumScorer(Weight weight, Scorer[] subScorers, float[] coord) throws IOException {
+ super(weight, subScorers, subScorers.length);
- if (minimumNrMatchers <= 0) {
- throw new IllegalArgumentException("Minimum nr of matchers must be positive");
- }
if (numScorers <= 1) {
throw new IllegalArgumentException("There must be at least 2 subScorers");
}
-
- this.minimumNrMatchers = minimumNrMatchers;
- }
-
- /** Construct a <code>DisjunctionScorer</code>, using one as the minimum number
- * of matching subscorers.
- */
- public DisjunctionSumScorer(Weight weight, List<Scorer> subScorers) throws IOException {
- this(weight, subScorers, 1);
+ this.coord = coord;
}
@Override
public int nextDoc() throws IOException {
assert doc != NO_MORE_DOCS;
while(true) {
- while (subScorers[0].docID() == doc) {
- if (subScorers[0].nextDoc() != NO_MORE_DOCS) {
- heapAdjust(0);
- } else {
- heapRemoveRoot();
- if (numScorers < minimumNrMatchers) {
- return doc = NO_MORE_DOCS;
- }
+ if (subScorers[0].nextDoc() != NO_MORE_DOCS) {
+ heapAdjust(0);
+ } else {
+ heapRemoveRoot();
+ if (numScorers == 0) {
+ return doc = NO_MORE_DOCS;
}
}
- afterNext();
- if (nrMatchers >= minimumNrMatchers) {
- break;
+ if (subScorers[0].docID() != doc) {
+ afterNext();
+ return doc;
}
}
-
- return doc;
}
private void afterNext() throws IOException {
final Scorer sub = subScorers[0];
doc = sub.docID();
- if (doc == NO_MORE_DOCS) {
- nrMatchers = Integer.MAX_VALUE; // stop looping
- } else {
+ if (doc != NO_MORE_DOCS) {
score = sub.score();
nrMatchers = 1;
countMatches(1);
@@ -104,9 +78,8 @@ class DisjunctionSumScorer extends Disju
// TODO: this currently scores, but so did the previous impl
// TODO: remove recursion.
- // TODO: if we separate scoring, out of here, modify this
- // and afterNext() to terminate when nrMatchers == minimumNrMatchers
- // then also change freq() to just always compute it from scratch
+ // TODO: if we separate scoring, out of here,
+ // then change freq() to just always compute it from scratch
private void countMatches(int root) throws IOException {
if (root < numScorers && subScorers[root].docID() == doc) {
nrMatchers++;
@@ -121,7 +94,7 @@ class DisjunctionSumScorer extends Disju
*/
@Override
public float score() throws IOException {
- return (float)score;
+ return (float)score * coord[nrMatchers];
}
@Override
@@ -146,8 +119,8 @@ class DisjunctionSumScorer extends Disju
*/
@Override
public int advance(int target) throws IOException {
- if (numScorers == 0) return doc = NO_MORE_DOCS;
- while (subScorers[0].docID() < target) {
+ assert doc != NO_MORE_DOCS;
+ while(true) {
if (subScorers[0].advance(target) != NO_MORE_DOCS) {
heapAdjust(0);
} else {
@@ -156,14 +129,10 @@ class DisjunctionSumScorer extends Disju
return doc = NO_MORE_DOCS;
}
}
- }
-
- afterNext();
-
- if (nrMatchers >= minimumNrMatchers) {
- return doc;
- } else {
- return nextDoc();
+ if (subScorers[0].docID() >= target) {
+ afterNext();
+ return doc;
+ }
}
}
}