You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/07/18 23:21:00 UTC
svn commit: r1363121 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/core/src/java/org/apache/lucene/search/
lucene/core/src/java/org/apache/lucene/util/
lucene/core/src/test/org/apache/lucene/search/
lucene/grouping/src/java/org/apache/lucene/s...
Author: rmuir
Date: Wed Jul 18 21:20:58 2012
New Revision: 1363121
URL: http://svn.apache.org/viewvc?rev=1363121&view=rev
Log:
LUCENE-2686, LUCENE-3505: Fix various bugs in BooleanQuery, clean up scorer navigation API
Added:
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
- copied unchanged from r1363115, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java
- copied unchanged from r1363115, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestBooleanQueryVisitSubscorers.java
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestConjunctions.java
- copied unchanged from r1363115, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestConjunctions.java
Removed:
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/MatchOnlyConjunctionTermsScorer.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/ScorerDocQueue.java
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/CHANGES.txt
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ConjunctionTermScorer.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/Scorer.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestSubScorerFreqs.java
lucene/dev/branches/branch_4x/lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java
lucene/dev/branches/branch_4x/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java
lucene/dev/branches/branch_4x/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java
lucene/dev/branches/branch_4x/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java
lucene/dev/branches/branch_4x/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java
lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java
lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java
lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java
lucene/dev/branches/branch_4x/solr/ (props changed)
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/LatLonType.java
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java
Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Wed Jul 18 21:20:58 2012
@@ -74,6 +74,16 @@ Bug Fixes
* LUCENE-4222: TieredMergePolicy.getFloorSegmentMB was returning the
size in bytes not MB (Chris Fuller via Mike McCandless)
+* LUCENE-3505: Fix bug (Lucene 4.0alpha only) where boolean conjunctions
+ were sometimes scored incorrectly. Conjunctions of only termqueries where
+ at least one term omitted term frequencies (IndexOptions.DOCS_ONLY) would
+ be scored as if all terms omitted term frequencies. (Robert Muir)
+
+* LUCENE-2686, LUCENE-3505: Fixed BooleanQuery scorers to return correct
+ freq(). Added support for scorer navigation API (Scorer.getChildren) to
+ all queries. Made Scorer.freq() abstract.
+ (Koji Sekiguchi, Mike McCandless, Robert Muir)
+
Build
* LUCENE-4094: Support overriding file.encoding on forked test JVMs
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanQuery.java Wed Jul 18 21:20:58 2012
@@ -358,49 +358,17 @@ public class BooleanQuery extends Query
final DocsAndFreqs[] docsAndFreqs = new DocsAndFreqs[weights.size()];
for (int i = 0; i < docsAndFreqs.length; i++) {
final TermWeight weight = (TermWeight) weights.get(i);
- final TermsEnum termsEnum = weight.getTermsEnum(context);
- if (termsEnum == null) {
+ final Scorer scorer = weight.scorer(context, true, false, acceptDocs);
+ if (scorer == null) {
return null;
}
- final ExactSimScorer docScorer = weight.createDocScorer(context);
- final DocsEnum docsAndFreqsEnum = termsEnum.docs(acceptDocs, null, true);
- if (docsAndFreqsEnum == null) {
- // TODO: we could carry over TermState from the
- // terms we already seek'd to, to save re-seeking
- // to make the match-only scorer, but it's likely
- // rare that BQ mixes terms from omitTf and
- // non-omitTF fields:
-
- // At least one sub cannot provide freqs; abort
- // and fallback to full match-only scorer:
- return createMatchOnlyConjunctionTermScorer(context, acceptDocs);
- }
-
- docsAndFreqs[i] = new DocsAndFreqs(docsAndFreqsEnum,
- docsAndFreqsEnum,
- termsEnum.docFreq(), docScorer);
- }
- return new ConjunctionTermScorer(this, disableCoord ? 1.0f : coord(
- docsAndFreqs.length, docsAndFreqs.length), docsAndFreqs);
- }
-
- private Scorer createMatchOnlyConjunctionTermScorer(AtomicReaderContext context, Bits acceptDocs)
- throws IOException {
-
- final DocsAndFreqs[] docsAndFreqs = new DocsAndFreqs[weights.size()];
- for (int i = 0; i < docsAndFreqs.length; i++) {
- final TermWeight weight = (TermWeight) weights.get(i);
- final TermsEnum termsEnum = weight.getTermsEnum(context);
- if (termsEnum == null) {
- return null;
+ if (scorer instanceof TermScorer) {
+ docsAndFreqs[i] = new DocsAndFreqs((TermScorer) scorer);
+ } else {
+ docsAndFreqs[i] = new DocsAndFreqs((MatchOnlyTermScorer) scorer);
}
- final ExactSimScorer docScorer = weight.createDocScorer(context);
- docsAndFreqs[i] = new DocsAndFreqs(null,
- termsEnum.docs(acceptDocs, null, false),
- termsEnum.docFreq(), docScorer);
}
-
- return new MatchOnlyConjunctionTermScorer(this, disableCoord ? 1.0f : coord(
+ return new ConjunctionTermScorer(this, disableCoord ? 1.0f : coord(
docsAndFreqs.length, docsAndFreqs.length), docsAndFreqs);
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java Wed Jul 18 21:20:58 2012
@@ -318,6 +318,11 @@ final class BooleanScorer extends Scorer
}
@Override
+ public float freq() throws IOException {
+ return current.coord;
+ }
+
+ @Override
public void score(Collector collector) throws IOException {
score(collector, Integer.MAX_VALUE, -1);
}
@@ -338,7 +343,8 @@ final class BooleanScorer extends Scorer
public Collection<ChildScorer> getChildren() {
List<ChildScorer> children = new ArrayList<ChildScorer>();
for (SubScorer sub = scorers; sub != null; sub = sub.next) {
- children.add(new ChildScorer(sub.scorer, sub.prohibited ? Occur.MUST_NOT.toString() : Occur.SHOULD.toString()));
+ // TODO: fix this if BQ ever sends us required clauses
+ children.add(new ChildScorer(sub.scorer, sub.prohibited ? "MUST_NOT" : "SHOULD"));
}
return children;
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/BooleanScorer2.java Wed Jul 18 21:20:58 2012
@@ -131,6 +131,11 @@ class BooleanScorer2 extends Scorer {
}
@Override
+ public float freq() throws IOException {
+ return 1;
+ }
+
+ @Override
public int docID() {
return scorer.docID();
}
@@ -310,8 +315,8 @@ class BooleanScorer2 extends Scorer {
}
@Override
- public float freq() {
- return coordinator.nrMatchers;
+ public float freq() throws IOException {
+ return countingSumScorer.freq();
}
@Override
@@ -323,13 +328,13 @@ class BooleanScorer2 extends Scorer {
public Collection<ChildScorer> getChildren() {
ArrayList<ChildScorer> children = new ArrayList<ChildScorer>();
for (Scorer s : optionalScorers) {
- children.add(new ChildScorer(s, Occur.SHOULD.toString()));
+ children.add(new ChildScorer(s, "SHOULD"));
}
for (Scorer s : prohibitedScorers) {
- children.add(new ChildScorer(s, Occur.MUST_NOT.toString()));
+ children.add(new ChildScorer(s, "MUST_NOT"));
}
for (Scorer s : requiredScorers) {
- children.add(new ChildScorer(s, Occur.MUST.toString()));
+ children.add(new ChildScorer(s, "MUST"));
}
return children;
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java Wed Jul 18 21:20:58 2012
@@ -19,6 +19,7 @@ package org.apache.lucene.search;
import org.apache.lucene.util.ArrayUtil;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
@@ -136,4 +137,18 @@ class ConjunctionScorer extends Scorer {
}
return sum * coord;
}
+
+ @Override
+ public float freq() throws IOException {
+ return scorers.length;
+ }
+
+ @Override
+ public Collection<ChildScorer> getChildren() {
+ ArrayList<ChildScorer> children = new ArrayList<ChildScorer>(scorers.length);
+ for (Scorer scorer : scorers) {
+ children.add(new ChildScorer(scorer, "MUST"));
+ }
+ return children;
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ConjunctionTermScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ConjunctionTermScorer.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ConjunctionTermScorer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ConjunctionTermScorer.java Wed Jul 18 21:20:58 2012
@@ -18,10 +18,11 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
import java.util.Comparator;
import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.search.similarities.Similarity.ExactSimScorer;
import org.apache.lucene.util.ArrayUtil;
/** Scorer for conjunctions, sets of terms, all of which are required. */
@@ -91,23 +92,43 @@ class ConjunctionTermScorer extends Scor
public float score() throws IOException {
float sum = 0.0f;
for (DocsAndFreqs docs : docsAndFreqs) {
- sum += docs.docScorer.score(lastDoc, docs.docs.freq());
+ sum += docs.scorer.score();
}
return sum * coord;
}
+
+ @Override
+ public float freq() {
+ return docsAndFreqs.length;
+ }
+
+ @Override
+ public Collection<ChildScorer> getChildren() {
+ ArrayList<ChildScorer> children = new ArrayList<ChildScorer>(docsAndFreqs.length);
+ for (DocsAndFreqs docs : docsAndFreqs) {
+ children.add(new ChildScorer(docs.scorer, "MUST"));
+ }
+ return children;
+ }
static final class DocsAndFreqs {
- final DocsEnum docsAndFreqs;
final DocsEnum docs;
final int docFreq;
- final ExactSimScorer docScorer;
+ final Scorer scorer;
int doc = -1;
- DocsAndFreqs(DocsEnum docsAndFreqs, DocsEnum docs, int docFreq, ExactSimScorer docScorer) {
- this.docsAndFreqs = docsAndFreqs;
+ DocsAndFreqs(TermScorer termScorer) {
+ this(termScorer, termScorer.getDocsEnum(), termScorer.getDocFreq());
+ }
+
+ DocsAndFreqs(MatchOnlyTermScorer termScorer) {
+ this(termScorer, termScorer.getDocsEnum(), termScorer.getDocFreq());
+ }
+
+ DocsAndFreqs(Scorer scorer, DocsEnum docs, int docFreq) {
this.docs = docs;
this.docFreq = docFreq;
- this.docScorer = docScorer;
+ this.scorer = scorer;
}
}
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ConstantScoreQuery.java Wed Jul 18 21:20:58 2012
@@ -194,6 +194,11 @@ public class ConstantScoreQuery extends
}
@Override
+ public float freq() throws IOException {
+ return 1;
+ }
+
+ @Override
public int advance(int target) throws IOException {
return docIdSetIterator.advance(target);
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java Wed Jul 18 21:20:58 2012
@@ -158,7 +158,7 @@ public class DisjunctionMaxQuery extends
for (Weight w : weights) {
// we will advance() subscorers
Scorer subScorer = w.scorer(context, true, false, acceptDocs);
- if (subScorer != null && subScorer.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+ if (subScorer != null) {
scorers[idx++] = subScorer;
}
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java Wed Jul 18 21:20:58 2012
@@ -17,9 +17,6 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
/**
* The Scorer for DisjunctionMaxQuery. The union of all documents generated by the subquery scorers
@@ -27,11 +24,7 @@ import java.util.Collections;
* by the subquery scorers that generate that document, plus tieBreakerMultiplier times the sum of the scores
* for the other subqueries that generate the document.
*/
-class DisjunctionMaxScorer extends Scorer {
-
- /* The scorers for subqueries that have remaining docs, kept as a min heap by number of next doc. */
- private final Scorer[] subScorers;
- private int numScorers;
+class DisjunctionMaxScorer extends DisjunctionScorer {
/* Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. */
private final float tieBreakerMultiplier;
private int doc = -1;
@@ -56,15 +49,8 @@ class DisjunctionMaxScorer extends Score
*/
public DisjunctionMaxScorer(Weight weight, float tieBreakerMultiplier,
Scorer[] subScorers, int numScorers) {
- super(weight);
+ super(weight, subScorers, numScorers);
this.tieBreakerMultiplier = tieBreakerMultiplier;
- // The passed subScorers array includes only scorers which have documents
- // (DisjunctionMaxQuery takes care of that), and their nextDoc() was already
- // called.
- this.subScorers = subScorers;
- this.numScorers = numScorers;
-
- heapify();
}
@Override
@@ -114,6 +100,24 @@ class DisjunctionMaxScorer extends Score
}
@Override
+ public float freq() throws IOException {
+ int doc = subScorers[0].docID();
+ int size = numScorers;
+ return 1 + freq(1, size, doc) + freq(2, size, doc);
+ }
+
+ // Recursively iterate all subScorers that generated last doc, counting how many of them match
+ private int freq(int root, int size, int doc) throws IOException {
+ int freq = 0;
+ if (root < size && subScorers[root].docID() == doc) {
+ freq++;
+ freq += freq((root<<1)+1, size, doc);
+ freq += freq((root<<1)+2, size, doc);
+ }
+ return freq;
+ }
+
+ @Override
public int advance(int target) throws IOException {
if (numScorers == 0) return doc = NO_MORE_DOCS;
while (subScorers[0].docID() < target) {
@@ -128,70 +132,4 @@ class DisjunctionMaxScorer extends Score
}
return doc = subScorers[0].docID();
}
-
- // Organize subScorers into a min heap with scorers generating the earliest document on top.
- private void heapify() {
- for (int i = (numScorers >> 1) - 1; i >= 0; i--) {
- heapAdjust(i);
- }
- }
-
- /* The subtree of subScorers at root is a min heap except possibly for its root element.
- * Bubble the root down as required to make the subtree a heap.
- */
- private void heapAdjust(int root) {
- Scorer scorer = subScorers[root];
- int doc = scorer.docID();
- int i = root;
- while (i <= (numScorers >> 1) - 1) {
- int lchild = (i << 1) + 1;
- Scorer lscorer = subScorers[lchild];
- int ldoc = lscorer.docID();
- int rdoc = Integer.MAX_VALUE, rchild = (i << 1) + 2;
- Scorer rscorer = null;
- if (rchild < numScorers) {
- rscorer = subScorers[rchild];
- rdoc = rscorer.docID();
- }
- if (ldoc < doc) {
- if (rdoc < ldoc) {
- subScorers[i] = rscorer;
- subScorers[rchild] = scorer;
- i = rchild;
- } else {
- subScorers[i] = lscorer;
- subScorers[lchild] = scorer;
- i = lchild;
- }
- } else if (rdoc < doc) {
- subScorers[i] = rscorer;
- subScorers[rchild] = scorer;
- i = rchild;
- } else {
- return;
- }
- }
- }
-
- // Remove the root Scorer from subScorers and re-establish it as a heap
- private void heapRemoveRoot() {
- if (numScorers == 1) {
- subScorers[0] = null;
- numScorers = 0;
- } else {
- subScorers[0] = subScorers[numScorers - 1];
- subScorers[numScorers - 1] = null;
- --numScorers;
- heapAdjust(0);
- }
- }
-
- @Override
- public Collection<ChildScorer> getChildren() {
- final ChildScorer[] children = new ChildScorer[numScorers];
- for (int i = 0; i< numScorers; i++) {
- children[i] = new ChildScorer(subScorers[i], BooleanClause.Occur.SHOULD.toString());
- }
- return Collections.unmodifiableCollection(Arrays.asList(children));
- }
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java Wed Jul 18 21:20:58 2012
@@ -20,42 +20,20 @@ package org.apache.lucene.search;
import java.util.List;
import java.io.IOException;
-import org.apache.lucene.util.ScorerDocQueue;
-
/** A Scorer for OR like queries, counterpart of <code>ConjunctionScorer</code>.
* This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers.
*/
-class DisjunctionSumScorer extends Scorer {
- /** The number of subscorers. */
- private final int nrScorers;
-
- /** The subscorers. */
- protected final List<Scorer> subScorers;
-
+class DisjunctionSumScorer extends DisjunctionScorer {
/** The minimum number of scorers that should match. */
private final int minimumNrMatchers;
- /** The scorerDocQueue contains all subscorers ordered by their current doc(),
- * with the minimum at the top.
- * <br>The scorerDocQueue is initialized the first time nextDoc() or advance() is called.
- * <br>An exhausted scorer is immediately removed from the scorerDocQueue.
- * <br>If less than the minimumNrMatchers scorers
- * remain in the scorerDocQueue nextDoc() and advance() return false.
- * <p>
- * After each to call to nextDoc() or advance()
- * <code>currentSumScore</code> is the total score of the current matching doc,
- * <code>nrMatchers</code> is the number of matching scorers,
- * and all scorers are after the matching doc, or are exhausted.
- */
- private final ScorerDocQueue scorerDocQueue;
-
/** The document number of the current match. */
- private int currentDoc = -1;
+ private int doc = -1;
/** The number of subscorers that provide the current match. */
protected int nrMatchers = -1;
- private double currentScore = Float.NaN;
+ private double score = Float.NaN;
/** Construct a <code>DisjunctionScorer</code>.
* @param weight The weight to be used.
@@ -69,21 +47,16 @@ class DisjunctionSumScorer extends Score
* it more efficient to use <code>ConjunctionScorer</code>.
*/
public DisjunctionSumScorer(Weight weight, List<Scorer> subScorers, int minimumNrMatchers) throws IOException {
- super(weight);
-
- nrScorers = subScorers.size();
+ super(weight, subScorers.toArray(new Scorer[subScorers.size()]), subScorers.size());
if (minimumNrMatchers <= 0) {
throw new IllegalArgumentException("Minimum nr of matchers must be positive");
}
- if (nrScorers <= 1) {
+ if (numScorers <= 1) {
throw new IllegalArgumentException("There must be at least 2 subScorers");
}
this.minimumNrMatchers = minimumNrMatchers;
- this.subScorers = subScorers;
-
- scorerDocQueue = initScorerDocQueue();
}
/** Construct a <code>DisjunctionScorer</code>, using one as the minimum number
@@ -93,119 +66,66 @@ class DisjunctionSumScorer extends Score
this(weight, subScorers, 1);
}
- /** Called the first time nextDoc() or advance() is called to
- * initialize <code>scorerDocQueue</code>.
- * @return
- */
- private ScorerDocQueue initScorerDocQueue() throws IOException {
- final ScorerDocQueue docQueue = new ScorerDocQueue(nrScorers);
- for (final Scorer se : subScorers) {
- if (se.nextDoc() != NO_MORE_DOCS) {
- docQueue.insert(se);
- }
- }
- return docQueue;
- }
-
- /** Scores and collects all matching documents.
- * @param collector The collector to which all matching documents are passed through.
- */
- @Override
- public void score(Collector collector) throws IOException {
- collector.setScorer(this);
- while (nextDoc() != NO_MORE_DOCS) {
- collector.collect(currentDoc);
- }
- }
-
- /** Expert: Collects matching documents in a range. Hook for optimization.
- * Note that {@link #nextDoc()} must be called once before this method is called
- * for the first time.
- * @param collector The collector to which all matching documents are passed through.
- * @param max Do not score documents past this.
- * @return true if more matching documents may remain.
- */
- @Override
- public boolean score(Collector collector, int max, int firstDocID) throws IOException {
- // firstDocID is ignored since nextDoc() sets 'currentDoc'
- collector.setScorer(this);
- while (currentDoc < max) {
- collector.collect(currentDoc);
- if (nextDoc() == NO_MORE_DOCS) {
- return false;
- }
- }
- return true;
- }
-
@Override
public int nextDoc() throws IOException {
-
- if (scorerDocQueue.size() < minimumNrMatchers || !advanceAfterCurrent()) {
- currentDoc = NO_MORE_DOCS;
- }
- return currentDoc;
- }
-
- /** Advance all subscorers after the current document determined by the
- * top of the <code>scorerDocQueue</code>.
- * Repeat until at least the minimum number of subscorers match on the same
- * document and all subscorers are after that document or are exhausted.
- * <br>On entry the <code>scorerDocQueue</code> has at least <code>minimumNrMatchers</code>
- * available. At least the scorer with the minimum document number will be advanced.
- * @return true iff there is a match.
- * <br>In case there is a match, </code>currentDoc</code>, </code>currentSumScore</code>,
- * and </code>nrMatchers</code> describe the match.
- *
- * TODO: Investigate whether it is possible to use advance() when
- * the minimum number of matchers is bigger than one, ie. try and use the
- * character of ConjunctionScorer for the minimum number of matchers.
- * Also delay calling score() on the sub scorers until the minimum number of
- * matchers is reached.
- * <br>For this, a Scorer array with minimumNrMatchers elements might
- * hold Scorers at currentDoc that are temporarily popped from scorerQueue.
- */
- protected boolean advanceAfterCurrent() throws IOException {
- do { // repeat until minimum nr of matchers
- currentDoc = scorerDocQueue.topDoc();
- currentScore = scorerDocQueue.topScore();
- nrMatchers = 1;
- do { // Until all subscorers are after currentDoc
- if (!scorerDocQueue.topNextAndAdjustElsePop()) {
- if (scorerDocQueue.size() == 0) {
- break; // nothing more to advance, check for last match.
+ while(true) {
+ while (subScorers[0].docID() == doc) {
+ if (subScorers[0].nextDoc() != NO_MORE_DOCS) {
+ heapAdjust(0);
+ } else {
+ heapRemoveRoot();
+ if (numScorers < minimumNrMatchers) {
+ return doc = NO_MORE_DOCS;
}
}
- if (scorerDocQueue.topDoc() != currentDoc) {
- break; // All remaining subscorers are after currentDoc.
- }
- currentScore += scorerDocQueue.topScore();
- nrMatchers++;
- } while (true);
-
+ }
+ afterNext();
if (nrMatchers >= minimumNrMatchers) {
- return true;
- } else if (scorerDocQueue.size() < minimumNrMatchers) {
- return false;
+ break;
}
- } while (true);
+ }
+
+ return doc;
+ }
+
+ private void afterNext() throws IOException {
+ final Scorer sub = subScorers[0];
+ doc = sub.docID();
+ score = sub.score();
+ nrMatchers = 1;
+ countMatches(1);
+ countMatches(2);
+ }
+
+ // TODO: this currently scores, but so did the previous impl
+ // TODO: remove recursion.
+ // TODO: if we separate scoring out of here, modify this
+ // and afterNext() to terminate when nrMatchers == minimumNrMatchers
+ // then also change freq() to just always compute it from scratch
+ private void countMatches(int root) throws IOException {
+ if (root < numScorers && subScorers[root].docID() == doc) {
+ nrMatchers++;
+ score += subScorers[root].score();
+ countMatches((root<<1)+1);
+ countMatches((root<<1)+2);
+ }
}
/** Returns the score of the current document matching the query.
* Initially invalid, until {@link #nextDoc()} is called the first time.
*/
@Override
- public float score() throws IOException { return (float)currentScore; }
+ public float score() throws IOException {
+ return (float)score;
+ }
@Override
public int docID() {
- return currentDoc;
+ return doc;
}
-
- /** Returns the number of subscorers matching the current document.
- * Initially invalid, until {@link #nextDoc()} is called the first time.
- */
- public int nrMatchers() {
+
+ @Override
+ public float freq() throws IOException {
return nrMatchers;
}
@@ -221,20 +141,24 @@ class DisjunctionSumScorer extends Score
*/
@Override
public int advance(int target) throws IOException {
- if (scorerDocQueue.size() < minimumNrMatchers) {
- return currentDoc = NO_MORE_DOCS;
- }
- if (target <= currentDoc) {
- return currentDoc;
- }
- do {
- if (scorerDocQueue.topDoc() >= target) {
- return advanceAfterCurrent() ? currentDoc : (currentDoc = NO_MORE_DOCS);
- } else if (!scorerDocQueue.topSkipToAndAdjustElsePop(target)) {
- if (scorerDocQueue.size() < minimumNrMatchers) {
- return currentDoc = NO_MORE_DOCS;
+ if (numScorers == 0) return doc = NO_MORE_DOCS;
+ while (subScorers[0].docID() < target) {
+ if (subScorers[0].advance(target) != NO_MORE_DOCS) {
+ heapAdjust(0);
+ } else {
+ heapRemoveRoot();
+ if (numScorers == 0) {
+ return doc = NO_MORE_DOCS;
}
}
- } while (true);
+ }
+
+ afterNext();
+
+ if (nrMatchers >= minimumNrMatchers) {
+ return doc;
+ } else {
+ return nextDoc();
+ }
}
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/FilteredQuery.java Wed Jul 18 21:20:58 2012
@@ -24,6 +24,8 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
import java.util.Set;
@@ -221,6 +223,14 @@ public class FilteredQuery extends Query
public float score() throws IOException {
return scorer.score();
}
+
+ @Override
+ public float freq() throws IOException { return scorer.freq(); }
+
+ @Override
+ public Collection<ChildScorer> getChildren() {
+ return Collections.singleton(new ChildScorer(scorer, "FILTERED"));
+ }
};
}
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/MatchAllDocsQuery.java Wed Jul 18 21:20:58 2012
@@ -68,6 +68,11 @@ public class MatchAllDocsQuery extends Q
}
@Override
+ public float freq() {
+ return 1;
+ }
+
+ @Override
public int advance(int target) throws IOException {
doc = target-1;
return nextDoc();
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/MatchOnlyTermScorer.java Wed Jul 18 21:20:58 2012
@@ -30,6 +30,7 @@ import org.apache.lucene.search.similari
final class MatchOnlyTermScorer extends Scorer {
private final DocsEnum docsEnum;
private final Similarity.ExactSimScorer docScorer;
+ private final int docFreq;
/**
* Construct a <code>TermScorer</code>.
@@ -41,11 +42,14 @@ final class MatchOnlyTermScorer extends
* @param docScorer
* The </code>Similarity.ExactSimScorer</code> implementation
* to be used for score computations.
+ * @param docFreq
+ * per-segment docFreq of this term
*/
- MatchOnlyTermScorer(Weight weight, DocsEnum td, Similarity.ExactSimScorer docScorer) {
+ MatchOnlyTermScorer(Weight weight, DocsEnum td, Similarity.ExactSimScorer docScorer, int docFreq) {
super(weight);
this.docScorer = docScorer;
this.docsEnum = td;
+ this.docFreq = docFreq;
}
@Override
@@ -91,4 +95,18 @@ final class MatchOnlyTermScorer extends
/** Returns a string representation of this <code>TermScorer</code>. */
@Override
public String toString() { return "scorer(" + weight + ")"; }
+
+ // TODO: benchmark if the specialized conjunction really benefits
+ // from these, or if instead its from sorting by docFreq, or both
+
+ DocsEnum getDocsEnum() {
+ return docsEnum;
+ }
+
+ // TODO: generalize something like this for scorers?
+ // even this is just an estimation...
+
+ int getDocFreq() {
+ return docFreq;
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ReqExclScorer.java Wed Jul 18 21:20:58 2012
@@ -18,7 +18,8 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
-
+import java.util.Collection;
+import java.util.Collections;
/** A Scorer for queries with a required subscorer
* and an excluding (prohibited) sub DocIdSetIterator.
@@ -104,6 +105,16 @@ class ReqExclScorer extends Scorer {
}
@Override
+ public float freq() throws IOException {
+ return reqScorer.freq();
+ }
+
+ @Override
+ public Collection<ChildScorer> getChildren() {
+ return Collections.singleton(new ChildScorer(reqScorer, "FILTERED"));
+ }
+
+ @Override
public int advance(int target) throws IOException {
if (reqScorer == null) {
return doc = NO_MORE_DOCS;
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ReqOptSumScorer.java Wed Jul 18 21:20:58 2012
@@ -17,6 +17,8 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
/** A Scorer for queries with a required part and an optional part.
* Delays skipTo() on the optional part until a score() is needed.
@@ -39,6 +41,8 @@ class ReqOptSumScorer extends Scorer {
Scorer optScorer)
{
super(reqScorer.weight);
+ assert reqScorer != null;
+ assert optScorer != null;
this.reqScorer = reqScorer;
this.optScorer = optScorer;
}
@@ -80,5 +84,19 @@ class ReqOptSumScorer extends Scorer {
return optScorerDoc == curDoc ? reqScore + optScorer.score() : reqScore;
}
+ @Override
+ public float freq() throws IOException {
+ // we might have deferred advance()
+ score();
+ return (optScorer != null && optScorer.docID() == reqScorer.docID()) ? 2 : 1;
+ }
+
+ @Override
+ public Collection<ChildScorer> getChildren() {
+ ArrayList<ChildScorer> children = new ArrayList<ChildScorer>(2);
+ children.add(new ChildScorer(reqScorer, "MUST"));
+ children.add(new ChildScorer(optScorer, "SHOULD"));
+ return children;
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/ScoreCachingWrappingScorer.java Wed Jul 18 21:20:58 2012
@@ -18,6 +18,8 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
/**
* A {@link Scorer} which wraps another scorer and caches the score of the
@@ -59,6 +61,11 @@ public class ScoreCachingWrappingScorer
}
@Override
+ public float freq() throws IOException {
+ return scorer.freq();
+ }
+
+ @Override
public int docID() {
return scorer.docID();
}
@@ -77,5 +84,9 @@ public class ScoreCachingWrappingScorer
public int advance(int target) throws IOException {
return scorer.advance(target);
}
-
+
+ @Override
+ public Collection<ChildScorer> getChildren() {
+ return Collections.singleton(new ChildScorer(scorer, "CACHED"));
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/Scorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/Scorer.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/Scorer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/Scorer.java Wed Jul 18 21:20:58 2012
@@ -98,9 +98,7 @@ public abstract class Scorer extends Doc
* "sloppy" the match was.
*
* @lucene.experimental */
- public float freq() throws IOException {
- throw new UnsupportedOperationException(this + " does not implement freq()");
- }
+ public abstract float freq() throws IOException;
/** returns parent Weight
* @lucene.experimental
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/TermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/TermQuery.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/TermQuery.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/TermQuery.java Wed Jul 18 21:20:58 2012
@@ -85,27 +85,20 @@ public class TermQuery extends Query {
}
DocsEnum docs = termsEnum.docs(acceptDocs, null, true);
if (docs != null) {
- return new TermScorer(this, docs, createDocScorer(context));
+ return new TermScorer(this, docs, similarity.exactSimScorer(stats, context), termsEnum.docFreq());
} else {
// Index does not store freq info
docs = termsEnum.docs(acceptDocs, null, false);
assert docs != null;
- return new MatchOnlyTermScorer(this, docs, createDocScorer(context));
+ return new MatchOnlyTermScorer(this, docs, similarity.exactSimScorer(stats, context), termsEnum.docFreq());
}
}
/**
- * Creates an {@link ExactSimScorer} for this {@link TermWeight}*/
- ExactSimScorer createDocScorer(AtomicReaderContext context)
- throws IOException {
- return similarity.exactSimScorer(stats, context);
- }
-
- /**
* Returns a {@link TermsEnum} positioned at this weights Term or null if
* the term does not exist in the given context
*/
- TermsEnum getTermsEnum(AtomicReaderContext context) throws IOException {
+ private TermsEnum getTermsEnum(AtomicReaderContext context) throws IOException {
final TermState state = termStates.get(context.ord);
if (state == null) { // term is not present in that reader
assert termNotInReader(context.reader(), term.field(), term.bytes()) : "no termstate found but term exists in reader term=" + term;
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/TermScorer.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/TermScorer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/TermScorer.java Wed Jul 18 21:20:58 2012
@@ -27,6 +27,7 @@ import org.apache.lucene.search.similari
final class TermScorer extends Scorer {
private final DocsEnum docsEnum;
private final Similarity.ExactSimScorer docScorer;
+ private final int docFreq;
/**
* Construct a <code>TermScorer</code>.
@@ -38,11 +39,14 @@ final class TermScorer extends Scorer {
* @param docScorer
* The </code>Similarity.ExactSimScorer</code> implementation
* to be used for score computations.
+ * @param docFreq
+ * per-segment docFreq of this term
*/
- TermScorer(Weight weight, DocsEnum td, Similarity.ExactSimScorer docScorer) {
+ TermScorer(Weight weight, DocsEnum td, Similarity.ExactSimScorer docScorer, int docFreq) {
super(weight);
this.docScorer = docScorer;
this.docsEnum = td;
+ this.docFreq = docFreq;
}
@Override
@@ -89,4 +93,17 @@ final class TermScorer extends Scorer {
@Override
public String toString() { return "scorer(" + weight + ")"; }
+ // TODO: benchmark if the specialized conjunction really benefits
+ // from this, or if instead its from sorting by docFreq, or both
+
+ DocsEnum getDocsEnum() {
+ return docsEnum;
+ }
+
+ // TODO: generalize something like this for scorers?
+ // even this is just an estimation...
+
+ int getDocFreq() {
+ return docFreq;
+ }
}
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java Wed Jul 18 21:20:58 2012
@@ -225,6 +225,11 @@ final class JustCompileSearch {
public float score() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
+
+ @Override
+ public float freq() {
+ throw new UnsupportedOperationException(UNSUPPORTED_MSG);
+ }
@Override
public int docID() {
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestBooleanScorer.java Wed Jul 18 21:20:58 2012
@@ -81,6 +81,7 @@ public class TestBooleanScorer extends L
Scorer[] scorers = new Scorer[] {new Scorer(weight) {
private int doc = -1;
@Override public float score() { return 0; }
+ @Override public float freq() { return 0; }
@Override public int docID() { return doc; }
@Override public int nextDoc() {
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestCachingCollector.java Wed Jul 18 21:20:58 2012
@@ -34,6 +34,9 @@ public class TestCachingCollector extend
@Override
public float score() throws IOException { return 0; }
+
+ @Override
+ public float freq() throws IOException { return 0; }
@Override
public int docID() { return 0; }
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestPositiveScoresOnlyCollector.java Wed Jul 18 21:20:58 2012
@@ -35,6 +35,10 @@ public class TestPositiveScoresOnlyColle
@Override public float score() {
return idx == scores.length ? Float.NaN : scores[idx];
}
+
+ @Override public float freq() {
+ return 1;
+ }
@Override public int docID() { return idx; }
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestScoreCachingWrappingScorer.java Wed Jul 18 21:20:58 2012
@@ -43,6 +43,10 @@ public class TestScoreCachingWrappingSco
// once per document.
return idx == scores.length ? Float.NaN : scores[idx++];
}
+
+ @Override public float freq() throws IOException {
+ return 1;
+ }
@Override public int docID() { return doc; }
Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestSubScorerFreqs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestSubScorerFreqs.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestSubScorerFreqs.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/search/TestSubScorerFreqs.java Wed Jul 18 21:20:58 2012
@@ -75,7 +75,7 @@ public class TestSubScorerFreqs extends
private final Set<String> relationships;
public CountingCollector(Collector other) {
- this(other, new HashSet<String>(Arrays.asList(Occur.MUST.toString(), Occur.SHOULD.toString(), Occur.MUST_NOT.toString())));
+ this(other, new HashSet<String>(Arrays.asList("MUST", "SHOULD", "MUST_NOT")));
}
public CountingCollector(Collector other, Set<String> relationships) {
@@ -161,9 +161,9 @@ public class TestSubScorerFreqs extends
query.add(inner, Occur.MUST);
query.add(aQuery, Occur.MUST);
query.add(dQuery, Occur.MUST);
- @SuppressWarnings({"rawtypes","unchecked"}) Set<String>[] occurList = new Set[] {
- Collections.singleton(Occur.MUST.toString()),
- new HashSet<String>(Arrays.asList(Occur.MUST.toString(), Occur.SHOULD.toString()))
+ Set<String>[] occurList = new Set[] {
+ Collections.singleton("MUST"),
+ new HashSet<String>(Arrays.asList("MUST", "SHOULD"))
};
for (Set<String> occur : occurList) {
CountingCollector c = new CountingCollector(TopScoreDocCollector.create(
@@ -171,7 +171,7 @@ public class TestSubScorerFreqs extends
s.search(query, null, c);
final int maxDocs = s.getIndexReader().maxDoc();
assertEquals(maxDocs, c.docCounts.size());
- boolean includeOptional = occur.contains(Occur.SHOULD.toString());
+ boolean includeOptional = occur.contains("SHOULD");
for (int i = 0; i < maxDocs; i++) {
Map<Query, Float> doc0 = c.docCounts.get(i);
assertEquals(includeOptional ? 5 : 4, doc0.size());
Modified: lucene/dev/branches/branch_4x/lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java (original)
+++ lucene/dev/branches/branch_4x/lucene/grouping/src/java/org/apache/lucene/search/grouping/BlockGroupingCollector.java Wed Jul 18 21:20:58 2012
@@ -97,6 +97,11 @@ public class BlockGroupingCollector exte
public float score() {
return score;
}
+
+ @Override
+ public float freq() {
+ throw new UnsupportedOperationException(); // TODO: wtf does this class do?
+ }
@Override
public int docID() {
Modified: lucene/dev/branches/branch_4x/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java (original)
+++ lucene/dev/branches/branch_4x/lucene/join/src/java/org/apache/lucene/search/join/TermsIncludingScoreQuery.java Wed Jul 18 21:20:58 2012
@@ -208,6 +208,11 @@ class TermsIncludingScoreQuery extends Q
} while (docId != DocIdSetIterator.NO_MORE_DOCS);
return docId;
}
+
+ @Override
+ public float freq() {
+ return 1;
+ }
}
// This impl that tracks whether a docid has already been emitted. This check makes sure that docs aren't emitted
Modified: lucene/dev/branches/branch_4x/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java (original)
+++ lucene/dev/branches/branch_4x/lucene/join/src/java/org/apache/lucene/search/join/ToChildBlockJoinQuery.java Wed Jul 18 21:20:58 2012
@@ -161,6 +161,7 @@ public class ToChildBlockJoinQuery exten
private final Bits acceptDocs;
private float parentScore;
+ private float parentFreq = 1;
private int childDoc = -1;
private int parentDoc;
@@ -175,7 +176,7 @@ public class ToChildBlockJoinQuery exten
@Override
public Collection<ChildScorer> getChildren() {
- return Collections.singletonList(new ChildScorer(parentScorer, "BLOCK_JOIN"));
+ return Collections.singleton(new ChildScorer(parentScorer, "BLOCK_JOIN"));
}
@Override
@@ -218,6 +219,7 @@ public class ToChildBlockJoinQuery exten
if (childDoc < parentDoc) {
if (doScores) {
parentScore = parentScorer.score();
+ parentFreq = parentScorer.freq();
}
//System.out.println(" " + childDoc);
return childDoc;
@@ -248,6 +250,11 @@ public class ToChildBlockJoinQuery exten
}
@Override
+ public float freq() throws IOException {
+ return parentFreq;
+ }
+
+ @Override
public int advance(int childTarget) throws IOException {
assert childTarget >= parentBits.length() || !parentBits.get(childTarget);
@@ -269,6 +276,7 @@ public class ToChildBlockJoinQuery exten
}
if (doScores) {
parentScore = parentScorer.score();
+ parentFreq = parentScorer.freq();
}
final int firstChild = parentBits.prevSetBit(parentDoc-1);
//System.out.println(" firstChild=" + firstChild);
Modified: lucene/dev/branches/branch_4x/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java (original)
+++ lucene/dev/branches/branch_4x/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinCollector.java Wed Jul 18 21:20:58 2012
@@ -327,6 +327,11 @@ public class ToParentBlockJoinCollector
public float score() {
return score;
}
+
+ @Override
+ public float freq() {
+ return 1; // TODO: does anything else make sense?... duplicate of grouping's FakeScorer btw?
+ }
@Override
public int docID() {
Modified: lucene/dev/branches/branch_4x/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java (original)
+++ lucene/dev/branches/branch_4x/lucene/join/src/java/org/apache/lucene/search/join/ToParentBlockJoinQuery.java Wed Jul 18 21:20:58 2012
@@ -218,6 +218,7 @@ public class ToParentBlockJoinQuery exte
private int parentDoc = -1;
private int prevParentDoc;
private float parentScore;
+ private float parentFreq;
private int nextChildDoc;
private int[] pendingChildDocs = new int[5];
@@ -239,7 +240,7 @@ public class ToParentBlockJoinQuery exte
@Override
public Collection<ChildScorer> getChildren() {
- return Collections.singletonList(new ChildScorer(childScorer, "BLOCK_JOIN"));
+ return Collections.singleton(new ChildScorer(childScorer, "BLOCK_JOIN"));
}
int getChildCount() {
@@ -299,7 +300,9 @@ public class ToParentBlockJoinQuery exte
}
float totalScore = 0;
+ float totalFreq = 0;
float maxScore = Float.NEGATIVE_INFINITY;
+ float maxFreq = 0;
childDocUpto = 0;
do {
@@ -315,9 +318,12 @@ public class ToParentBlockJoinQuery exte
if (scoreMode != ScoreMode.None) {
// TODO: specialize this into dedicated classes per-scoreMode
final float childScore = childScorer.score();
+ final float childFreq = childScorer.freq();
pendingChildScores[childDocUpto] = childScore;
maxScore = Math.max(childScore, maxScore);
+ maxFreq = Math.max(childFreq, maxFreq);
totalScore += childScore;
+ totalFreq += childFreq;
}
childDocUpto++;
nextChildDoc = childScorer.nextDoc();
@@ -329,12 +335,15 @@ public class ToParentBlockJoinQuery exte
switch(scoreMode) {
case Avg:
parentScore = totalScore / childDocUpto;
+ parentFreq = totalFreq / childDocUpto;
break;
case Max:
parentScore = maxScore;
+ parentFreq = maxFreq;
break;
case Total:
parentScore = totalScore;
+ parentFreq = totalFreq;
break;
case None:
break;
@@ -354,6 +363,11 @@ public class ToParentBlockJoinQuery exte
public float score() throws IOException {
return parentScore;
}
+
+ @Override
+ public float freq() {
+ return parentFreq;
+ }
@Override
public int advance(int parentTarget) throws IOException {
Modified: lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java (original)
+++ lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/CustomScoreQuery.java Wed Jul 18 21:20:58 2012
@@ -18,6 +18,8 @@ package org.apache.lucene.queries;
*/
import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
import java.util.Set;
import java.util.Arrays;
@@ -325,6 +327,16 @@ public class CustomScoreQuery extends Qu
}
@Override
+ public float freq() throws IOException {
+ return subQueryScorer.freq();
+ }
+
+ @Override
+ public Collection<ChildScorer> getChildren() {
+ return Collections.singleton(new ChildScorer(subQueryScorer, "CUSTOM"));
+ }
+
+ @Override
public int advance(int target) throws IOException {
int doc = subQueryScorer.advance(target);
if (doc != NO_MORE_DOCS) {
Modified: lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java (original)
+++ lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/function/BoostedQuery.java Wed Jul 18 21:20:58 2012
@@ -25,6 +25,8 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
import java.util.Set;
import java.util.Map;
@@ -164,6 +166,16 @@ public class BoostedQuery extends Query
return score>Float.NEGATIVE_INFINITY ? score : -Float.MAX_VALUE;
}
+ @Override
+ public float freq() throws IOException {
+ return scorer.freq();
+ }
+
+ @Override
+ public Collection<ChildScorer> getChildren() {
+ return Collections.singleton(new ChildScorer(scorer, "CUSTOM"));
+ }
+
public Explanation explain(int doc) throws IOException {
Explanation subQueryExpl = weight.qWeight.explain(readerContext ,doc);
if (!subQueryExpl.isMatch()) {
Modified: lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java (original)
+++ lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionQuery.java Wed Jul 18 21:20:58 2012
@@ -158,6 +158,11 @@ public class FunctionQuery extends Query
return score>Float.NEGATIVE_INFINITY ? score : -Float.MAX_VALUE;
}
+ @Override
+ public float freq() throws IOException {
+ return 1;
+ }
+
public Explanation explain(int doc) throws IOException {
float sc = qWeight * vals.floatVal(doc);
Modified: lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java (original)
+++ lucene/dev/branches/branch_4x/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java Wed Jul 18 21:20:58 2012
@@ -82,4 +82,9 @@ public class ValueSourceScorer extends S
public float score() throws IOException {
return values.floatVal(doc);
}
+
+ @Override
+ public float freq() throws IOException {
+ return 1;
+ }
}
Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/LatLonType.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/LatLonType.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/LatLonType.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/schema/LatLonType.java Wed Jul 18 21:20:58 2012
@@ -485,6 +485,11 @@ class SpatialDistanceQuery extends Exten
return (float)(dist * qWeight);
}
+ @Override
+ public float freq() throws IOException {
+ return 1;
+ }
+
public Explanation explain(int doc) throws IOException {
advance(doc);
boolean matched = this.doc == doc;
Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/JoinQParserPlugin.java Wed Jul 18 21:20:58 2012
@@ -532,6 +532,11 @@ class JoinQuery extends Query {
public float score() throws IOException {
return score;
}
+
+ @Override
+ public float freq() throws IOException {
+ return 1;
+ }
@Override
public int advance(int target) throws IOException {
Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java?rev=1363121&r1=1363120&r2=1363121&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/search/SolrConstantScoreQuery.java Wed Jul 18 21:20:58 2012
@@ -186,6 +186,11 @@ public class SolrConstantScoreQuery exte
public float score() throws IOException {
return theScore;
}
+
+ @Override
+ public float freq() throws IOException {
+ return 1;
+ }
@Override
public int advance(int target) throws IOException {