You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2015/08/06 11:55:40 UTC
svn commit: r1694435 - in /lucene/dev/trunk/lucene: CHANGES.txt
core/src/java/org/apache/lucene/search/TopFieldCollector.java
core/src/test/org/apache/lucene/search/TestTopFieldCollector.java
Author: jpountz
Date: Thu Aug 6 09:55:39 2015
New Revision: 1694435
URL: http://svn.apache.org/r1694435
Log:
LUCENE-6708: TopFieldCollector does not compute the score several times on the same document anymore.
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1694435&r1=1694434&r2=1694435&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Aug 6 09:55:39 2015
@@ -43,6 +43,13 @@ API Changes
* LUCENE-6706: PayloadTermQuery and PayloadNearQuery have been removed.
Instead, use PayloadScoreQuery to wrap any SpanQuery. (Alan Woodward)
+======================= Lucene 5.4.0 =======================
+
+Optimizations
+
+* LUCENE-6708: TopFieldCollector does not compute the score several times on the
+ same document anymore. (Adrien Grand)
+
======================= Lucene 5.3.0 =======================
New Features
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java?rev=1694435&r1=1694434&r2=1694435&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java Thu Aug 6 09:55:39 2015
@@ -43,15 +43,20 @@ public abstract class TopFieldCollector
final LeafFieldComparator comparator;
final int reverseMul;
+ final boolean mayNeedScoresTwice;
Scorer scorer;
- OneComparatorLeafCollector(LeafFieldComparator comparator, int reverseMul) {
+ OneComparatorLeafCollector(LeafFieldComparator comparator, int reverseMul, boolean mayNeedScoresTwice) {
this.comparator = comparator;
this.reverseMul = reverseMul;
+ this.mayNeedScoresTwice = mayNeedScoresTwice;
}
@Override
public void setScorer(Scorer scorer) throws IOException {
+ if (mayNeedScoresTwice && scorer instanceof ScoreCachingWrappingScorer == false) {
+ scorer = new ScoreCachingWrappingScorer(scorer);
+ }
this.scorer = scorer;
comparator.setScorer(scorer);
}
@@ -63,13 +68,15 @@ public abstract class TopFieldCollector
final int[] reverseMul;
final LeafFieldComparator firstComparator;
final int firstReverseMul;
+ final boolean mayNeedScoresTwice;
Scorer scorer;
- MultiComparatorLeafCollector(LeafFieldComparator[] comparators, int[] reverseMul) {
+ MultiComparatorLeafCollector(LeafFieldComparator[] comparators, int[] reverseMul, boolean mayNeedScoresTwice) {
this.comparators = comparators;
this.reverseMul = reverseMul;
firstComparator = comparators[0];
firstReverseMul = reverseMul[0];
+ this.mayNeedScoresTwice = mayNeedScoresTwice;
}
protected final int compareBottom(int doc) throws IOException {
@@ -115,6 +122,9 @@ public abstract class TopFieldCollector
@Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
+ if (mayNeedScoresTwice && scorer instanceof ScoreCachingWrappingScorer == false) {
+ scorer = new ScoreCachingWrappingScorer(scorer);
+ }
for (LeafFieldComparator comparator : comparators) {
comparator.setScorer(scorer);
}
@@ -122,16 +132,29 @@ public abstract class TopFieldCollector
}
/*
- * Implements a TopFieldCollector over one SortField criteria, without
- * tracking document scores and maxScore.
+ * Implements a TopFieldCollector over one SortField criteria, with tracking
+ * document scores and maxScore.
*/
- private static class NonScoringCollector extends TopFieldCollector {
+ private static class SimpleFieldCollector extends TopFieldCollector {
final FieldValueHitQueue<Entry> queue;
+ final boolean trackDocScores;
+ final boolean trackMaxScore;
+ final boolean mayNeedScoresTwice;
- public NonScoringCollector(Sort sort, FieldValueHitQueue<Entry> queue, int numHits, boolean fillFields) {
- super(queue, numHits, fillFields, sort.needsScores());
+ public SimpleFieldCollector(Sort sort, FieldValueHitQueue<Entry> queue, int numHits, boolean fillFields,
+ boolean trackDocScores, boolean trackMaxScore) {
+ super(queue, numHits, fillFields, sort.needsScores() || trackDocScores || trackMaxScore);
this.queue = queue;
+ if (trackMaxScore) {
+ maxScore = Float.NEGATIVE_INFINITY; // otherwise we would keep NaN
+ }
+ this.trackDocScores = trackDocScores;
+ this.trackMaxScore = trackMaxScore;
+ // If one of the sort fields needs scores, and if we also track scores, then
+ // we might call scorer.score() several times per doc so wrapping the scorer
+ // to cache scores would help
+ this.mayNeedScoresTwice = sort.needsScores() && (trackDocScores || trackMaxScore);
}
@Override
@@ -142,217 +165,43 @@ public abstract class TopFieldCollector
final int[] reverseMul = queue.getReverseMul();
if (comparators.length == 1) {
- return new OneComparatorLeafCollector(comparators[0], reverseMul[0]) {
+ return new OneComparatorLeafCollector(comparators[0], reverseMul[0], mayNeedScoresTwice) {
@Override
public void collect(int doc) throws IOException {
- ++totalHits;
- if (queueFull) {
- if ((reverseMul * comparator.compareBottom(doc)) <= 0) {
- // since docs are visited in doc Id order, if compare is 0, it means
- // this document is larger than anything else in the queue, and
- // therefore not competitive.
- return;
- }
-
- // This hit is competitive - replace bottom element in queue & adjustTop
- comparator.copy(bottom.slot, doc);
- updateBottom(doc);
- comparator.setBottom(bottom.slot);
- } else {
- // Startup transient: queue hasn't gathered numHits yet
- final int slot = totalHits - 1;
- // Copy hit into queue
- comparator.copy(slot, doc);
- add(slot, doc, Float.NaN);
- if (queueFull) {
- comparator.setBottom(bottom.slot);
- }
- }
- }
-
- };
- } else {
- return new MultiComparatorLeafCollector(comparators, reverseMul) {
-
- @Override
- public void collect(int doc) throws IOException {
- ++totalHits;
- if (queueFull) {
- if ((compareBottom(doc)) <= 0) {
- // since docs are visited in doc Id order, if compare is 0, it means
- // this document is larger than anything else in the queue, and
- // therefore not competitive.
- return;
- }
-
- // This hit is competitive - replace bottom element in queue & adjustTop
- copy(bottom.slot, doc);
- updateBottom(doc);
- setBottom(bottom.slot);
- } else {
- // Startup transient: queue hasn't gathered numHits yet
- final int slot = totalHits - 1;
- // Copy hit into queue
- copy(slot, doc);
- add(slot, doc, Float.NaN);
- if (queueFull) {
- setBottom(bottom.slot);
+ float score = Float.NaN;
+ if (trackMaxScore) {
+ score = scorer.score();
+ if (score > maxScore) {
+ maxScore = score;
}
}
- }
- };
- }
- }
-
- }
-
- /*
- * Implements a TopFieldCollector over one SortField criteria, while tracking
- * document scores but no maxScore.
- */
- private static class ScoringNoMaxScoreCollector extends TopFieldCollector {
-
- final FieldValueHitQueue<Entry> queue;
-
- public ScoringNoMaxScoreCollector(Sort sort, FieldValueHitQueue<Entry> queue, int numHits, boolean fillFields) {
- super(queue, numHits, fillFields, true);
- this.queue = queue;
- }
-
- @Override
- public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
- docBase = context.docBase;
-
- final LeafFieldComparator[] comparators = queue.getComparators(context);
- final int[] reverseMul = queue.getReverseMul();
-
- if (comparators.length == 1) {
- return new OneComparatorLeafCollector(comparators[0], reverseMul[0]) {
-
- @Override
- public void collect(int doc) throws IOException {
++totalHits;
if (queueFull) {
- if ((reverseMul * comparator.compareBottom(doc)) <= 0) {
+ if (reverseMul * comparator.compareBottom(doc) <= 0) {
// since docs are visited in doc Id order, if compare is 0, it means
// this document is largest than anything else in the queue, and
// therefore not competitive.
return;
}
- // Compute the score only if the hit is competitive.
- final float score = scorer.score();
+ if (trackDocScores && !trackMaxScore) {
+ score = scorer.score();
+ }
// This hit is competitive - replace bottom element in queue & adjustTop
comparator.copy(bottom.slot, doc);
updateBottom(doc, score);
comparator.setBottom(bottom.slot);
} else {
- // Compute the score only if the hit is competitive.
- final float score = scorer.score();
-
// Startup transient: queue hasn't gathered numHits yet
final int slot = totalHits - 1;
- // Copy hit into queue
- comparator.copy(slot, doc);
- add(slot, doc, score);
- if (queueFull) {
- comparator.setBottom(bottom.slot);
- }
- }
- }
-
- };
- } else {
- return new MultiComparatorLeafCollector(comparators, reverseMul) {
- @Override
- public void collect(int doc) throws IOException {
- ++totalHits;
- if (queueFull) {
- if ((compareBottom(doc)) <= 0) {
- // since docs are visited in doc Id order, if compare is 0, it means
- // this document is largest than anything else in the queue, and
- // therefore not competitive.
- return;
+ if (trackDocScores && !trackMaxScore) {
+ score = scorer.score();
}
- // Compute the score only if the hit is competitive.
- final float score = scorer.score();
-
- // This hit is competitive - replace bottom element in queue & adjustTop
- copy(bottom.slot, doc);
- updateBottom(doc, score);
- setBottom(bottom.slot);
- } else {
- // Compute the score only if the hit is competitive.
- final float score = scorer.score();
-
- // Startup transient: queue hasn't gathered numHits yet
- final int slot = totalHits - 1;
- // Copy hit into queue
- copy(slot, doc);
- add(slot, doc, score);
- if (queueFull) {
- setBottom(bottom.slot);
- }
- }
- }
-
- };
- }
- }
-
- }
-
- /*
- * Implements a TopFieldCollector over one SortField criteria, with tracking
- * document scores and maxScore.
- */
- private static class ScoringMaxScoreCollector extends TopFieldCollector {
-
- final FieldValueHitQueue<Entry> queue;
-
- public ScoringMaxScoreCollector(Sort sort, FieldValueHitQueue<Entry> queue, int numHits, boolean fillFields) {
- super(queue, numHits, fillFields, true);
- this.queue = queue;
- maxScore = Float.MIN_NORMAL; // otherwise we would keep NaN
- }
-
- @Override
- public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
- docBase = context.docBase;
-
- final LeafFieldComparator[] comparators = queue.getComparators(context);
- final int[] reverseMul = queue.getReverseMul();
-
- if (comparators.length == 1) {
- return new OneComparatorLeafCollector(comparators[0], reverseMul[0]) {
-
- @Override
- public void collect(int doc) throws IOException {
- final float score = scorer.score();
- if (score > maxScore) {
- maxScore = score;
- }
- ++totalHits;
- if (queueFull) {
- if (reverseMul * comparator.compareBottom(doc) <= 0) {
- // since docs are visited in doc Id order, if compare is 0, it means
- // this document is largest than anything else in the queue, and
- // therefore not competitive.
- return;
- }
-
- // This hit is competitive - replace bottom element in queue & adjustTop
- comparator.copy(bottom.slot, doc);
- updateBottom(doc, score);
- comparator.setBottom(bottom.slot);
- } else {
- // Startup transient: queue hasn't gathered numHits yet
- final int slot = totalHits - 1;
// Copy hit into queue
comparator.copy(slot, doc);
add(slot, doc, score);
@@ -364,14 +213,18 @@ public abstract class TopFieldCollector
};
} else {
- return new MultiComparatorLeafCollector(comparators, reverseMul) {
+ return new MultiComparatorLeafCollector(comparators, reverseMul, mayNeedScoresTwice) {
@Override
public void collect(int doc) throws IOException {
- final float score = scorer.score();
- if (score > maxScore) {
- maxScore = score;
+ float score = Float.NaN;
+ if (trackMaxScore) {
+ score = scorer.score();
+ if (score > maxScore) {
+ maxScore = score;
+ }
}
+
++totalHits;
if (queueFull) {
if (compareBottom(doc) <= 0) {
@@ -381,6 +234,10 @@ public abstract class TopFieldCollector
return;
}
+ if (trackDocScores && !trackMaxScore) {
+ score = scorer.score();
+ }
+
// This hit is competitive - replace bottom element in queue & adjustTop
copy(bottom.slot, doc);
updateBottom(doc, score);
@@ -388,6 +245,11 @@ public abstract class TopFieldCollector
} else {
// Startup transient: queue hasn't gathered numHits yet
final int slot = totalHits - 1;
+
+ if (trackDocScores && !trackMaxScore) {
+ score = scorer.score();
+ }
+
// Copy hit into queue
copy(slot, doc);
add(slot, doc, score);
@@ -413,6 +275,7 @@ public abstract class TopFieldCollector
final boolean trackDocScores;
final boolean trackMaxScore;
final FieldDoc after;
+ final boolean mayNeedScoresTwice;
public PagingFieldCollector(Sort sort, FieldValueHitQueue<Entry> queue, FieldDoc after, int numHits, boolean fillFields,
boolean trackDocScores, boolean trackMaxScore) {
@@ -421,6 +284,7 @@ public abstract class TopFieldCollector
this.trackDocScores = trackDocScores;
this.trackMaxScore = trackMaxScore;
this.after = after;
+ this.mayNeedScoresTwice = sort.needsScores() && (trackDocScores || trackMaxScore);
// Must set maxScore to NEG_INF, or otherwise Math.max always returns NaN.
maxScore = Float.NEGATIVE_INFINITY;
@@ -438,7 +302,7 @@ public abstract class TopFieldCollector
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
docBase = context.docBase;
final int afterDoc = after.doc - docBase;
- return new MultiComparatorLeafCollector(queue.getComparators(context), queue.getReverseMul()) {
+ return new MultiComparatorLeafCollector(queue.getComparators(context), queue.getReverseMul(), mayNeedScoresTwice) {
@Override
public void collect(int doc) throws IOException {
@@ -628,13 +492,7 @@ public abstract class TopFieldCollector
FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits);
if (after == null) {
- if (trackMaxScore) {
- return new ScoringMaxScoreCollector(sort, queue, numHits, fillFields);
- } else if (trackDocScores) {
- return new ScoringNoMaxScoreCollector(sort, queue, numHits, fillFields);
- } else {
- return new NonScoringCollector(sort, queue, numHits, fillFields);
- }
+ return new SimpleFieldCollector(sort, queue, numHits, fillFields, trackDocScores, trackMaxScore);
} else {
if (after.fields == null) {
throw new IllegalArgumentException("after.fields wasn't set; you must pass fillFields=true for the previous search");
Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java?rev=1694435&r1=1694434&r2=1694435&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java Thu Aug 6 09:55:39 2015
@@ -17,13 +17,21 @@ package org.apache.lucene.search;
* limitations under the License.
*/
+import java.io.IOException;
+
import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.FieldValueHitQueue.Entry;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
public class TestTopFieldCollector extends LuceneTestCase {
private IndexSearcher is;
@@ -166,5 +174,99 @@ public class TestTopFieldCollector exten
assertEquals(0, td.totalHits);
assertTrue(Float.isNaN(td.getMaxScore()));
}
- }
+ }
+
+ public void testComputeScoresOnlyOnce() throws Exception {
+ Directory dir = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+ Document doc = new Document();
+ StringField text = new StringField("text", "foo", Store.NO);
+ doc.add(text);
+ NumericDocValuesField relevance = new NumericDocValuesField("relevance", 1);
+ doc.add(relevance);
+ w.addDocument(doc);
+ text.setStringValue("bar");
+ w.addDocument(doc);
+ text.setStringValue("baz");
+ w.addDocument(doc);
+ IndexReader reader = w.getReader();
+ TermQuery foo = new TermQuery(new Term("text", "foo"));
+ TermQuery bar = new TermQuery(new Term("text", "bar"));
+ bar.setBoost(2);
+ TermQuery baz = new TermQuery(new Term("text", "baz"));
+ baz.setBoost(3);
+ Query query = new BooleanQuery.Builder()
+ .add(foo, Occur.SHOULD)
+ .add(bar, Occur.SHOULD)
+ .add(baz, Occur.SHOULD)
+ .build();
+ final IndexSearcher searcher = new IndexSearcher(reader);
+ for (Sort sort : new Sort[] {new Sort(SortField.FIELD_SCORE), new Sort(new SortField("f", SortField.Type.SCORE))}) {
+ for (boolean doDocScores : new boolean[] {false, true}) {
+ for (boolean doMaxScore : new boolean[] {false, true}) {
+ final TopFieldCollector topCollector = TopFieldCollector.create(sort, TestUtil.nextInt(random(), 1, 2), true, doDocScores, doMaxScore);
+ final Collector assertingCollector = new Collector() {
+ @Override
+ public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
+ final LeafCollector in = topCollector.getLeafCollector(context);
+ return new FilterLeafCollector(in) {
+ @Override
+ public void setScorer(final Scorer scorer) throws IOException {
+ Scorer s = new Scorer(null) {
+
+ int lastComputedDoc = -1;
+
+ @Override
+ public float score() throws IOException {
+ if (lastComputedDoc == docID()) {
+ throw new AssertionError("Score computed twice on " + docID());
+ }
+ lastComputedDoc = docID();
+ return scorer.score();
+ }
+
+ @Override
+ public int freq() throws IOException {
+ return scorer.freq();
+ }
+
+ @Override
+ public int docID() {
+ return scorer.docID();
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ return scorer.nextDoc();
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ return scorer.advance(target);
+ }
+
+ @Override
+ public long cost() {
+ return scorer.cost();
+ }
+
+ };
+ super.setScorer(s);
+ }
+ };
+ }
+ @Override
+ public boolean needsScores() {
+ return topCollector.needsScores();
+ }
+ };
+ searcher.search(query, assertingCollector);
+ }
+ }
+ }
+ reader.close();
+ w.close();
+ dir.close();
+ }
+
}