You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2015/08/06 11:55:40 UTC

svn commit: r1694435 - in /lucene/dev/trunk/lucene: CHANGES.txt core/src/java/org/apache/lucene/search/TopFieldCollector.java core/src/test/org/apache/lucene/search/TestTopFieldCollector.java

Author: jpountz
Date: Thu Aug  6 09:55:39 2015
New Revision: 1694435

URL: http://svn.apache.org/r1694435
Log:
LUCENE-6708: TopFieldCollector does not compute the score several times on the same document anymore.

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1694435&r1=1694434&r2=1694435&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Aug  6 09:55:39 2015
@@ -43,6 +43,13 @@ API Changes
 * LUCENE-6706: PayloadTermQuery and PayloadNearQuery have been removed.
   Instead, use PayloadScoreQuery to wrap any SpanQuery. (Alan Woodward)
 
+======================= Lucene 5.4.0 =======================
+
+Optimizations
+
+* LUCENE-6708: TopFieldCollector does not compute the score several times on the
+  same document anymore. (Adrien Grand)
+
 ======================= Lucene 5.3.0 =======================
 
 New Features

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java?rev=1694435&r1=1694434&r2=1694435&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java Thu Aug  6 09:55:39 2015
@@ -43,15 +43,20 @@ public abstract class TopFieldCollector
 
     final LeafFieldComparator comparator;
     final int reverseMul;
+    final boolean mayNeedScoresTwice;
     Scorer scorer;
 
-    OneComparatorLeafCollector(LeafFieldComparator comparator, int reverseMul) {
+    OneComparatorLeafCollector(LeafFieldComparator comparator, int reverseMul, boolean mayNeedScoresTwice) {
       this.comparator = comparator;
       this.reverseMul = reverseMul;
+      this.mayNeedScoresTwice = mayNeedScoresTwice;
     }
 
     @Override
     public void setScorer(Scorer scorer) throws IOException {
+      if (mayNeedScoresTwice && scorer instanceof ScoreCachingWrappingScorer == false) {
+        scorer = new ScoreCachingWrappingScorer(scorer);
+      }
       this.scorer = scorer;
       comparator.setScorer(scorer);
     }
@@ -63,13 +68,15 @@ public abstract class TopFieldCollector
     final int[] reverseMul;
     final LeafFieldComparator firstComparator;
     final int firstReverseMul;
+    final boolean mayNeedScoresTwice;
     Scorer scorer;
 
-    MultiComparatorLeafCollector(LeafFieldComparator[] comparators, int[] reverseMul) {
+    MultiComparatorLeafCollector(LeafFieldComparator[] comparators, int[] reverseMul, boolean mayNeedScoresTwice) {
       this.comparators = comparators;
       this.reverseMul = reverseMul;
       firstComparator = comparators[0];
       firstReverseMul = reverseMul[0];
+      this.mayNeedScoresTwice = mayNeedScoresTwice;
     }
 
     protected final int compareBottom(int doc) throws IOException {
@@ -115,6 +122,9 @@ public abstract class TopFieldCollector
     @Override
     public void setScorer(Scorer scorer) throws IOException {
       this.scorer = scorer;
+      if (mayNeedScoresTwice && scorer instanceof ScoreCachingWrappingScorer == false) {
+        scorer = new ScoreCachingWrappingScorer(scorer);
+      }
       for (LeafFieldComparator comparator : comparators) {
         comparator.setScorer(scorer);
       }
@@ -122,16 +132,29 @@ public abstract class TopFieldCollector
   }
 
   /*
-   * Implements a TopFieldCollector over one SortField criteria, without
-   * tracking document scores and maxScore.
+   * Implements a TopFieldCollector over one SortField criteria, with tracking
+   * document scores and maxScore.
    */
-  private static class NonScoringCollector extends TopFieldCollector {
+  private static class SimpleFieldCollector extends TopFieldCollector {
 
     final FieldValueHitQueue<Entry> queue;
+    final boolean trackDocScores;
+    final boolean trackMaxScore;
+    final boolean mayNeedScoresTwice;
 
-    public NonScoringCollector(Sort sort, FieldValueHitQueue<Entry> queue, int numHits, boolean fillFields) {
-      super(queue, numHits, fillFields, sort.needsScores());
+    public SimpleFieldCollector(Sort sort, FieldValueHitQueue<Entry> queue, int numHits, boolean fillFields,
+        boolean trackDocScores, boolean trackMaxScore) {
+      super(queue, numHits, fillFields, sort.needsScores() || trackDocScores || trackMaxScore);
       this.queue = queue;
+      if (trackMaxScore) {
+        maxScore = Float.NEGATIVE_INFINITY; // otherwise we would keep NaN
+      }
+      this.trackDocScores = trackDocScores;
+      this.trackMaxScore = trackMaxScore;
+      // If one of the sort fields needs scores, and if we also track scores, then
+      // we might call scorer.score() several times per doc so wrapping the scorer
+      // to cache scores would help
+      this.mayNeedScoresTwice = sort.needsScores() && (trackDocScores || trackMaxScore);
     }
 
     @Override
@@ -142,217 +165,43 @@ public abstract class TopFieldCollector
       final int[] reverseMul = queue.getReverseMul();
 
       if (comparators.length == 1) {
-        return new OneComparatorLeafCollector(comparators[0], reverseMul[0]) {
+        return new OneComparatorLeafCollector(comparators[0], reverseMul[0], mayNeedScoresTwice) {
 
           @Override
           public void collect(int doc) throws IOException {
-            ++totalHits;
-            if (queueFull) {
-              if ((reverseMul * comparator.compareBottom(doc)) <= 0) {
-                // since docs are visited in doc Id order, if compare is 0, it means
-                // this document is larger than anything else in the queue, and
-                // therefore not competitive.
-                return;
-              }
-
-              // This hit is competitive - replace bottom element in queue & adjustTop
-              comparator.copy(bottom.slot, doc);
-              updateBottom(doc);
-              comparator.setBottom(bottom.slot);
-            } else {
-              // Startup transient: queue hasn't gathered numHits yet
-              final int slot = totalHits - 1;
-              // Copy hit into queue
-              comparator.copy(slot, doc);
-              add(slot, doc, Float.NaN);
-              if (queueFull) {
-                comparator.setBottom(bottom.slot);
-              }
-            }
-          }
-
-        };
-      } else {
-        return new MultiComparatorLeafCollector(comparators, reverseMul) {
-
-          @Override
-          public void collect(int doc) throws IOException {
-            ++totalHits;
-            if (queueFull) {
-              if ((compareBottom(doc)) <= 0) {
-                // since docs are visited in doc Id order, if compare is 0, it means
-                // this document is larger than anything else in the queue, and
-                // therefore not competitive.
-                return;
-              }
-
-              // This hit is competitive - replace bottom element in queue & adjustTop
-              copy(bottom.slot, doc);
-              updateBottom(doc);
-              setBottom(bottom.slot);
-            } else {
-              // Startup transient: queue hasn't gathered numHits yet
-              final int slot = totalHits - 1;
-              // Copy hit into queue
-              copy(slot, doc);
-              add(slot, doc, Float.NaN);
-              if (queueFull) {
-                setBottom(bottom.slot);
+            float score = Float.NaN;
+            if (trackMaxScore) {
+              score = scorer.score();
+              if (score > maxScore) {
+                maxScore = score;
               }
             }
-          }
 
-        };
-      }
-    }
-
-  }
-
-  /*
-   * Implements a TopFieldCollector over one SortField criteria, while tracking
-   * document scores but no maxScore.
-   */
-  private static class ScoringNoMaxScoreCollector extends TopFieldCollector {
-
-    final FieldValueHitQueue<Entry> queue;
-
-    public ScoringNoMaxScoreCollector(Sort sort, FieldValueHitQueue<Entry> queue, int numHits, boolean fillFields) {
-      super(queue, numHits, fillFields, true);
-      this.queue = queue;
-    }
-
-    @Override
-    public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
-      docBase = context.docBase;
-
-      final LeafFieldComparator[] comparators = queue.getComparators(context);
-      final int[] reverseMul = queue.getReverseMul();
-
-      if (comparators.length == 1) {
-        return new OneComparatorLeafCollector(comparators[0], reverseMul[0]) {
-
-          @Override
-          public void collect(int doc) throws IOException {
             ++totalHits;
             if (queueFull) {
-              if ((reverseMul * comparator.compareBottom(doc)) <= 0) {
+              if (reverseMul * comparator.compareBottom(doc) <= 0) {
                 // since docs are visited in doc Id order, if compare is 0, it means
                 // this document is largest than anything else in the queue, and
                 // therefore not competitive.
                 return;
               }
 
-              // Compute the score only if the hit is competitive.
-              final float score = scorer.score();
+              if (trackDocScores && !trackMaxScore) {
+                score = scorer.score();
+              }
 
               // This hit is competitive - replace bottom element in queue & adjustTop
               comparator.copy(bottom.slot, doc);
               updateBottom(doc, score);
               comparator.setBottom(bottom.slot);
             } else {
-              // Compute the score only if the hit is competitive.
-              final float score = scorer.score();
-
               // Startup transient: queue hasn't gathered numHits yet
               final int slot = totalHits - 1;
-              // Copy hit into queue
-              comparator.copy(slot, doc);
-              add(slot, doc, score);
-              if (queueFull) {
-                comparator.setBottom(bottom.slot);
-              }
-            }
-          }
-
-        };
-      } else {
-        return new MultiComparatorLeafCollector(comparators, reverseMul) {
 
-          @Override
-          public void collect(int doc) throws IOException {
-            ++totalHits;
-            if (queueFull) {
-              if ((compareBottom(doc)) <= 0) {
-                // since docs are visited in doc Id order, if compare is 0, it means
-                // this document is largest than anything else in the queue, and
-                // therefore not competitive.
-                return;
+              if (trackDocScores && !trackMaxScore) {
+                score = scorer.score();
               }
 
-              // Compute the score only if the hit is competitive.
-              final float score = scorer.score();
-
-              // This hit is competitive - replace bottom element in queue & adjustTop
-              copy(bottom.slot, doc);
-              updateBottom(doc, score);
-              setBottom(bottom.slot);
-            } else {
-              // Compute the score only if the hit is competitive.
-              final float score = scorer.score();
-
-              // Startup transient: queue hasn't gathered numHits yet
-              final int slot = totalHits - 1;
-              // Copy hit into queue
-              copy(slot, doc);
-              add(slot, doc, score);
-              if (queueFull) {
-                setBottom(bottom.slot);
-              }
-            }
-          }
-
-        };
-      }
-    }
-
-  }
-
-  /*
-   * Implements a TopFieldCollector over one SortField criteria, with tracking
-   * document scores and maxScore.
-   */
-  private static class ScoringMaxScoreCollector extends TopFieldCollector {
-
-    final FieldValueHitQueue<Entry> queue;
-
-    public ScoringMaxScoreCollector(Sort sort, FieldValueHitQueue<Entry> queue, int numHits, boolean fillFields) {
-      super(queue, numHits, fillFields, true);
-      this.queue = queue;
-      maxScore = Float.MIN_NORMAL; // otherwise we would keep NaN
-    }
-
-    @Override
-    public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
-      docBase = context.docBase;
-
-      final LeafFieldComparator[] comparators = queue.getComparators(context);
-      final int[] reverseMul = queue.getReverseMul();
-
-      if (comparators.length == 1) {
-        return new OneComparatorLeafCollector(comparators[0], reverseMul[0]) {
-
-          @Override
-          public void collect(int doc) throws IOException {
-            final float score = scorer.score();
-            if (score > maxScore) {
-              maxScore = score;
-            }
-            ++totalHits;
-            if (queueFull) {
-              if (reverseMul * comparator.compareBottom(doc) <= 0) {
-                // since docs are visited in doc Id order, if compare is 0, it means
-                // this document is largest than anything else in the queue, and
-                // therefore not competitive.
-                return;
-              }
-
-              // This hit is competitive - replace bottom element in queue & adjustTop
-              comparator.copy(bottom.slot, doc);
-              updateBottom(doc, score);
-              comparator.setBottom(bottom.slot);
-            } else {
-              // Startup transient: queue hasn't gathered numHits yet
-              final int slot = totalHits - 1;
               // Copy hit into queue
               comparator.copy(slot, doc);
               add(slot, doc, score);
@@ -364,14 +213,18 @@ public abstract class TopFieldCollector
 
         };
       } else {
-        return new MultiComparatorLeafCollector(comparators, reverseMul) {
+        return new MultiComparatorLeafCollector(comparators, reverseMul, mayNeedScoresTwice) {
 
           @Override
           public void collect(int doc) throws IOException {
-            final float score = scorer.score();
-            if (score > maxScore) {
-              maxScore = score;
+            float score = Float.NaN;
+            if (trackMaxScore) {
+              score = scorer.score();
+              if (score > maxScore) {
+                maxScore = score;
+              }
             }
+
             ++totalHits;
             if (queueFull) {
               if (compareBottom(doc) <= 0) {
@@ -381,6 +234,10 @@ public abstract class TopFieldCollector
                 return;
               }
 
+              if (trackDocScores && !trackMaxScore) {
+                score = scorer.score();
+              }
+
               // This hit is competitive - replace bottom element in queue & adjustTop
               copy(bottom.slot, doc);
               updateBottom(doc, score);
@@ -388,6 +245,11 @@ public abstract class TopFieldCollector
             } else {
               // Startup transient: queue hasn't gathered numHits yet
               final int slot = totalHits - 1;
+
+              if (trackDocScores && !trackMaxScore) {
+                score = scorer.score();
+              }
+
               // Copy hit into queue
               copy(slot, doc);
               add(slot, doc, score);
@@ -413,6 +275,7 @@ public abstract class TopFieldCollector
     final boolean trackDocScores;
     final boolean trackMaxScore;
     final FieldDoc after;
+    final boolean mayNeedScoresTwice;
 
     public PagingFieldCollector(Sort sort, FieldValueHitQueue<Entry> queue, FieldDoc after, int numHits, boolean fillFields,
                                 boolean trackDocScores, boolean trackMaxScore) {
@@ -421,6 +284,7 @@ public abstract class TopFieldCollector
       this.trackDocScores = trackDocScores;
       this.trackMaxScore = trackMaxScore;
       this.after = after;
+      this.mayNeedScoresTwice = sort.needsScores() && (trackDocScores || trackMaxScore);
 
       // Must set maxScore to NEG_INF, or otherwise Math.max always returns NaN.
       maxScore = Float.NEGATIVE_INFINITY;
@@ -438,7 +302,7 @@ public abstract class TopFieldCollector
     public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
       docBase = context.docBase;
       final int afterDoc = after.doc - docBase;
-      return new MultiComparatorLeafCollector(queue.getComparators(context), queue.getReverseMul()) {
+      return new MultiComparatorLeafCollector(queue.getComparators(context), queue.getReverseMul(), mayNeedScoresTwice) {
 
         @Override
         public void collect(int doc) throws IOException {
@@ -628,13 +492,7 @@ public abstract class TopFieldCollector
     FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits);
 
     if (after == null) {
-      if (trackMaxScore) {
-        return new ScoringMaxScoreCollector(sort, queue, numHits, fillFields);
-      } else if (trackDocScores) {
-        return new ScoringNoMaxScoreCollector(sort, queue, numHits, fillFields);
-      } else {
-        return new NonScoringCollector(sort, queue, numHits, fillFields);
-      }
+      return new SimpleFieldCollector(sort, queue, numHits, fillFields, trackDocScores, trackMaxScore);
     } else {
       if (after.fields == null) {
         throw new IllegalArgumentException("after.fields wasn't set; you must pass fillFields=true for the previous search");

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java?rev=1694435&r1=1694434&r2=1694435&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java Thu Aug  6 09:55:39 2015
@@ -17,13 +17,21 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
+import java.io.IOException;
+
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.FieldValueHitQueue.Entry;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
 
 public class TestTopFieldCollector extends LuceneTestCase {
   private IndexSearcher is;
@@ -166,5 +174,99 @@ public class TestTopFieldCollector exten
       assertEquals(0, td.totalHits);
       assertTrue(Float.isNaN(td.getMaxScore()));
     }
-  }  
+  }
+
+  public void testComputeScoresOnlyOnce() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+    Document doc = new Document();
+    StringField text = new StringField("text", "foo", Store.NO);
+    doc.add(text);
+    NumericDocValuesField relevance = new NumericDocValuesField("relevance", 1);
+    doc.add(relevance);
+    w.addDocument(doc);
+    text.setStringValue("bar");
+    w.addDocument(doc);
+    text.setStringValue("baz");
+    w.addDocument(doc);
+    IndexReader reader = w.getReader();
+    TermQuery foo = new TermQuery(new Term("text", "foo"));
+    TermQuery bar = new TermQuery(new Term("text", "bar"));
+    bar.setBoost(2);
+    TermQuery baz = new TermQuery(new Term("text", "baz"));
+    baz.setBoost(3);
+    Query query = new BooleanQuery.Builder()
+        .add(foo, Occur.SHOULD)
+        .add(bar, Occur.SHOULD)
+        .add(baz, Occur.SHOULD)
+        .build();
+    final IndexSearcher searcher = new IndexSearcher(reader);
+    for (Sort sort : new Sort[] {new Sort(SortField.FIELD_SCORE), new Sort(new SortField("f", SortField.Type.SCORE))}) {
+      for (boolean doDocScores : new boolean[] {false, true}) {
+        for (boolean doMaxScore : new boolean[] {false, true}) {
+          final TopFieldCollector topCollector = TopFieldCollector.create(sort, TestUtil.nextInt(random(), 1, 2), true, doDocScores, doMaxScore);
+          final Collector assertingCollector = new Collector() {
+            @Override
+            public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
+              final LeafCollector in = topCollector.getLeafCollector(context);
+              return new FilterLeafCollector(in) {
+                @Override
+                public void setScorer(final Scorer scorer) throws IOException {
+                  Scorer s = new Scorer(null) {
+
+                    int lastComputedDoc = -1;
+                    
+                    @Override
+                    public float score() throws IOException {
+                      if (lastComputedDoc == docID()) {
+                        throw new AssertionError("Score computed twice on " + docID());
+                      }
+                      lastComputedDoc = docID();
+                      return scorer.score();
+                    }
+
+                    @Override
+                    public int freq() throws IOException {
+                      return scorer.freq();
+                    }
+
+                    @Override
+                    public int docID() {
+                      return scorer.docID();
+                    }
+
+                    @Override
+                    public int nextDoc() throws IOException {
+                      return scorer.nextDoc();
+                    }
+
+                    @Override
+                    public int advance(int target) throws IOException {
+                      return scorer.advance(target);
+                    }
+
+                    @Override
+                    public long cost() {
+                      return scorer.cost();
+                    }
+                    
+                  };
+                  super.setScorer(s);
+                }
+              };
+            }
+            @Override
+            public boolean needsScores() {
+              return topCollector.needsScores();
+            }
+          };
+          searcher.search(query, assertingCollector);
+        }
+      }
+    }
+    reader.close();
+    w.close();
+    dir.close();
+  }
+
 }