You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2018/08/30 20:02:01 UTC

[3/3] lucene-solr:branch_7x: LUCENE-8432: TopFieldComparator stops calling the comparator when only counting hits.

LUCENE-8432: TopFieldComparator stops calling the comparator when only counting hits.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/49979235
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/49979235
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/49979235

Branch: refs/heads/branch_7x
Commit: 4997923525270d4bd9b6f909a5b11d387b517822
Parents: b1d165c
Author: Adrien Grand <jp...@gmail.com>
Authored: Thu Aug 30 12:00:21 2018 +0200
Committer: Adrien Grand <jp...@gmail.com>
Committed: Thu Aug 30 17:59:33 2018 +0200

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  4 ++
 .../apache/lucene/search/TopFieldCollector.java | 56 ++++++++++++++------
 2 files changed, 44 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/49979235/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index f8dfd55..ee4296a 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -134,6 +134,10 @@ Improvements
 
 * LUCENE-8460: Better argument validation in StoredField. (Namgyu Kim)
 
+* LUCENE-8432: TopFieldComparator stops comparing documents if the index is
+  sorted, even if hits still need to be visited to compute the hit count.
+  (Nikolay Khitrin)
+
 Other:
 
 * LUCENE-8366: Upgrade to ICU 62.1. Emoji handling now uses Unicode 11's

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/49979235/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
index 066c2c3..ffcb691 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
@@ -122,14 +122,17 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
       final LeafFieldComparator[] comparators = queue.getComparators(context);
       final int[] reverseMul = queue.getReverseMul();
       final Sort indexSort = context.reader().getMetaData().getSort();
+      final boolean canEarlyStopComparing = indexSort != null &&
+          canEarlyTerminate(sort, indexSort);
       final boolean canEarlyTerminate = trackTotalHits == false &&
           trackMaxScore == false &&
-          indexSort != null &&
-          canEarlyTerminate(sort, indexSort);
+          canEarlyStopComparing;
       final int initialTotalHits = totalHits;
 
       return new MultiComparatorLeafCollector(comparators, reverseMul, mayNeedScoresTwice) {
 
+        boolean collectedAllCompetitiveHits = false;
+
         @Override
         public void collect(int doc) throws IOException {
           float score = Float.NaN;
@@ -142,16 +145,25 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
 
           ++totalHits;
           if (queueFull) {
+            if (collectedAllCompetitiveHits) {
+              return;
+            }
+
             if (reverseMul * comparator.compareBottom(doc) <= 0) {
               // since docs are visited in doc Id order, if compare is 0, it means
               // this document is largest than anything else in the queue, and
               // therefore not competitive.
-              if (canEarlyTerminate) {
-                // scale totalHits linearly based on the number of docs
-                // and terminate collection
-                totalHits += estimateRemainingHits(totalHits - initialTotalHits, doc, context.reader().maxDoc());
-                earlyTerminated = true;
-                throw new CollectionTerminatedException();
+              if (canEarlyStopComparing) {
+                if (canEarlyTerminate) {
+                  // scale totalHits linearly based on the number of docs
+                  // and terminate collection
+                  totalHits += estimateRemainingHits(totalHits - initialTotalHits, doc, context.reader().maxDoc());
+                  earlyTerminated = true;
+                  throw new CollectionTerminatedException();
+                } else {
+                  collectedAllCompetitiveHits = true;
+                  return;
+                }
               } else {
                 // just move to the next doc
                 return;
@@ -232,13 +244,16 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
       docBase = context.docBase;
       final int afterDoc = after.doc - docBase;
       final Sort indexSort = context.reader().getMetaData().getSort();
+      final boolean canEarlyStopComparing = indexSort != null &&
+          canEarlyTerminate(sort, indexSort);
       final boolean canEarlyTerminate = trackTotalHits == false &&
           trackMaxScore == false &&
-          indexSort != null &&
-          canEarlyTerminate(sort, indexSort);
+          canEarlyStopComparing;
       final int initialTotalHits = totalHits;
       return new MultiComparatorLeafCollector(queue.getComparators(context), queue.getReverseMul(), mayNeedScoresTwice) {
 
+        boolean collectedAllCompetitiveHits = false;
+
         @Override
         public void collect(int doc) throws IOException {
           //System.out.println("  collect doc=" + doc);
@@ -254,17 +269,26 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
           }
 
           if (queueFull) {
+            if (collectedAllCompetitiveHits) {
+              return;
+            }
+
             // Fastmatch: return if this hit is no better than
             // the worst hit currently in the queue:
             final int cmp = reverseMul * comparator.compareBottom(doc);
             if (cmp <= 0) {
               // not competitive since documents are visited in doc id order
-              if (canEarlyTerminate) {
-                // scale totalHits linearly based on the number of docs
-                // and terminate collection
-                totalHits += estimateRemainingHits(totalHits - initialTotalHits, doc, context.reader().maxDoc());
-                earlyTerminated = true;
-                throw new CollectionTerminatedException();
+              if (canEarlyStopComparing) {
+                if (canEarlyTerminate) {
+                  // scale totalHits linearly based on the number of docs
+                  // and terminate collection
+                  totalHits += estimateRemainingHits(totalHits - initialTotalHits, doc, context.reader().maxDoc());
+                  earlyTerminated = true;
+                  throw new CollectionTerminatedException();
+                } else {
+                  collectedAllCompetitiveHits = true;
+                  return;
+                }
               } else {
                 // just move to the next doc
                 return;