You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2018/08/30 20:02:01 UTC
[3/3] lucene-solr:branch_7x: LUCENE-8432: TopFieldComparator stops
calling the comparator when only counting hits.
LUCENE-8432: TopFieldComparator stops calling the comparator when only counting hits.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/49979235
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/49979235
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/49979235
Branch: refs/heads/branch_7x
Commit: 4997923525270d4bd9b6f909a5b11d387b517822
Parents: b1d165c
Author: Adrien Grand <jp...@gmail.com>
Authored: Thu Aug 30 12:00:21 2018 +0200
Committer: Adrien Grand <jp...@gmail.com>
Committed: Thu Aug 30 17:59:33 2018 +0200
----------------------------------------------------------------------
lucene/CHANGES.txt | 4 ++
.../apache/lucene/search/TopFieldCollector.java | 56 ++++++++++++++------
2 files changed, 44 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/49979235/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index f8dfd55..ee4296a 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -134,6 +134,10 @@ Improvements
* LUCENE-8460: Better argument validation in StoredField. (Namgyu Kim)
+* LUCENE-8432: TopFieldComparator stops comparing documents if the index is
+ sorted, even if hits still need to be visited to compute the hit count.
+ (Nikolay Khitrin)
+
Other:
* LUCENE-8366: Upgrade to ICU 62.1. Emoji handling now uses Unicode 11's
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/49979235/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
index 066c2c3..ffcb691 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
@@ -122,14 +122,17 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
final LeafFieldComparator[] comparators = queue.getComparators(context);
final int[] reverseMul = queue.getReverseMul();
final Sort indexSort = context.reader().getMetaData().getSort();
+ final boolean canEarlyStopComparing = indexSort != null &&
+ canEarlyTerminate(sort, indexSort);
final boolean canEarlyTerminate = trackTotalHits == false &&
trackMaxScore == false &&
- indexSort != null &&
- canEarlyTerminate(sort, indexSort);
+ canEarlyStopComparing;
final int initialTotalHits = totalHits;
return new MultiComparatorLeafCollector(comparators, reverseMul, mayNeedScoresTwice) {
+ boolean collectedAllCompetitiveHits = false;
+
@Override
public void collect(int doc) throws IOException {
float score = Float.NaN;
@@ -142,16 +145,25 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
++totalHits;
if (queueFull) {
+ if (collectedAllCompetitiveHits) {
+ return;
+ }
+
if (reverseMul * comparator.compareBottom(doc) <= 0) {
// since docs are visited in doc Id order, if compare is 0, it means
// this document is largest than anything else in the queue, and
// therefore not competitive.
- if (canEarlyTerminate) {
- // scale totalHits linearly based on the number of docs
- // and terminate collection
- totalHits += estimateRemainingHits(totalHits - initialTotalHits, doc, context.reader().maxDoc());
- earlyTerminated = true;
- throw new CollectionTerminatedException();
+ if (canEarlyStopComparing) {
+ if (canEarlyTerminate) {
+ // scale totalHits linearly based on the number of docs
+ // and terminate collection
+ totalHits += estimateRemainingHits(totalHits - initialTotalHits, doc, context.reader().maxDoc());
+ earlyTerminated = true;
+ throw new CollectionTerminatedException();
+ } else {
+ collectedAllCompetitiveHits = true;
+ return;
+ }
} else {
// just move to the next doc
return;
@@ -232,13 +244,16 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
docBase = context.docBase;
final int afterDoc = after.doc - docBase;
final Sort indexSort = context.reader().getMetaData().getSort();
+ final boolean canEarlyStopComparing = indexSort != null &&
+ canEarlyTerminate(sort, indexSort);
final boolean canEarlyTerminate = trackTotalHits == false &&
trackMaxScore == false &&
- indexSort != null &&
- canEarlyTerminate(sort, indexSort);
+ canEarlyStopComparing;
final int initialTotalHits = totalHits;
return new MultiComparatorLeafCollector(queue.getComparators(context), queue.getReverseMul(), mayNeedScoresTwice) {
+ boolean collectedAllCompetitiveHits = false;
+
@Override
public void collect(int doc) throws IOException {
//System.out.println(" collect doc=" + doc);
@@ -254,17 +269,26 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
}
if (queueFull) {
+ if (collectedAllCompetitiveHits) {
+ return;
+ }
+
// Fastmatch: return if this hit is no better than
// the worst hit currently in the queue:
final int cmp = reverseMul * comparator.compareBottom(doc);
if (cmp <= 0) {
// not competitive since documents are visited in doc id order
- if (canEarlyTerminate) {
- // scale totalHits linearly based on the number of docs
- // and terminate collection
- totalHits += estimateRemainingHits(totalHits - initialTotalHits, doc, context.reader().maxDoc());
- earlyTerminated = true;
- throw new CollectionTerminatedException();
+ if (canEarlyStopComparing) {
+ if (canEarlyTerminate) {
+ // scale totalHits linearly based on the number of docs
+ // and terminate collection
+ totalHits += estimateRemainingHits(totalHits - initialTotalHits, doc, context.reader().maxDoc());
+ earlyTerminated = true;
+ throw new CollectionTerminatedException();
+ } else {
+ collectedAllCompetitiveHits = true;
+ return;
+ }
} else {
// just move to the next doc
return;