You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by so...@apache.org on 2020/02/27 13:15:59 UTC
[lucene-solr] branch master updated: LUCENE-9202: refactor leaf collectors in TopFieldCollector
This is an automated email from the ASF dual-hosted git repository.
sokolov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new 294b8d4 LUCENE-9202: refactor leaf collectors in TopFieldCollector
294b8d4 is described below
commit 294b8d4ee1586ba6e20480aa5072386963056347
Author: Michael Sokolov <so...@amazon.com>
AuthorDate: Sun Jan 26 10:31:20 2020 -0500
LUCENE-9202: refactor leaf collectors in TopFieldCollector
---
.../apache/lucene/search/TopFieldCollector.java | 209 +++++++++------------
1 file changed, 89 insertions(+), 120 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
index c638bf9..0def795 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
@@ -69,6 +69,79 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
}
}
+ private abstract class TopFieldLeafCollector extends MultiComparatorLeafCollector {
+ // Holds the per-hit steps (hit counting, threshold check, queue insertion) that the anonymous leaf collectors created in getLeafCollector invoke from collect().
+ final boolean canEarlyTerminate; // computed once per leaf in the constructor from the search sort and the leaf's index sort
+ boolean collectedAllCompetitiveHits = false; // latched by thresholdCheck; once true, every later hit in this leaf is reported as non-competitive
+ /** Passes the leaf's comparators and reverse multipliers from {@code queue} to the superclass, then evaluates canEarlyTerminate(sort, indexSort) for this leaf. */
+ TopFieldLeafCollector(FieldValueHitQueue<Entry> queue, Sort sort, LeafReaderContext context) throws IOException {
+ super(queue.getComparators(context), queue.getReverseMul());
+ final Sort indexSort = context.reader().getMetaData().getSort();
+ canEarlyTerminate = canEarlyTerminate(sort, indexSort);
+ }
+ /** Counts one hit: increments totalHits and the hits-threshold counter, and calls updateGlobalMinCompetitiveScore when minScoreAcc is set and {@code (totalHits & minScoreAcc.modInterval) == 0}. The {@code doc} argument is not read. */
+ void countHit(int doc) throws IOException {
+ ++totalHits;
+ hitsThresholdChecker.incrementHitCount();
+
+ if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
+ updateGlobalMinCompetitiveScore(scorer);
+ }
+ }
+ /** Returns true when the caller should skip {@code doc} as non-competitive, false when it should be collected; throws CollectionTerminatedException if early termination is possible and the hit threshold has been reached. */
+ boolean thresholdCheck(int doc) throws IOException {
+ if (collectedAllCompetitiveHits || reverseMul * comparator.compareBottom(doc) <= 0) {
+ // since docs are visited in doc Id order, if compare is 0, it means
+ // this document is larger than anything else in the queue, and
+ // therefore not competitive.
+ if (canEarlyTerminate) {
+ if (hitsThresholdChecker.isThresholdReached()) {
+ totalHitsRelation = Relation.GREATER_THAN_OR_EQUAL_TO; // record that the hit count is now only a lower bound before aborting this leaf
+ throw new CollectionTerminatedException();
+ } else {
+ collectedAllCompetitiveHits = true; // threshold not reached yet: keep counting hits but stop comparing them
+ }
+ } else if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
+ // we can start setting the min competitive score if the
+ // threshold is reached for the first time here.
+ updateMinCompetitiveScore(scorer);
+ }
+ return true;
+ }
+ return false;
+ }
+ /** Copies {@code doc} into the current bottom slot, updates the queue bottom, re-points the comparator at the new bottom slot and calls updateMinCompetitiveScore. */
+ void collectCompetitiveHit(int doc) throws IOException {
+ // This hit is competitive - replace bottom element in queue & adjustTop
+ comparator.copy(bottom.slot, doc);
+ updateBottom(doc);
+ comparator.setBottom(bottom.slot);
+ updateMinCompetitiveScore(scorer);
+ }
+ /** Stores {@code doc} in slot {@code hitsCollected - 1} and adds it to the queue; if that add leaves queueFull set, sets the comparator bottom and calls updateMinCompetitiveScore. */
+ void collectAnyHit(int doc, int hitsCollected) throws IOException {
+ // Startup transient: queue hasn't gathered numHits yet
+ int slot = hitsCollected - 1;
+ // Copy hit into queue
+ comparator.copy(slot, doc);
+ add(slot, doc);
+ if (queueFull) {
+ comparator.setBottom(bottom.slot);
+ updateMinCompetitiveScore(scorer);
+ }
+ }
+ /** Installs the scorer via the superclass, resets minCompetitiveScore to 0 and calls updateMinCompetitiveScore (plus updateGlobalMinCompetitiveScore when minScoreAcc is set). */
+ @Override
+ public void setScorer(Scorable scorer) throws IOException {
+ super.setScorer(scorer);
+ minCompetitiveScore = 0f;
+ updateMinCompetitiveScore(scorer);
+ if (minScoreAcc != null) {
+ updateGlobalMinCompetitiveScore(scorer);
+ }
+ }
+ }
+
static boolean canEarlyTerminate(Sort searchSort, Sort indexSort) {
return canEarlyTerminateOnDocId(searchSort) ||
canEarlyTerminateOnPrefix(searchSort, indexSort);
@@ -113,73 +186,20 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
docBase = context.docBase;
- final LeafFieldComparator[] comparators = queue.getComparators(context);
- final int[] reverseMul = queue.getReverseMul();
- final Sort indexSort = context.reader().getMetaData().getSort();
- final boolean canEarlyTerminate = canEarlyTerminate(sort, indexSort);
-
- return new MultiComparatorLeafCollector(comparators, reverseMul) {
-
- boolean collectedAllCompetitiveHits = false;
-
- @Override
- public void setScorer(Scorable scorer) throws IOException {
- super.setScorer(scorer);
- minCompetitiveScore = 0f;
- updateMinCompetitiveScore(scorer);
- if (minScoreAcc != null) {
- updateGlobalMinCompetitiveScore(scorer);
- }
- }
+ return new TopFieldLeafCollector(queue, sort, context) {
@Override
public void collect(int doc) throws IOException {
- ++totalHits;
- hitsThresholdChecker.incrementHitCount();
-
- if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
- updateGlobalMinCompetitiveScore(scorer);
- }
-
+ countHit(doc);
if (queueFull) {
- if (collectedAllCompetitiveHits || reverseMul * comparator.compareBottom(doc) <= 0) {
- // since docs are visited in doc Id order, if compare is 0, it means
- // this document is largest than anything else in the queue, and
- // therefore not competitive.
- if (canEarlyTerminate) {
- if (hitsThresholdChecker.isThresholdReached()) {
- totalHitsRelation = Relation.GREATER_THAN_OR_EQUAL_TO;
- throw new CollectionTerminatedException();
- } else {
- collectedAllCompetitiveHits = true;
- }
- } else if (totalHitsRelation == Relation.EQUAL_TO) {
- // we can start setting the min competitive score if the
- // threshold is reached for the first time here.
- updateMinCompetitiveScore(scorer);
- }
+ if (thresholdCheck(doc)) {
return;
}
-
- // This hit is competitive - replace bottom element in queue & adjustTop
- comparator.copy(bottom.slot, doc);
- updateBottom(doc);
- comparator.setBottom(bottom.slot);
- updateMinCompetitiveScore(scorer);
+ collectCompetitiveHit(doc);
} else {
- // Startup transient: queue hasn't gathered numHits yet
- final int slot = totalHits - 1;
-
- // Copy hit into queue
- comparator.copy(slot, doc);
- add(slot, doc);
- if (queueFull) {
- comparator.setBottom(bottom.slot);
- updateMinCompetitiveScore(scorer);
- }
+ collectAnyHit(doc, totalHits);
}
}
-
};
}
@@ -215,52 +235,14 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
docBase = context.docBase;
final int afterDoc = after.doc - docBase;
- final Sort indexSort = context.reader().getMetaData().getSort();
- final boolean canEarlyTerminate = canEarlyTerminate(sort, indexSort);
- return new MultiComparatorLeafCollector(queue.getComparators(context), queue.getReverseMul()) {
-
- boolean collectedAllCompetitiveHits = false;
- @Override
- public void setScorer(Scorable scorer) throws IOException {
- super.setScorer(scorer);
- minCompetitiveScore = 0f;
- updateMinCompetitiveScore(scorer);
- if (minScoreAcc != null) {
- updateGlobalMinCompetitiveScore(scorer);
- }
- }
+ return new TopFieldLeafCollector(queue, sort, context) {
@Override
public void collect(int doc) throws IOException {
- //System.out.println(" collect doc=" + doc);
-
- totalHits++;
- hitsThresholdChecker.incrementHitCount();
-
- if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
- updateGlobalMinCompetitiveScore(scorer);
- }
-
+ countHit(doc);
if (queueFull) {
- // Fastmatch: return if this hit is no better than
- // the worst hit currently in the queue:
- if (collectedAllCompetitiveHits || reverseMul * comparator.compareBottom(doc) <= 0) {
- // since docs are visited in doc Id order, if compare is 0, it means
- // this document is largest than anything else in the queue, and
- // therefore not competitive.
- if (canEarlyTerminate) {
- if (hitsThresholdChecker.isThresholdReached()) {
- totalHitsRelation = Relation.GREATER_THAN_OR_EQUAL_TO;
- throw new CollectionTerminatedException();
- } else {
- collectedAllCompetitiveHits = true;
- }
- } else if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
- // we can start setting the min competitive score if the
- // threshold is reached for the first time here.
- updateMinCompetitiveScore(scorer);
- }
+ if (thresholdCheck(doc)) {
return;
}
}
@@ -277,28 +259,10 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
}
if (queueFull) {
- // This hit is competitive - replace bottom element in queue & adjustTop
- comparator.copy(bottom.slot, doc);
-
- updateBottom(doc);
-
- comparator.setBottom(bottom.slot);
- updateMinCompetitiveScore(scorer);
+ collectCompetitiveHit(doc);
} else {
collectedHits++;
-
- // Startup transient: queue hasn't gathered numHits yet
- final int slot = collectedHits - 1;
- //System.out.println(" slot=" + slot);
- // Copy hit into queue
- comparator.copy(slot, doc);
-
- bottom = pq.add(new Entry(slot, docBase + doc));
- queueFull = collectedHits == numHits;
- if (queueFull) {
- comparator.setBottom(bottom.slot);
- updateMinCompetitiveScore(scorer);
- }
+ collectAnyHit(doc, collectedHits);
}
}
};
@@ -555,7 +519,11 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
final void add(int slot, int doc) {
bottom = pq.add(new Entry(slot, docBase + doc));
- queueFull = totalHits == numHits;
+ // The queue is full either when totalHits == numHits (in SimpleFieldCollector), in which case
+ // slot = totalHits - 1, or when hitsCollected == numHits (in PagingFieldCollector this is hits
+ // on the current page) and slot = hitsCollected - 1.
+ assert slot < numHits;
+ queueFull = slot == numHits - 1;
}
final void updateBottom(int doc) {
@@ -597,4 +565,5 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
public boolean isEarlyTerminated() {
return totalHitsRelation == Relation.GREATER_THAN_OR_EQUAL_TO;
}
+
}