You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by tf...@apache.org on 2020/06/18 17:46:39 UTC
[lucene-solr] branch branch_8x updated: LUCENE-9402: Let
MultiCollector handle minCompetitiveScore (#1567)
This is an automated email from the ASF dual-hosted git repository.
tflobbe pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/branch_8x by this push:
new 8d20e31 LUCENE-9402: Let MultiCollector handle minCompetitiveScore (#1567)
8d20e31 is described below
commit 8d20e31b22244f4fa68d5d8d82da5d07c4b6a351
Author: Tomas Fernandez Lobbe <tf...@apache.org>
AuthorDate: Thu Jun 18 10:19:49 2020 -0700
LUCENE-9402: Let MultiCollector handle minCompetitiveScore (#1567)
---
lucene/CHANGES.txt | 2 +
.../org/apache/lucene/search/MultiCollector.java | 111 +++++++++----
.../apache/lucene/search/MultiCollectorTest.java | 172 +++++++++++++++++++++
3 files changed, 251 insertions(+), 34 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index aa1ae36..6dfe6a2 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -79,6 +79,8 @@ Improvements
* LUCENE-9396: Improved truncation detection for points. (Adrien Grand, Robert Muir)
+* LUCENE-9402: Let MultiCollector handle minCompetitiveScore (Tomás Fernández Löbbe, Adrien Grand)
+
Optimizations
---------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java b/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java
index 82251e4..5cb6db8 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java
@@ -117,7 +117,7 @@ public class MultiCollector implements Collector {
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
- final List<LeafCollector> leafCollectors = new ArrayList<>();
+ final List<LeafCollector> leafCollectors = new ArrayList<>(collectors.length);
for (Collector collector : collectors) {
final LeafCollector leafCollector;
try {
@@ -134,7 +134,7 @@ public class MultiCollector implements Collector {
case 1:
return leafCollectors.get(0);
default:
- return new MultiLeafCollector(leafCollectors, cacheScores);
+ return new MultiLeafCollector(leafCollectors, cacheScores, scoreMode() == ScoreMode.TOP_SCORES);
}
}
@@ -142,12 +142,14 @@ public class MultiCollector implements Collector {
private final boolean cacheScores;
private final LeafCollector[] collectors;
- private int numCollectors;
+ private final float[] minScores;
+ private final boolean skipNonCompetitiveScores;
- private MultiLeafCollector(List<LeafCollector> collectors, boolean cacheScores) {
+ private MultiLeafCollector(List<LeafCollector> collectors, boolean cacheScores, boolean skipNonCompetitive) {
this.collectors = collectors.toArray(new LeafCollector[collectors.size()]);
this.cacheScores = cacheScores;
- this.numCollectors = this.collectors.length;
+ this.skipNonCompetitiveScores = skipNonCompetitive;
+ this.minScores = this.skipNonCompetitiveScores ? new float[this.collectors.length] : null;
}
@Override
@@ -155,48 +157,89 @@ public class MultiCollector implements Collector {
if (cacheScores) {
scorer = new ScoreCachingWrappingScorer(scorer);
}
- scorer = new FilterScorable(scorer) {
- @Override
- public void setMinCompetitiveScore(float minScore) {
- // Ignore calls to setMinCompetitiveScore so that if we wrap two
- // collectors and one of them wants to skip low-scoring hits, then
- // the other collector still sees all hits. We could try to reconcile
- // min scores and take the maximum min score across collectors, but
- // this is very unlikely to be helpful in practice.
+ if (skipNonCompetitiveScores) {
+ for (int i = 0; i < collectors.length; ++i) {
+ final LeafCollector c = collectors[i];
+ if (c != null) {
+ c.setScorer(new MinCompetitiveScoreAwareScorable(scorer, i, minScores));
+ }
}
+ } else {
+ scorer = new FilterScorable(scorer) {
+ @Override
+ public void setMinCompetitiveScore(float minScore) throws IOException {
+ // Ignore calls to setMinCompetitiveScore so that if we wrap two
+ // collectors and one of them wants to skip low-scoring hits, then
+ // the other collector still sees all hits.
+ }
- };
- for (int i = 0; i < numCollectors; ++i) {
- final LeafCollector c = collectors[i];
- c.setScorer(scorer);
+ };
+ for (int i = 0; i < collectors.length; ++i) {
+ final LeafCollector c = collectors[i];
+ if (c != null) {
+ c.setScorer(scorer);
+ }
+ }
}
}
- private void removeCollector(int i) {
- System.arraycopy(collectors, i + 1, collectors, i, numCollectors - i - 1);
- --numCollectors;
- collectors[numCollectors] = null;
- }
-
@Override
public void collect(int doc) throws IOException {
- final LeafCollector[] collectors = this.collectors;
- int numCollectors = this.numCollectors;
- for (int i = 0; i < numCollectors; ) {
+ for (int i = 0; i < collectors.length; i++) {
final LeafCollector collector = collectors[i];
- try {
- collector.collect(doc);
- ++i;
- } catch (CollectionTerminatedException e) {
- removeCollector(i);
- numCollectors = this.numCollectors;
- if (numCollectors == 0) {
- throw new CollectionTerminatedException();
+ if (collector != null) {
+ try {
+ collector.collect(doc);
+ } catch (CollectionTerminatedException e) {
+ collectors[i] = null;
+ if (allCollectorsTerminated()) {
+ throw new CollectionTerminatedException();
+ }
}
}
}
}
+ private boolean allCollectorsTerminated() {
+ for (int i = 0; i < collectors.length; i++) {
+ if (collectors[i] != null) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ }
+
+ final static class MinCompetitiveScoreAwareScorable extends FilterScorable {
+
+ private final int idx;
+ private final float[] minScores;
+
+ MinCompetitiveScoreAwareScorable(Scorable in, int idx, float[] minScores) {
+ super(in);
+ this.idx = idx;
+ this.minScores = minScores;
+ }
+
+ @Override
+ public void setMinCompetitiveScore(float minScore) throws IOException {
+ if (minScore > minScores[idx]) {
+ minScores[idx] = minScore;
+ in.setMinCompetitiveScore(minScore());
+ }
+ }
+
+ private float minScore() {
+ float min = Float.MAX_VALUE;
+ for (int i = 0; i < minScores.length; i++) {
+ if (minScores[i] < min) {
+ min = minScores[i];
+ }
+ }
+ return min;
+ }
+
}
}
diff --git a/lucene/core/src/test/org/apache/lucene/search/MultiCollectorTest.java b/lucene/core/src/test/org/apache/lucene/search/MultiCollectorTest.java
index c3b4f42..80a5a9a 100644
--- a/lucene/core/src/test/org/apache/lucene/search/MultiCollectorTest.java
+++ b/lucene/core/src/test/org/apache/lucene/search/MultiCollectorTest.java
@@ -163,4 +163,176 @@ public class MultiCollectorTest extends LuceneTestCase {
reader.close();
dir.close();
}
+
+ public void testScorerWrappingForTopScores() throws IOException {
+ Directory dir = newDirectory();
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
+ iw.addDocument(new Document());
+ DirectoryReader reader = iw.getReader();
+ iw.close();
+ final LeafReaderContext ctx = reader.leaves().get(0);
+ Collector c1 = collector(ScoreMode.TOP_SCORES, MultiCollector.MinCompetitiveScoreAwareScorable.class);
+ Collector c2 = collector(ScoreMode.TOP_SCORES, MultiCollector.MinCompetitiveScoreAwareScorable.class);
+ MultiCollector.wrap(c1, c2).getLeafCollector(ctx).setScorer(new ScoreAndDoc());
+
+ c1 = collector(ScoreMode.TOP_SCORES, ScoreCachingWrappingScorer.class);
+ c2 = collector(ScoreMode.COMPLETE, ScoreCachingWrappingScorer.class);
+ MultiCollector.wrap(c1, c2).getLeafCollector(ctx).setScorer(new ScoreAndDoc());
+
+ reader.close();
+ dir.close();
+ }
+
+ public void testMinCompetitiveScore() throws IOException {
+ float[] currentMinScores = new float[3];
+ float[] minCompetitiveScore = new float[1];
+ Scorable scorer = new Scorable() {
+
+ @Override
+ public float score() throws IOException {
+ return 0;
+ }
+
+ @Override
+ public int docID() {
+ return 0;
+ }
+
+ @Override
+ public void setMinCompetitiveScore(float minScore) throws IOException {
+ minCompetitiveScore[0] = minScore;
+ }
+ };
+ Scorable s0 = new MultiCollector.MinCompetitiveScoreAwareScorable(scorer, 0, currentMinScores);
+ Scorable s1 = new MultiCollector.MinCompetitiveScoreAwareScorable(scorer, 1, currentMinScores);
+ Scorable s2 = new MultiCollector.MinCompetitiveScoreAwareScorable(scorer, 2, currentMinScores);
+ assertEquals(0f, minCompetitiveScore[0], 0);
+ s0.setMinCompetitiveScore(0.5f);
+ assertEquals(0f, minCompetitiveScore[0], 0);
+ s1.setMinCompetitiveScore(0.8f);
+ assertEquals(0f, minCompetitiveScore[0], 0);
+ s2.setMinCompetitiveScore(0.3f);
+ assertEquals(0.3f, minCompetitiveScore[0], 0);
+ s2.setMinCompetitiveScore(0.1f);
+ assertEquals(0.3f, minCompetitiveScore[0], 0);
+ s1.setMinCompetitiveScore(Float.MAX_VALUE);
+ assertEquals(0.3f, minCompetitiveScore[0], 0);
+ s2.setMinCompetitiveScore(Float.MAX_VALUE);
+ assertEquals(0.5f, minCompetitiveScore[0], 0);
+ s0.setMinCompetitiveScore(Float.MAX_VALUE);
+ assertEquals(Float.MAX_VALUE, minCompetitiveScore[0], 0);
+ }
+
+ public void testCollectionTermination() throws IOException {
+ Directory dir = newDirectory();
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
+ iw.addDocument(new Document());
+ DirectoryReader reader = iw.getReader();
+ iw.close();
+ final LeafReaderContext ctx = reader.leaves().get(0);
+ DummyCollector c1 = new TerminatingDummyCollector(1, ScoreMode.COMPLETE);
+ DummyCollector c2 = new TerminatingDummyCollector(2, ScoreMode.COMPLETE);
+
+ Collector mc = MultiCollector.wrap(c1, c2);
+ LeafCollector lc = mc.getLeafCollector(ctx);
+ lc.setScorer(new ScoreAndDoc());
+ lc.collect(0); // OK
+ assertTrue("c1's collect should be called", c1.collectCalled);
+ assertTrue("c2's collect should be called", c2.collectCalled);
+ c1.collectCalled = false;
+ c2.collectCalled = false;
+ lc.collect(1); // OK, but c1 should terminate
+ assertFalse("c1 should be removed already", c1.collectCalled);
+ assertTrue("c2's collect should be called", c2.collectCalled);
+ c2.collectCalled = false;
+
+ expectThrows(CollectionTerminatedException.class, () -> {
+ lc.collect(2);
+ });
+ assertFalse("c1 should be removed already", c1.collectCalled);
+ assertFalse("c2 should be removed already", c2.collectCalled);
+
+ reader.close();
+ dir.close();
+ }
+
+ public void testSetScorerOnCollectionTerminationSkipNonCompetitive() throws IOException {
+ doTestSetScorerOnCollectionTermination(true);
+ }
+
+ public void testSetScorerOnCollectionTerminationSkipNoSkips() throws IOException {
+ doTestSetScorerOnCollectionTermination(false);
+ }
+
+ private void doTestSetScorerOnCollectionTermination(boolean allowSkipNonCompetitive) throws IOException {
+ Directory dir = newDirectory();
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
+ iw.addDocument(new Document());
+ DirectoryReader reader = iw.getReader();
+ iw.close();
+ final LeafReaderContext ctx = reader.leaves().get(0);
+
+ DummyCollector c1 = new TerminatingDummyCollector(1, allowSkipNonCompetitive? ScoreMode.TOP_SCORES : ScoreMode.COMPLETE);
+ DummyCollector c2 = new TerminatingDummyCollector(2, allowSkipNonCompetitive? ScoreMode.TOP_SCORES : ScoreMode.COMPLETE);
+
+ Collector mc = MultiCollector.wrap(c1, c2);
+ LeafCollector lc = mc.getLeafCollector(ctx);
+ assertFalse(c1.setScorerCalled);
+ assertFalse(c2.setScorerCalled);
+ lc.setScorer(new ScoreAndDoc());
+ assertTrue(c1.setScorerCalled);
+ assertTrue(c2.setScorerCalled);
+ c1.setScorerCalled = false;
+ c2.setScorerCalled = false;
+ lc.collect(0); // OK
+
+ lc.setScorer(new ScoreAndDoc());
+ assertTrue(c1.setScorerCalled);
+ assertTrue(c2.setScorerCalled);
+ c1.setScorerCalled = false;
+ c2.setScorerCalled = false;
+
+ lc.collect(1); // OK, but c1 should terminate
+ lc.setScorer(new ScoreAndDoc());
+ assertFalse(c1.setScorerCalled);
+ assertTrue(c2.setScorerCalled);
+ c2.setScorerCalled = false;
+
+ expectThrows(CollectionTerminatedException.class, () -> {
+ lc.collect(2);
+ });
+ lc.setScorer(new ScoreAndDoc());
+ assertFalse(c1.setScorerCalled);
+ assertFalse(c2.setScorerCalled);
+
+ reader.close();
+ dir.close();
+ }
+
+ private static class TerminatingDummyCollector extends DummyCollector {
+
+ private final int terminateOnDoc;
+ private final ScoreMode scoreMode;
+
+ public TerminatingDummyCollector(int terminateOnDoc, ScoreMode scoreMode) {
+ super();
+ this.terminateOnDoc = terminateOnDoc;
+ this.scoreMode = scoreMode;
+ }
+
+ @Override
+ public void collect(int doc) throws IOException {
+ if (doc == terminateOnDoc) {
+ throw new CollectionTerminatedException();
+ }
+ super.collect(doc);
+ }
+
+ @Override
+ public ScoreMode scoreMode() {
+ return scoreMode;
+ }
+
+ }
+
}