You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ji...@apache.org on 2021/10/11 09:10:12 UTC
[lucene-solr] branch branch_8_10 updated: LUCENE-10110:
MultiCollector should wrap single leaf collector that wants to skip
low-scoring hits but the combined score mode doesn't allow it.
This is an automated email from the ASF dual-hosted git repository.
jimczi pushed a commit to branch branch_8_10
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/branch_8_10 by this push:
new cd420d9 LUCENE-10110: MultiCollector should wrap single leaf collector that wants to skip low-scoring hits but the combined score mode doesn't allow it.
cd420d9 is described below
commit cd420d949eece3b5fe15094ebb5c62c397bdb5ee
Author: jimczi <ji...@apache.org>
AuthorDate: Mon Sep 20 09:12:08 2021 +0200
LUCENE-10110: MultiCollector should wrap single leaf collector that wants to skip low-scoring hits
but the combined score mode doesn't allow it.
---
lucene/CHANGES.txt | 3 +
.../org/apache/lucene/search/MultiCollector.java | 46 +++++---
.../apache/lucene/search/TestMultiCollector.java | 121 ++++++++++++++++-----
3 files changed, 125 insertions(+), 45 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 1711a3a..82ebe5e 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -8,6 +8,9 @@ http://s.apache.org/luceneversions
Bug Fixes
---------------------
+* LUCENE-10110: MultiCollector now handles single leaf collector that wants to skip low-scoring hits
+ but the combined score mode doesn't allow it. (Jim Ferenczi)
+
* LUCENE-10119: Sort optimization with search_after can wrongly skip documents
whose values are equal to the last value of the previous page (Nhat Nguyen)
diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java b/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java
index c7178fa..8fc162c 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java
@@ -25,10 +25,14 @@ import java.util.List;
import org.apache.lucene.index.LeafReaderContext;
/**
- * A {@link Collector} which allows running a search with several
- * {@link Collector}s. It offers a static {@link #wrap} method which accepts a
- * list of collectors and wraps them with {@link MultiCollector}, while
- * filtering out the <code>null</code> null ones.
+ * A {@link Collector} which allows running a search with several {@link Collector}s. It offers a
+ * static {@link #wrap} method which accepts a list of collectors and wraps them with {@link
+ * MultiCollector}, while filtering out the <code>null</code> null ones.
+ *
+ * <p><b>NOTE:</b>When mixing collectors that want to skip low-scoring hits ({@link
+ * ScoreMode#TOP_SCORES}) with ones that require to see all hits, such as mixing {@link
+ * TopScoreDocCollector} and {@link TotalHitCountCollector}, it should be faster to run the query
+ * twice, once for each collector, rather than using this wrapper on a single search.
*/
public class MultiCollector implements Collector {
@@ -48,7 +52,7 @@ public class MultiCollector implements Collector {
* <li>Otherwise the method returns a {@link MultiCollector} which wraps the
* non-<code>null</code> ones.
* </ul>
- *
+ *
* @throws IllegalArgumentException
* if either 0 collectors were input, or all collectors are
* <code>null</code>.
@@ -87,7 +91,7 @@ public class MultiCollector implements Collector {
return new MultiCollector(colls);
}
}
-
+
private final boolean cacheScores;
private final Collector[] collectors;
@@ -118,6 +122,7 @@ public class MultiCollector implements Collector {
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
final List<LeafCollector> leafCollectors = new ArrayList<>(collectors.length);
+ ScoreMode leafScoreMode = null;
for (Collector collector : collectors) {
final LeafCollector leafCollector;
try {
@@ -126,15 +131,24 @@ public class MultiCollector implements Collector {
// this leaf collector does not need this segment
continue;
}
+ if (leafScoreMode == null) {
+ leafScoreMode = collector.scoreMode();
+ } else if (leafScoreMode != collector.scoreMode()) {
+ leafScoreMode = ScoreMode.COMPLETE;
+ }
leafCollectors.add(leafCollector);
}
- switch (leafCollectors.size()) {
- case 0:
- throw new CollectionTerminatedException();
- case 1:
+ if (leafCollectors.isEmpty()) {
+ throw new CollectionTerminatedException();
+ } else {
+ // Wraps single leaf collector that wants to skip low-scoring hits (ScoreMode.TOP_SCORES)
+ // but the global score mode doesn't allow it.
+ if (leafCollectors.size() == 1
+ && (scoreMode() == ScoreMode.TOP_SCORES || leafScoreMode != ScoreMode.TOP_SCORES)) {
return leafCollectors.get(0);
- default:
- return new MultiLeafCollector(leafCollectors, cacheScores, scoreMode() == ScoreMode.TOP_SCORES);
+ }
+ return new MultiLeafCollector(
+ leafCollectors, cacheScores, scoreMode() == ScoreMode.TOP_SCORES);
}
}
@@ -210,9 +224,9 @@ public class MultiCollector implements Collector {
}
}
-
+
final static class MinCompetitiveScoreAwareScorable extends FilterScorable {
-
+
private final int idx;
private final float[] minScores;
@@ -221,7 +235,7 @@ public class MultiCollector implements Collector {
this.idx = idx;
this.minScores = minScores;
}
-
+
@Override
public void setMinCompetitiveScore(float minScore) throws IOException {
if (minScore > minScores[idx]) {
@@ -239,7 +253,7 @@ public class MultiCollector implements Collector {
}
return min;
}
-
+
}
}
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMultiCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestMultiCollector.java
index f1adc1b..f7f6ee3 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestMultiCollector.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestMultiCollector.java
@@ -220,6 +220,69 @@ public class TestMultiCollector extends LuceneTestCase {
dir.close();
}
+ public void testDisablesSetMinScoreWithEarlyTermination() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+ w.addDocument(new Document());
+ IndexReader reader = DirectoryReader.open(w);
+ w.close();
+
+ Scorable scorer =
+ new Scorable() {
+ @Override
+ public int docID() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public float score() {
+ return 0;
+ }
+
+ @Override
+ public void setMinCompetitiveScore(float minScore) {
+ throw new AssertionError();
+ }
+ };
+
+ Collector collector =
+ new SimpleCollector() {
+ private Scorable scorer;
+ float minScore = 0;
+
+ @Override
+ public ScoreMode scoreMode() {
+ return ScoreMode.TOP_SCORES;
+ }
+
+ @Override
+ public void setScorer(Scorable scorer) throws IOException {
+ this.scorer = scorer;
+ }
+
+ @Override
+ public void collect(int doc) throws IOException {
+ minScore = Math.nextUp(minScore);
+ scorer.setMinCompetitiveScore(minScore);
+ }
+ };
+ for (int numCol = 1; numCol < 4; numCol++) {
+ List<Collector> cols = new ArrayList<>();
+ cols.add(collector);
+ for (int col = 0; col < numCol; col++) {
+ cols.add(new TerminateAfterCollector(new TotalHitCountCollector(), 0));
+ }
+ Collections.shuffle(cols, random());
+ Collector multiCollector = MultiCollector.wrap(cols);
+ LeafCollector leafCollector = multiCollector.getLeafCollector(reader.leaves().get(0));
+ leafCollector.setScorer(scorer);
+ leafCollector.collect(0); // no exception
+ }
+
+ reader.close();
+ dir.close();
+ }
+
private static class DummyCollector extends SimpleCollector {
boolean collectCalled = false;
@@ -271,7 +334,7 @@ public class TestMultiCollector extends LuceneTestCase {
assertSame(dc, MultiCollector.wrap(dc));
assertSame(dc, MultiCollector.wrap(dc, null));
}
-
+
@Test
public void testCollector() throws Exception {
// Tests that the collector delegates calls to input collectors properly.
@@ -310,7 +373,7 @@ public class TestMultiCollector extends LuceneTestCase {
@Override
public void collect(int doc) throws IOException {}
-
+
};
}
@@ -318,7 +381,7 @@ public class TestMultiCollector extends LuceneTestCase {
public ScoreMode scoreMode() {
return scoreMode;
}
-
+
};
}
@@ -329,7 +392,7 @@ public class TestMultiCollector extends LuceneTestCase {
iw.commit();
DirectoryReader reader = iw.getReader();
iw.close();
-
+
final LeafReaderContext ctx = reader.leaves().get(0);
expectThrows(AssertionError.class, () -> {
@@ -354,7 +417,7 @@ public class TestMultiCollector extends LuceneTestCase {
reader.close();
dir.close();
}
-
+
public void testScorerWrappingForTopScores() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
@@ -365,30 +428,30 @@ public class TestMultiCollector extends LuceneTestCase {
Collector c1 = collector(ScoreMode.TOP_SCORES, MultiCollector.MinCompetitiveScoreAwareScorable.class);
Collector c2 = collector(ScoreMode.TOP_SCORES, MultiCollector.MinCompetitiveScoreAwareScorable.class);
MultiCollector.wrap(c1, c2).getLeafCollector(ctx).setScorer(new ScoreAndDoc());
-
+
c1 = collector(ScoreMode.TOP_SCORES, ScoreCachingWrappingScorer.class);
c2 = collector(ScoreMode.COMPLETE, ScoreCachingWrappingScorer.class);
MultiCollector.wrap(c1, c2).getLeafCollector(ctx).setScorer(new ScoreAndDoc());
-
+
reader.close();
dir.close();
}
-
+
public void testMinCompetitiveScore() throws IOException {
float[] currentMinScores = new float[3];
float[] minCompetitiveScore = new float[1];
Scorable scorer = new Scorable() {
-
+
@Override
public float score() throws IOException {
return 0;
}
-
+
@Override
public int docID() {
return 0;
}
-
+
@Override
public void setMinCompetitiveScore(float minScore) throws IOException {
minCompetitiveScore[0] = minScore;
@@ -413,7 +476,7 @@ public class TestMultiCollector extends LuceneTestCase {
s0.setMinCompetitiveScore(Float.MAX_VALUE);
assertEquals(Float.MAX_VALUE, minCompetitiveScore[0], 0);
}
-
+
public void testCollectionTermination() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
@@ -436,25 +499,25 @@ public class TestMultiCollector extends LuceneTestCase {
assertFalse("c1 should be removed already", c1.collectCalled);
assertTrue("c2's collect should be called", c2.collectCalled);
c2.collectCalled = false;
-
+
expectThrows(CollectionTerminatedException.class, () -> {
lc.collect(2);
});
assertFalse("c1 should be removed already", c1.collectCalled);
assertFalse("c2 should be removed already", c2.collectCalled);
-
+
reader.close();
dir.close();
}
-
+
public void testSetScorerOnCollectionTerminationSkipNonCompetitive() throws IOException {
doTestSetScorerOnCollectionTermination(true);
}
-
+
public void testSetScorerOnCollectionTerminationSkipNoSkips() throws IOException {
doTestSetScorerOnCollectionTermination(false);
}
-
+
private void doTestSetScorerOnCollectionTermination(boolean allowSkipNonCompetitive) throws IOException {
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
@@ -462,10 +525,10 @@ public class TestMultiCollector extends LuceneTestCase {
DirectoryReader reader = iw.getReader();
iw.close();
final LeafReaderContext ctx = reader.leaves().get(0);
-
+
DummyCollector c1 = new TerminatingDummyCollector(1, allowSkipNonCompetitive? ScoreMode.TOP_SCORES : ScoreMode.COMPLETE);
DummyCollector c2 = new TerminatingDummyCollector(2, allowSkipNonCompetitive? ScoreMode.TOP_SCORES : ScoreMode.COMPLETE);
-
+
Collector mc = MultiCollector.wrap(c1, c2);
LeafCollector lc = mc.getLeafCollector(ctx);
assertFalse(c1.setScorerCalled);
@@ -476,41 +539,41 @@ public class TestMultiCollector extends LuceneTestCase {
c1.setScorerCalled = false;
c2.setScorerCalled = false;
lc.collect(0); // OK
-
+
lc.setScorer(new ScoreAndDoc());
assertTrue(c1.setScorerCalled);
assertTrue(c2.setScorerCalled);
c1.setScorerCalled = false;
c2.setScorerCalled = false;
-
+
lc.collect(1); // OK, but c1 should terminate
lc.setScorer(new ScoreAndDoc());
assertFalse(c1.setScorerCalled);
assertTrue(c2.setScorerCalled);
c2.setScorerCalled = false;
-
+
expectThrows(CollectionTerminatedException.class, () -> {
lc.collect(2);
});
lc.setScorer(new ScoreAndDoc());
assertFalse(c1.setScorerCalled);
assertFalse(c2.setScorerCalled);
-
+
reader.close();
dir.close();
}
-
+
private static class TerminatingDummyCollector extends DummyCollector {
-
+
private final int terminateOnDoc;
private final ScoreMode scoreMode;
-
+
public TerminatingDummyCollector(int terminateOnDoc, ScoreMode scoreMode) {
super();
this.terminateOnDoc = terminateOnDoc;
this.scoreMode = scoreMode;
}
-
+
@Override
public void collect(int doc) throws IOException {
if (doc == terminateOnDoc) {
@@ -518,12 +581,12 @@ public class TestMultiCollector extends LuceneTestCase {
}
super.collect(doc);
}
-
+
@Override
public ScoreMode scoreMode() {
return scoreMode;
}
-
+
}
}