You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ji...@apache.org on 2021/10/11 09:10:12 UTC

[lucene-solr] branch branch_8_10 updated: LUCENE-10110: MultiCollector should wrap single leaf collector that wants to skip low-scoring hits but the combined score mode doesn't allow it.

This is an automated email from the ASF dual-hosted git repository.

jimczi pushed a commit to branch branch_8_10
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8_10 by this push:
     new cd420d9  LUCENE-10110: MultiCollector should wrap single leaf collector that wants to skip low-scoring hits  but the combined score mode doesn't allow it.
cd420d9 is described below

commit cd420d949eece3b5fe15094ebb5c62c397bdb5ee
Author: jimczi <ji...@apache.org>
AuthorDate: Mon Sep 20 09:12:08 2021 +0200

    LUCENE-10110: MultiCollector should wrap single leaf collector that wants to skip low-scoring hits
     but the combined score mode doesn't allow it.
---
 lucene/CHANGES.txt                                 |   3 +
 .../org/apache/lucene/search/MultiCollector.java   |  46 +++++---
 .../apache/lucene/search/TestMultiCollector.java   | 121 ++++++++++++++++-----
 3 files changed, 125 insertions(+), 45 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 1711a3a..82ebe5e 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -8,6 +8,9 @@ http://s.apache.org/luceneversions
 Bug Fixes
 ---------------------
 
+* LUCENE-10110: MultiCollector now handles single leaf collector that wants to skip low-scoring hits
+ but the combined score mode doesn't allow it. (Jim Ferenczi)
+
 * LUCENE-10119: Sort optimization with search_after can wrongly skip documents
   whose values are equal to the last value of the previous page (Nhat Nguyen)
 
diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java b/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java
index c7178fa..8fc162c 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java
@@ -25,10 +25,14 @@ import java.util.List;
 import org.apache.lucene.index.LeafReaderContext;
 
 /**
- * A {@link Collector} which allows running a search with several
- * {@link Collector}s. It offers a static {@link #wrap} method which accepts a
- * list of collectors and wraps them with {@link MultiCollector}, while
- * filtering out the <code>null</code> null ones.
+ * A {@link Collector} which allows running a search with several {@link Collector}s. It offers a
+ * static {@link #wrap} method which accepts a list of collectors and wraps them with {@link
+ * MultiCollector}, while filtering out the <code>null</code> null ones.
+ *
+ * <p><b>NOTE:</b>When mixing collectors that want to skip low-scoring hits ({@link
+ * ScoreMode#TOP_SCORES}) with ones that require to see all hits, such as mixing {@link
+ * TopScoreDocCollector} and {@link TotalHitCountCollector}, it should be faster to run the query
+ * twice, once for each collector, rather than using this wrapper on a single search.
  */
 public class MultiCollector implements Collector {
 
@@ -48,7 +52,7 @@ public class MultiCollector implements Collector {
    * <li>Otherwise the method returns a {@link MultiCollector} which wraps the
    * non-<code>null</code> ones.
    * </ul>
-   * 
+   *
    * @throws IllegalArgumentException
    *           if either 0 collectors were input, or all collectors are
    *           <code>null</code>.
@@ -87,7 +91,7 @@ public class MultiCollector implements Collector {
       return new MultiCollector(colls);
     }
   }
-  
+
   private final boolean cacheScores;
   private final Collector[] collectors;
 
@@ -118,6 +122,7 @@ public class MultiCollector implements Collector {
   @Override
   public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
     final List<LeafCollector> leafCollectors = new ArrayList<>(collectors.length);
+    ScoreMode leafScoreMode = null;
     for (Collector collector : collectors) {
       final LeafCollector leafCollector;
       try {
@@ -126,15 +131,24 @@ public class MultiCollector implements Collector {
         // this leaf collector does not need this segment
         continue;
       }
+      if (leafScoreMode == null) {
+        leafScoreMode = collector.scoreMode();
+      } else if (leafScoreMode != collector.scoreMode()) {
+        leafScoreMode = ScoreMode.COMPLETE;
+      }
       leafCollectors.add(leafCollector);
     }
-    switch (leafCollectors.size()) {
-      case 0:
-        throw new CollectionTerminatedException();
-      case 1:
+    if (leafCollectors.isEmpty()) {
+      throw new CollectionTerminatedException();
+    } else {
+      // Wraps single leaf collector that wants to skip low-scoring hits (ScoreMode.TOP_SCORES)
+      // but the global score mode doesn't allow it.
+      if (leafCollectors.size() == 1
+          && (scoreMode() == ScoreMode.TOP_SCORES || leafScoreMode != ScoreMode.TOP_SCORES)) {
         return leafCollectors.get(0);
-      default:
-        return new MultiLeafCollector(leafCollectors, cacheScores, scoreMode() == ScoreMode.TOP_SCORES);
+      }
+      return new MultiLeafCollector(
+          leafCollectors, cacheScores, scoreMode() == ScoreMode.TOP_SCORES);
     }
   }
 
@@ -210,9 +224,9 @@ public class MultiCollector implements Collector {
     }
 
   }
-  
+
   final static class MinCompetitiveScoreAwareScorable extends FilterScorable {
-    
+
     private final int idx;
     private final float[] minScores;
 
@@ -221,7 +235,7 @@ public class MultiCollector implements Collector {
       this.idx = idx;
       this.minScores = minScores;
     }
-    
+
     @Override
     public void setMinCompetitiveScore(float minScore) throws IOException {
       if (minScore > minScores[idx]) {
@@ -239,7 +253,7 @@ public class MultiCollector implements Collector {
       }
       return min;
     }
-    
+
   }
 
 }
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMultiCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestMultiCollector.java
index f1adc1b..f7f6ee3 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestMultiCollector.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestMultiCollector.java
@@ -220,6 +220,69 @@ public class TestMultiCollector extends LuceneTestCase {
     dir.close();
   }
 
+  public void testDisablesSetMinScoreWithEarlyTermination() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+    w.addDocument(new Document());
+    IndexReader reader = DirectoryReader.open(w);
+    w.close();
+
+    Scorable scorer =
+        new Scorable() {
+          @Override
+          public int docID() {
+            throw new UnsupportedOperationException();
+          }
+
+          @Override
+          public float score() {
+            return 0;
+          }
+
+          @Override
+          public void setMinCompetitiveScore(float minScore) {
+            throw new AssertionError();
+          }
+        };
+
+    Collector collector =
+        new SimpleCollector() {
+          private Scorable scorer;
+          float minScore = 0;
+
+          @Override
+          public ScoreMode scoreMode() {
+            return ScoreMode.TOP_SCORES;
+          }
+
+          @Override
+          public void setScorer(Scorable scorer) throws IOException {
+            this.scorer = scorer;
+          }
+
+          @Override
+          public void collect(int doc) throws IOException {
+            minScore = Math.nextUp(minScore);
+            scorer.setMinCompetitiveScore(minScore);
+          }
+        };
+    for (int numCol = 1; numCol < 4; numCol++) {
+      List<Collector> cols = new ArrayList<>();
+      cols.add(collector);
+      for (int col = 0; col < numCol; col++) {
+        cols.add(new TerminateAfterCollector(new TotalHitCountCollector(), 0));
+      }
+      Collections.shuffle(cols, random());
+      Collector multiCollector = MultiCollector.wrap(cols);
+      LeafCollector leafCollector = multiCollector.getLeafCollector(reader.leaves().get(0));
+      leafCollector.setScorer(scorer);
+      leafCollector.collect(0); // no exception
+    }
+
+    reader.close();
+    dir.close();
+  }
+
   private static class DummyCollector extends SimpleCollector {
 
     boolean collectCalled = false;
@@ -271,7 +334,7 @@ public class TestMultiCollector extends LuceneTestCase {
     assertSame(dc, MultiCollector.wrap(dc));
     assertSame(dc, MultiCollector.wrap(dc, null));
   }
-  
+
   @Test
   public void testCollector() throws Exception {
     // Tests that the collector delegates calls to input collectors properly.
@@ -310,7 +373,7 @@ public class TestMultiCollector extends LuceneTestCase {
 
           @Override
           public void collect(int doc) throws IOException {}
-          
+
         };
       }
 
@@ -318,7 +381,7 @@ public class TestMultiCollector extends LuceneTestCase {
       public ScoreMode scoreMode() {
         return scoreMode;
       }
-      
+
     };
   }
 
@@ -329,7 +392,7 @@ public class TestMultiCollector extends LuceneTestCase {
     iw.commit();
     DirectoryReader reader = iw.getReader();
     iw.close();
-    
+
     final LeafReaderContext ctx = reader.leaves().get(0);
 
     expectThrows(AssertionError.class, () -> {
@@ -354,7 +417,7 @@ public class TestMultiCollector extends LuceneTestCase {
     reader.close();
     dir.close();
   }
-  
+
   public void testScorerWrappingForTopScores() throws IOException {
     Directory dir = newDirectory();
     RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
@@ -365,30 +428,30 @@ public class TestMultiCollector extends LuceneTestCase {
     Collector c1 = collector(ScoreMode.TOP_SCORES, MultiCollector.MinCompetitiveScoreAwareScorable.class);
     Collector c2 = collector(ScoreMode.TOP_SCORES, MultiCollector.MinCompetitiveScoreAwareScorable.class);
     MultiCollector.wrap(c1, c2).getLeafCollector(ctx).setScorer(new ScoreAndDoc());
-    
+
     c1 = collector(ScoreMode.TOP_SCORES, ScoreCachingWrappingScorer.class);
     c2 = collector(ScoreMode.COMPLETE, ScoreCachingWrappingScorer.class);
     MultiCollector.wrap(c1, c2).getLeafCollector(ctx).setScorer(new ScoreAndDoc());
-    
+
     reader.close();
     dir.close();
   }
-  
+
   public void testMinCompetitiveScore() throws IOException {
     float[] currentMinScores = new float[3];
     float[] minCompetitiveScore = new float[1];
     Scorable scorer = new Scorable() {
-      
+
       @Override
       public float score() throws IOException {
         return 0;
       }
-      
+
       @Override
       public int docID() {
         return 0;
       }
-      
+
       @Override
       public void setMinCompetitiveScore(float minScore) throws IOException {
         minCompetitiveScore[0] = minScore;
@@ -413,7 +476,7 @@ public class TestMultiCollector extends LuceneTestCase {
     s0.setMinCompetitiveScore(Float.MAX_VALUE);
     assertEquals(Float.MAX_VALUE, minCompetitiveScore[0], 0);
   }
-  
+
   public void testCollectionTermination() throws IOException {
     Directory dir = newDirectory();
     RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
@@ -436,25 +499,25 @@ public class TestMultiCollector extends LuceneTestCase {
     assertFalse("c1 should be removed already", c1.collectCalled);
     assertTrue("c2's collect should be called", c2.collectCalled);
     c2.collectCalled = false;
-    
+
     expectThrows(CollectionTerminatedException.class, () -> {
       lc.collect(2);
     });
     assertFalse("c1 should be removed already", c1.collectCalled);
     assertFalse("c2 should be removed already", c2.collectCalled);
-    
+
     reader.close();
     dir.close();
   }
-  
+
   public void testSetScorerOnCollectionTerminationSkipNonCompetitive() throws IOException {
     doTestSetScorerOnCollectionTermination(true);
   }
-  
+
   public void testSetScorerOnCollectionTerminationSkipNoSkips() throws IOException {
     doTestSetScorerOnCollectionTermination(false);
   }
-  
+
   private void doTestSetScorerOnCollectionTermination(boolean allowSkipNonCompetitive) throws IOException {
     Directory dir = newDirectory();
     RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
@@ -462,10 +525,10 @@ public class TestMultiCollector extends LuceneTestCase {
     DirectoryReader reader = iw.getReader();
     iw.close();
     final LeafReaderContext ctx = reader.leaves().get(0);
-    
+
     DummyCollector c1 = new TerminatingDummyCollector(1, allowSkipNonCompetitive? ScoreMode.TOP_SCORES : ScoreMode.COMPLETE);
     DummyCollector c2 = new TerminatingDummyCollector(2, allowSkipNonCompetitive? ScoreMode.TOP_SCORES : ScoreMode.COMPLETE);
-    
+
     Collector mc = MultiCollector.wrap(c1, c2);
     LeafCollector lc = mc.getLeafCollector(ctx);
     assertFalse(c1.setScorerCalled);
@@ -476,41 +539,41 @@ public class TestMultiCollector extends LuceneTestCase {
     c1.setScorerCalled = false;
     c2.setScorerCalled = false;
     lc.collect(0); // OK
-    
+
     lc.setScorer(new ScoreAndDoc());
     assertTrue(c1.setScorerCalled);
     assertTrue(c2.setScorerCalled);
     c1.setScorerCalled = false;
     c2.setScorerCalled = false;
-    
+
     lc.collect(1); // OK, but c1 should terminate
     lc.setScorer(new ScoreAndDoc());
     assertFalse(c1.setScorerCalled);
     assertTrue(c2.setScorerCalled);
     c2.setScorerCalled = false;
-    
+
     expectThrows(CollectionTerminatedException.class, () -> {
       lc.collect(2);
     });
     lc.setScorer(new ScoreAndDoc());
     assertFalse(c1.setScorerCalled);
     assertFalse(c2.setScorerCalled);
-    
+
     reader.close();
     dir.close();
   }
-  
+
   private static class TerminatingDummyCollector extends DummyCollector {
-    
+
     private final int terminateOnDoc;
     private final ScoreMode scoreMode;
-    
+
     public TerminatingDummyCollector(int terminateOnDoc, ScoreMode scoreMode) {
       super();
       this.terminateOnDoc = terminateOnDoc;
       this.scoreMode = scoreMode;
     }
-    
+
     @Override
     public void collect(int doc) throws IOException {
       if (doc == terminateOnDoc) {
@@ -518,12 +581,12 @@ public class TestMultiCollector extends LuceneTestCase {
       }
       super.collect(doc);
     }
-    
+
     @Override
     public ScoreMode scoreMode() {
       return scoreMode;
     }
-    
+
   }
 
 }