You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by tf...@apache.org on 2020/06/18 17:20:00 UTC

[lucene-solr] branch master updated: LUCENE-9402: Let MultiCollector handle minCompetitiveScore (#1567)

This is an automated email from the ASF dual-hosted git repository.

tflobbe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new 4db1e38  LUCENE-9402: Let MultiCollector handle minCompetitiveScore (#1567)
4db1e38 is described below

commit 4db1e3895fec7cd50b0ad266af5db0757bb5780a
Author: Tomas Fernandez Lobbe <tf...@apache.org>
AuthorDate: Thu Jun 18 10:19:49 2020 -0700

    LUCENE-9402: Let MultiCollector handle minCompetitiveScore (#1567)
---
 lucene/CHANGES.txt                                 |   2 +
 .../org/apache/lucene/search/MultiCollector.java   | 111 +++++++++----
 .../apache/lucene/search/MultiCollectorTest.java   | 172 +++++++++++++++++++++
 3 files changed, 251 insertions(+), 34 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 8e27bc0..69c9c2a 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -216,6 +216,8 @@ Improvements
 
 * LUCENE-9396: Improved truncation detection for points. (Adrien Grand, Robert Muir)
 
+* LUCENE-9402: Let MultiCollector handle minCompetitiveScore (Tomás Fernández Löbbe, Adrien Grand)
+
 Optimizations
 ---------------------
 
diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java b/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java
index 82251e4..5cb6db8 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java
@@ -117,7 +117,7 @@ public class MultiCollector implements Collector {
 
   @Override
   public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
-    final List<LeafCollector> leafCollectors = new ArrayList<>();
+    final List<LeafCollector> leafCollectors = new ArrayList<>(collectors.length);
     for (Collector collector : collectors) {
       final LeafCollector leafCollector;
       try {
@@ -134,7 +134,7 @@ public class MultiCollector implements Collector {
       case 1:
         return leafCollectors.get(0);
       default:
-        return new MultiLeafCollector(leafCollectors, cacheScores);
+        return new MultiLeafCollector(leafCollectors, cacheScores, scoreMode() == ScoreMode.TOP_SCORES);
     }
   }
 
@@ -142,12 +142,14 @@ public class MultiCollector implements Collector {
 
     private final boolean cacheScores;
     private final LeafCollector[] collectors;
-    private int numCollectors;
+    private final float[] minScores;
+    private final boolean skipNonCompetitiveScores;
 
-    private MultiLeafCollector(List<LeafCollector> collectors, boolean cacheScores) {
+    private MultiLeafCollector(List<LeafCollector> collectors, boolean cacheScores, boolean skipNonCompetitive) {
       this.collectors = collectors.toArray(new LeafCollector[collectors.size()]);
       this.cacheScores = cacheScores;
-      this.numCollectors = this.collectors.length;
+      this.skipNonCompetitiveScores = skipNonCompetitive;
+      this.minScores = this.skipNonCompetitiveScores ? new float[this.collectors.length] : null;
     }
 
     @Override
@@ -155,48 +157,89 @@ public class MultiCollector implements Collector {
       if (cacheScores) {
         scorer = new ScoreCachingWrappingScorer(scorer);
       }
-      scorer = new FilterScorable(scorer) {
-        @Override
-        public void setMinCompetitiveScore(float minScore) {
-          // Ignore calls to setMinCompetitiveScore so that if we wrap two
-          // collectors and one of them wants to skip low-scoring hits, then
-          // the other collector still sees all hits. We could try to reconcile
-          // min scores and take the maximum min score across collectors, but
-          // this is very unlikely to be helpful in practice.
+      if (skipNonCompetitiveScores) {
+        for (int i = 0; i < collectors.length; ++i) {
+          final LeafCollector c = collectors[i];
+          if (c != null) {
+            c.setScorer(new MinCompetitiveScoreAwareScorable(scorer,  i,  minScores));
+          }
         }
+      } else {
+        scorer = new FilterScorable(scorer) {
+          @Override
+          public void setMinCompetitiveScore(float minScore) throws IOException {
+            // Ignore calls to setMinCompetitiveScore so that if we wrap two
+            // collectors and one of them wants to skip low-scoring hits, then
+            // the other collector still sees all hits.
+          }
 
-      };
-      for (int i = 0; i < numCollectors; ++i) {
-        final LeafCollector c = collectors[i];
-        c.setScorer(scorer);
+        };
+        for (int i = 0; i < collectors.length; ++i) {
+          final LeafCollector c = collectors[i];
+          if (c != null) {
+            c.setScorer(scorer);
+          }
+        }
       }
     }
 
-    private void removeCollector(int i) {
-      System.arraycopy(collectors, i + 1, collectors, i, numCollectors - i - 1);
-      --numCollectors;
-      collectors[numCollectors] = null;
-    }
-
     @Override
     public void collect(int doc) throws IOException {
-      final LeafCollector[] collectors = this.collectors;
-      int numCollectors = this.numCollectors;
-      for (int i = 0; i < numCollectors; ) {
+      for (int i = 0; i < collectors.length; i++) {
         final LeafCollector collector = collectors[i];
-        try {
-          collector.collect(doc);
-          ++i;
-        } catch (CollectionTerminatedException e) {
-          removeCollector(i);
-          numCollectors = this.numCollectors;
-          if (numCollectors == 0) {
-            throw new CollectionTerminatedException();
+        if (collector != null) {
+          try {
+            collector.collect(doc);
+          } catch (CollectionTerminatedException e) {
+            collectors[i] = null;
+            if (allCollectorsTerminated()) {
+              throw new CollectionTerminatedException();
+            }
           }
         }
       }
     }
 
+    private boolean allCollectorsTerminated() {
+      for (int i = 0; i < collectors.length; i++) {
+        if (collectors[i] != null) {
+          return false;
+        }
+      }
+      return true;
+    }
+
+  }
+  
+  final static class MinCompetitiveScoreAwareScorable extends FilterScorable {
+    
+    private final int idx;
+    private final float[] minScores;
+
+    MinCompetitiveScoreAwareScorable(Scorable in, int idx, float[] minScores) {
+      super(in);
+      this.idx = idx;
+      this.minScores = minScores;
+    }
+    
+    @Override
+    public void setMinCompetitiveScore(float minScore) throws IOException {
+      if (minScore > minScores[idx]) {
+        minScores[idx] = minScore;
+        in.setMinCompetitiveScore(minScore());
+      }
+    }
+
+    private float minScore() {
+      float min = Float.MAX_VALUE;
+      for (int i = 0; i < minScores.length; i++) {
+        if (minScores[i] < min) {
+          min = minScores[i];
+        }
+      }
+      return min;
+    }
+    
   }
 
 }
diff --git a/lucene/core/src/test/org/apache/lucene/search/MultiCollectorTest.java b/lucene/core/src/test/org/apache/lucene/search/MultiCollectorTest.java
index c3b4f42..80a5a9a 100644
--- a/lucene/core/src/test/org/apache/lucene/search/MultiCollectorTest.java
+++ b/lucene/core/src/test/org/apache/lucene/search/MultiCollectorTest.java
@@ -163,4 +163,176 @@ public class MultiCollectorTest extends LuceneTestCase {
     reader.close();
     dir.close();
   }
+  
+  public void testScorerWrappingForTopScores() throws IOException {
+    Directory dir = newDirectory();
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
+    iw.addDocument(new Document());
+    DirectoryReader reader = iw.getReader();
+    iw.close();
+    final LeafReaderContext ctx = reader.leaves().get(0);
+    Collector c1 = collector(ScoreMode.TOP_SCORES, MultiCollector.MinCompetitiveScoreAwareScorable.class);
+    Collector c2 = collector(ScoreMode.TOP_SCORES, MultiCollector.MinCompetitiveScoreAwareScorable.class);
+    MultiCollector.wrap(c1, c2).getLeafCollector(ctx).setScorer(new ScoreAndDoc());
+    
+    c1 = collector(ScoreMode.TOP_SCORES, ScoreCachingWrappingScorer.class);
+    c2 = collector(ScoreMode.COMPLETE, ScoreCachingWrappingScorer.class);
+    MultiCollector.wrap(c1, c2).getLeafCollector(ctx).setScorer(new ScoreAndDoc());
+    
+    reader.close();
+    dir.close();
+  }
+  
+  public void testMinCompetitiveScore() throws IOException {
+    float[] currentMinScores = new float[3];
+    float[] minCompetitiveScore = new float[1];
+    Scorable scorer = new Scorable() {
+      
+      @Override
+      public float score() throws IOException {
+        return 0;
+      }
+      
+      @Override
+      public int docID() {
+        return 0;
+      }
+      
+      @Override
+      public void setMinCompetitiveScore(float minScore) throws IOException {
+        minCompetitiveScore[0] = minScore;
+      }
+    };
+    Scorable s0 = new MultiCollector.MinCompetitiveScoreAwareScorable(scorer, 0, currentMinScores);
+    Scorable s1 = new MultiCollector.MinCompetitiveScoreAwareScorable(scorer, 1, currentMinScores);
+    Scorable s2 = new MultiCollector.MinCompetitiveScoreAwareScorable(scorer, 2, currentMinScores);
+    assertEquals(0f, minCompetitiveScore[0], 0);
+    s0.setMinCompetitiveScore(0.5f);
+    assertEquals(0f, minCompetitiveScore[0], 0);
+    s1.setMinCompetitiveScore(0.8f);
+    assertEquals(0f, minCompetitiveScore[0], 0);
+    s2.setMinCompetitiveScore(0.3f);
+    assertEquals(0.3f, minCompetitiveScore[0], 0);
+    s2.setMinCompetitiveScore(0.1f);
+    assertEquals(0.3f, minCompetitiveScore[0], 0);
+    s1.setMinCompetitiveScore(Float.MAX_VALUE);
+    assertEquals(0.3f, minCompetitiveScore[0], 0);
+    s2.setMinCompetitiveScore(Float.MAX_VALUE);
+    assertEquals(0.5f, minCompetitiveScore[0], 0);
+    s0.setMinCompetitiveScore(Float.MAX_VALUE);
+    assertEquals(Float.MAX_VALUE, minCompetitiveScore[0], 0);
+  }
+  
+  public void testCollectionTermination() throws IOException {
+    Directory dir = newDirectory();
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
+    iw.addDocument(new Document());
+    DirectoryReader reader = iw.getReader();
+    iw.close();
+    final LeafReaderContext ctx = reader.leaves().get(0);
+    DummyCollector c1 = new TerminatingDummyCollector(1, ScoreMode.COMPLETE);
+    DummyCollector c2 = new TerminatingDummyCollector(2, ScoreMode.COMPLETE);
+
+    Collector mc = MultiCollector.wrap(c1, c2);
+    LeafCollector lc = mc.getLeafCollector(ctx);
+    lc.setScorer(new ScoreAndDoc());
+    lc.collect(0); // OK
+    assertTrue("c1's collect should be called", c1.collectCalled);
+    assertTrue("c2's collect should be called", c2.collectCalled);
+    c1.collectCalled = false;
+    c2.collectCalled = false;
+    lc.collect(1); // OK, but c1 should terminate
+    assertFalse("c1 should be removed already", c1.collectCalled);
+    assertTrue("c2's collect should be called", c2.collectCalled);
+    c2.collectCalled = false;
+    
+    expectThrows(CollectionTerminatedException.class, () -> {
+      lc.collect(2);
+    });
+    assertFalse("c1 should be removed already", c1.collectCalled);
+    assertFalse("c2 should be removed already", c2.collectCalled);
+    
+    reader.close();
+    dir.close();
+  }
+  
+  public void testSetScorerOnCollectionTerminationSkipNonCompetitive() throws IOException {
+    doTestSetScorerOnCollectionTermination(true);
+  }
+  
+  public void testSetScorerOnCollectionTerminationSkipNoSkips() throws IOException {
+    doTestSetScorerOnCollectionTermination(false);
+  }
+  
+  private void doTestSetScorerOnCollectionTermination(boolean allowSkipNonCompetitive) throws IOException {
+    Directory dir = newDirectory();
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
+    iw.addDocument(new Document());
+    DirectoryReader reader = iw.getReader();
+    iw.close();
+    final LeafReaderContext ctx = reader.leaves().get(0);
+    
+    DummyCollector c1 = new TerminatingDummyCollector(1, allowSkipNonCompetitive? ScoreMode.TOP_SCORES : ScoreMode.COMPLETE);
+    DummyCollector c2 = new TerminatingDummyCollector(2, allowSkipNonCompetitive? ScoreMode.TOP_SCORES : ScoreMode.COMPLETE);
+    
+    Collector mc = MultiCollector.wrap(c1, c2);
+    LeafCollector lc = mc.getLeafCollector(ctx);
+    assertFalse(c1.setScorerCalled);
+    assertFalse(c2.setScorerCalled);
+    lc.setScorer(new ScoreAndDoc());
+    assertTrue(c1.setScorerCalled);
+    assertTrue(c2.setScorerCalled);
+    c1.setScorerCalled = false;
+    c2.setScorerCalled = false;
+    lc.collect(0); // OK
+    
+    lc.setScorer(new ScoreAndDoc());
+    assertTrue(c1.setScorerCalled);
+    assertTrue(c2.setScorerCalled);
+    c1.setScorerCalled = false;
+    c2.setScorerCalled = false;
+    
+    lc.collect(1); // OK, but c1 should terminate
+    lc.setScorer(new ScoreAndDoc());
+    assertFalse(c1.setScorerCalled);
+    assertTrue(c2.setScorerCalled);
+    c2.setScorerCalled = false;
+    
+    expectThrows(CollectionTerminatedException.class, () -> {
+      lc.collect(2);
+    });
+    lc.setScorer(new ScoreAndDoc());
+    assertFalse(c1.setScorerCalled);
+    assertFalse(c2.setScorerCalled);
+    
+    reader.close();
+    dir.close();
+  }
+  
+  private static class TerminatingDummyCollector extends DummyCollector {
+    
+    private final int terminateOnDoc;
+    private final ScoreMode scoreMode;
+    
+    public TerminatingDummyCollector(int terminateOnDoc, ScoreMode scoreMode) {
+      super();
+      this.terminateOnDoc = terminateOnDoc;
+      this.scoreMode = scoreMode;
+    }
+    
+    @Override
+    public void collect(int doc) throws IOException {
+      if (doc == terminateOnDoc) {
+        throw new CollectionTerminatedException();
+      }
+      super.collect(doc);
+    }
+    
+    @Override
+    public ScoreMode scoreMode() {
+      return scoreMode;
+    }
+    
+  }
+
 }