You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ji...@apache.org on 2021/11/18 17:08:45 UTC

[lucene] branch branch_9_0 updated: LUCENE-10208: Ensure that the minimum competitive score does not decrease in concurrent search (#431)

This is an automated email from the ASF dual-hosted git repository.

jimczi pushed a commit to branch branch_9_0
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/branch_9_0 by this push:
     new 0018567  LUCENE-10208: Ensure that the minimum competitive score does not decrease in concurrent search (#431)
0018567 is described below

commit 0018567e8b16f3ba2f88a912afdc6367bb3ce675
Author: Jim Ferenczi <ji...@elastic.co>
AuthorDate: Tue Nov 9 11:04:17 2021 +0100

    LUCENE-10208: Ensure that the minimum competitive score does not decrease in concurrent search (#431)
    
    Co-authored-by: Adrien Grand <jp...@gmail.com>
---
 lucene/CHANGES.txt                                 |  2 +
 .../apache/lucene/search/MaxScoreAccumulator.java  | 50 ++++++++++++++------
 .../apache/lucene/search/TopFieldCollector.java    | 12 +++--
 .../apache/lucene/search/TopScoreDocCollector.java | 24 +++++++---
 .../lucene/search/TestMaxScoreAccumulator.java     | 22 ++++++---
 .../apache/lucene/search/TestTopDocsCollector.java | 42 ++++++-----------
 .../lucene/search/TestTopFieldCollector.java       | 53 ++++++----------------
 7 files changed, 108 insertions(+), 97 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 1c6d089..f4a5557 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -496,6 +496,8 @@ Bug Fixes
 
 * LUCENE-10154: NumericLeafComparator to define getPointValues. (Mayya Sharipova, Adrien Grand)
 
+* LUCENE-10208: Ensure that the minimum competitive score does not decrease in concurrent search. (Jim Ferenczi, Adrien Grand)
+
 Build
 ---------------------
 
diff --git a/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java b/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java
index fa66238..b0f60f8 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java
@@ -26,7 +26,7 @@ final class MaxScoreAccumulator {
   static final int DEFAULT_INTERVAL = 0x3ff;
 
   // scores are always positive
-  final LongAccumulator acc = new LongAccumulator(Long::max, Long.MIN_VALUE);
+  final LongAccumulator acc = new LongAccumulator(MaxScoreAccumulator::maxEncode, Long.MIN_VALUE);
 
   // non-final and visible for tests
   long modInterval;
@@ -35,9 +35,26 @@ final class MaxScoreAccumulator {
     this.modInterval = DEFAULT_INTERVAL;
   }
 
-  void accumulate(int docID, float score) {
-    assert docID >= 0 && score >= 0;
-    long encode = (((long) Float.floatToIntBits(score)) << 32) | docID;
+  /**
+   * Return the max encoded DocAndScore in a way that is consistent with {@link
+   * DocAndScore#compareTo}.
+   */
+  private static long maxEncode(long v1, long v2) {
+    float score1 = Float.intBitsToFloat((int) (v1 >> 32));
+    float score2 = Float.intBitsToFloat((int) (v2 >> 32));
+    int cmp = Float.compare(score1, score2);
+    if (cmp == 0) {
+      // tie-break on the minimum doc base
+      return (int) v1 < (int) v2 ? v1 : v2;
+    } else if (cmp > 0) {
+      return v1;
+    }
+    return v2;
+  }
+
+  void accumulate(int docBase, float score) {
+    assert docBase >= 0 && score >= 0;
+    long encode = (((long) Float.floatToIntBits(score)) << 32) | docBase;
     acc.accumulate(encode);
   }
 
@@ -47,16 +64,16 @@ final class MaxScoreAccumulator {
       return null;
     }
     float score = Float.intBitsToFloat((int) (value >> 32));
-    int docID = (int) value;
-    return new DocAndScore(docID, score);
+    int docBase = (int) value;
+    return new DocAndScore(docBase, score);
   }
 
   static class DocAndScore implements Comparable<DocAndScore> {
-    final int docID;
+    final int docBase;
     final float score;
 
-    DocAndScore(int docID, float score) {
-      this.docID = docID;
+    DocAndScore(int docBase, float score) {
+      this.docBase = docBase;
       this.score = score;
     }
 
@@ -64,7 +81,14 @@ final class MaxScoreAccumulator {
     public int compareTo(DocAndScore o) {
       int cmp = Float.compare(score, o.score);
       if (cmp == 0) {
-        return Integer.compare(docID, o.docID);
+        // tie-break on the minimum doc base
+        // For a given minimum competitive score, we want to know the first segment
+        // where this score occurred, hence the reverse order here.
+        // On segments with a lower docBase, any document whose score is greater
+        // than or equal to this score would be competitive, while on segments with a
+        // higher docBase, documents need to have a strictly greater score to be
+        // competitive since we tie break on doc ID.
+        return Integer.compare(o.docBase, docBase);
       }
       return cmp;
     }
@@ -74,17 +98,17 @@ final class MaxScoreAccumulator {
       if (this == o) return true;
       if (o == null || getClass() != o.getClass()) return false;
       DocAndScore result = (DocAndScore) o;
-      return docID == result.docID && Float.compare(result.score, score) == 0;
+      return docBase == result.docBase && Float.compare(result.score, score) == 0;
     }
 
     @Override
     public int hashCode() {
-      return Objects.hash(docID, score);
+      return Objects.hash(docBase, score);
     }
 
     @Override
     public String toString() {
-      return "DocAndScore{" + "docID=" + docID + ", score=" + score + '}';
+      return "DocAndScore{" + "docBase=" + docBase + ", score=" + score + '}';
     }
   }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
index db97704..0023d71 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TopFieldCollector.java
@@ -134,9 +134,9 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
     public void setScorer(Scorable scorer) throws IOException {
       this.scorer = scorer;
       comparator.setScorer(scorer);
-      minCompetitiveScore = 0f;
-      updateMinCompetitiveScore(scorer);
-      if (minScoreAcc != null) {
+      if (minScoreAcc == null) {
+        updateMinCompetitiveScore(scorer);
+      } else {
         updateGlobalMinCompetitiveScore(scorer);
       }
     }
@@ -191,6 +191,8 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
 
     @Override
     public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
+      // reset the minimum competitive score
+      minCompetitiveScore = 0f;
       docBase = context.docBase;
 
       return new TopFieldLeafCollector(queue, sort, context) {
@@ -244,6 +246,8 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
 
     @Override
     public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
+      // reset the minimum competitive score
+      minCompetitiveScore = 0f;
       docBase = context.docBase;
       final int afterDoc = after.doc - docBase;
 
@@ -363,7 +367,7 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
         minCompetitiveScore = minScore;
         totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
         if (minScoreAcc != null) {
-          minScoreAcc.accumulate(bottom.doc, minScore);
+          minScoreAcc.accumulate(docBase, minScore);
         }
       }
     }
diff --git a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
index 0f7b526..0c4ccdc 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
@@ -55,14 +55,15 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
     public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
       // reset the minimum competitive score
       docBase = context.docBase;
-      return new ScorerLeafCollector() {
+      minCompetitiveScore = 0f;
 
+      return new ScorerLeafCollector() {
         @Override
         public void setScorer(Scorable scorer) throws IOException {
           super.setScorer(scorer);
-          minCompetitiveScore = 0f;
-          updateMinCompetitiveScore(scorer);
-          if (minScoreAcc != null) {
+          if (minScoreAcc == null) {
+            updateMinCompetitiveScore(scorer);
+          } else {
             updateGlobalMinCompetitiveScore(scorer);
           }
         }
@@ -132,9 +133,20 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
     public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
       docBase = context.docBase;
       final int afterDoc = after.doc - context.docBase;
+      minCompetitiveScore = 0f;
 
       return new ScorerLeafCollector() {
         @Override
+        public void setScorer(Scorable scorer) throws IOException {
+          super.setScorer(scorer);
+          if (minScoreAcc == null) {
+            updateMinCompetitiveScore(scorer);
+          } else {
+            updateGlobalMinCompetitiveScore(scorer);
+          }
+        }
+
+        @Override
         public void collect(int doc) throws IOException {
           float score = scorer.score();
 
@@ -307,7 +319,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
       // the next float if the global minimum score is set on a document id that is
       // smaller than the ids in the current leaf
       float score =
-          docBase > maxMinScore.docID ? Math.nextUp(maxMinScore.score) : maxMinScore.score;
+          docBase >= maxMinScore.docBase ? Math.nextUp(maxMinScore.score) : maxMinScore.score;
       if (score > minCompetitiveScore) {
         assert hitsThresholdChecker.isThresholdReached();
         scorer.setMinCompetitiveScore(score);
@@ -332,7 +344,7 @@ public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
           // we don't use the next float but we register the document
           // id so that other leaves can require it if they are after
           // the current maximum
-          minScoreAcc.accumulate(pqTop.doc, pqTop.score);
+          minScoreAcc.accumulate(docBase, pqTop.score);
         }
       }
     }
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreAccumulator.java b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreAccumulator.java
index d53a2a2..28bec9c 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreAccumulator.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestMaxScoreAccumulator.java
@@ -23,21 +23,29 @@ public class TestMaxScoreAccumulator extends LuceneTestCase {
   public void testSimple() {
     MaxScoreAccumulator acc = new MaxScoreAccumulator();
     acc.accumulate(0, 0f);
+    assertEquals(0f, acc.get().score, 0);
+    assertEquals(0, acc.get().docBase, 0);
     acc.accumulate(10, 0f);
     assertEquals(0f, acc.get().score, 0);
-    assertEquals(10, acc.get().docID, 0);
+    assertEquals(0, acc.get().docBase, 0);
     acc.accumulate(100, 1000f);
     assertEquals(1000f, acc.get().score, 0);
-    assertEquals(100, acc.get().docID, 0);
+    assertEquals(100, acc.get().docBase, 0);
     acc.accumulate(1000, 5f);
     assertEquals(1000f, acc.get().score, 0);
-    assertEquals(100, acc.get().docID, 0);
+    assertEquals(100, acc.get().docBase, 0);
     acc.accumulate(99, 1000f);
     assertEquals(1000f, acc.get().score, 0);
-    assertEquals(100, acc.get().docID, 0);
-    acc.accumulate(0, 1001f);
+    assertEquals(99, acc.get().docBase, 0);
+    acc.accumulate(1000, 1001f);
+    assertEquals(1001f, acc.get().score, 0);
+    assertEquals(1000, acc.get().docBase, 0);
+    acc.accumulate(10, 1001f);
+    assertEquals(1001f, acc.get().score, 0);
+    assertEquals(10, acc.get().docBase, 0);
+    acc.accumulate(100, 1001f);
     assertEquals(1001f, acc.get().score, 0);
-    assertEquals(0, acc.get().docID, 0);
+    assertEquals(10, acc.get().docBase, 0);
   }
 
   public void testRandom() {
@@ -48,7 +56,7 @@ public class TestMaxScoreAccumulator extends LuceneTestCase {
     for (int i = 0; i < numDocs; i++) {
       MaxScoreAccumulator.DocAndScore res =
           new MaxScoreAccumulator.DocAndScore(random().nextInt(maxDocs), random().nextFloat());
-      acc.accumulate(res.docID, res.score);
+      acc.accumulate(res.docBase, res.score);
       if (res.compareTo(max) > 0) {
         max = res;
       }
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java
index 35b5dbd..ceecb82 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsCollector.java
@@ -18,10 +18,6 @@ package org.apache.lucene.search;
 
 import java.io.IOException;
 import java.util.Arrays;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.ThreadPoolExecutor;
-import java.util.concurrent.TimeUnit;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Store;
@@ -41,7 +37,6 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LineFileDocs;
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.NamedThreadFactory;
 
 public class TestTopDocsCollector extends LuceneTestCase {
 
@@ -141,7 +136,7 @@ public class TestTopDocsCollector extends LuceneTestCase {
 
   private TopDocsCollector<ScoreDoc> doSearchWithThreshold(
       int numResults, int thresHold, Query q, IndexReader indexReader) throws IOException {
-    IndexSearcher searcher = new IndexSearcher(indexReader);
+    IndexSearcher searcher = newSearcher(indexReader, true, true, false);
     TopDocsCollector<ScoreDoc> tdc = TopScoreDocCollector.create(numResults, thresHold);
     searcher.search(q, tdc);
     return tdc;
@@ -149,24 +144,10 @@ public class TestTopDocsCollector extends LuceneTestCase {
 
   private TopDocs doConcurrentSearchWithThreshold(
       int numResults, int threshold, Query q, IndexReader indexReader) throws IOException {
-    ExecutorService service =
-        new ThreadPoolExecutor(
-            4,
-            4,
-            0L,
-            TimeUnit.MILLISECONDS,
-            new LinkedBlockingQueue<Runnable>(),
-            new NamedThreadFactory("TestTopDocsCollector"));
-    try {
-      IndexSearcher searcher = new IndexSearcher(indexReader, service);
-
-      CollectorManager<TopScoreDocCollector, TopDocs> collectorManager =
-          TopScoreDocCollector.createSharedManager(numResults, null, threshold);
-
-      return searcher.search(q, collectorManager);
-    } finally {
-      service.shutdown();
-    }
+    IndexSearcher searcher = newSearcher(indexReader, true, true, true);
+    CollectorManager<TopScoreDocCollector, TopDocs> collectorManager =
+        TopScoreDocCollector.createSharedManager(numResults, null, threshold);
+    return searcher.search(q, collectorManager);
   }
 
   @Override
@@ -303,8 +284,9 @@ public class TestTopDocsCollector extends LuceneTestCase {
     Float minCompetitiveScore = null;
 
     @Override
-    public void setMinCompetitiveScore(float minCompetitiveScore) {
-      this.minCompetitiveScore = minCompetitiveScore;
+    public void setMinCompetitiveScore(float score) {
+      assert minCompetitiveScore == null || score >= minCompetitiveScore;
+      this.minCompetitiveScore = score;
     }
 
     @Override
@@ -356,9 +338,9 @@ public class TestTopDocsCollector extends LuceneTestCase {
     scorer.doc = 3;
     scorer.score = 0.5f;
     // Make sure we do not call setMinCompetitiveScore for non-competitive hits
-    scorer.minCompetitiveScore = Float.NaN;
+    scorer.minCompetitiveScore = null;
     leafCollector.collect(3);
-    assertTrue(Float.isNaN(scorer.minCompetitiveScore));
+    assertNull(scorer.minCompetitiveScore);
 
     scorer.doc = 4;
     scorer.score = 4;
@@ -613,6 +595,10 @@ public class TestTopDocsCollector extends LuceneTestCase {
     assertEquals(11, topDocs.totalHits.value);
     assertEquals(new TotalHits(11, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO), topDocs.totalHits);
 
+    leafCollector.setScorer(scorer);
+    leafCollector2.setScorer(scorer2);
+    leafCollector3.setScorer(scorer3);
+
     reader.close();
     dir.close();
   }
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java
index 8419caa..9d99f87 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java
@@ -21,10 +21,6 @@ import static org.apache.lucene.search.SortField.FIELD_SCORE;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Comparator;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.ThreadPoolExecutor;
-import java.util.concurrent.TimeUnit;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Store;
@@ -42,7 +38,6 @@ import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.FieldValueHitQueue.Entry;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.NamedThreadFactory;
 import org.apache.lucene.util.TestUtil;
 
 public class TestTopFieldCollector extends LuceneTestCase {
@@ -75,7 +70,7 @@ public class TestTopFieldCollector extends LuceneTestCase {
   private TopFieldCollector doSearchWithThreshold(
       int numResults, int thresHold, Query q, Sort sort, IndexReader indexReader)
       throws IOException {
-    IndexSearcher searcher = new IndexSearcher(indexReader);
+    IndexSearcher searcher = newSearcher(indexReader);
     TopFieldCollector tdc = TopFieldCollector.create(sort, numResults, thresHold);
     searcher.search(q, tdc);
     return tdc;
@@ -84,26 +79,14 @@ public class TestTopFieldCollector extends LuceneTestCase {
   private TopDocs doConcurrentSearchWithThreshold(
       int numResults, int threshold, Query q, Sort sort, IndexReader indexReader)
       throws IOException {
-    ExecutorService service =
-        new ThreadPoolExecutor(
-            4,
-            4,
-            0L,
-            TimeUnit.MILLISECONDS,
-            new LinkedBlockingQueue<Runnable>(),
-            new NamedThreadFactory("TestTopDocsCollector"));
-    try {
-      IndexSearcher searcher = new IndexSearcher(indexReader, service);
-
-      CollectorManager<TopFieldCollector, TopFieldDocs> collectorManager =
-          TopFieldCollector.createSharedManager(sort, numResults, null, threshold);
-
-      TopDocs tdc = searcher.search(q, collectorManager);
-
-      return tdc;
-    } finally {
-      service.shutdown();
-    }
+    IndexSearcher searcher = newSearcher(indexReader, true, true, true);
+
+    CollectorManager<TopFieldCollector, TopFieldDocs> collectorManager =
+        TopFieldCollector.createSharedManager(sort, numResults, null, threshold);
+
+    TopDocs tdc = searcher.search(q, collectorManager);
+
+    return tdc;
   }
 
   public void testSortWithoutFillFields() throws Exception {
@@ -146,17 +129,7 @@ public class TestTopFieldCollector extends LuceneTestCase {
   }
 
   public void testSharedHitcountCollector() throws Exception {
-
-    ExecutorService service =
-        new ThreadPoolExecutor(
-            4,
-            4,
-            0L,
-            TimeUnit.MILLISECONDS,
-            new LinkedBlockingQueue<Runnable>(),
-            new NamedThreadFactory("TestTopFieldCollector"));
-
-    IndexSearcher concurrentSearcher = new IndexSearcher(ir, service);
+    IndexSearcher concurrentSearcher = newSearcher(ir, true, true, true);
 
     // Two Sort criteria to instantiate the multi/single comparators.
     Sort[] sort = new Sort[] {new Sort(SortField.FIELD_DOC), new Sort()};
@@ -178,8 +151,6 @@ public class TestTopFieldCollector extends LuceneTestCase {
 
       CheckHits.checkEqual(q, td.scoreDocs, td2.scoreDocs);
     }
-
-    service.shutdown();
   }
 
   public void testSortWithoutTotalHitTracking() throws Exception {
@@ -678,6 +649,10 @@ public class TestTopFieldCollector extends LuceneTestCase {
     assertEquals(11, topDocs.totalHits.value);
     assertEquals(new TotalHits(11, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO), topDocs.totalHits);
 
+    leafCollector.setScorer(scorer);
+    leafCollector2.setScorer(scorer2);
+    leafCollector3.setScorer(scorer3);
+
     reader.close();
     dir.close();
   }