You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2017/03/01 09:27:29 UTC

[21/50] [abbrv] lucene-solr:jira/solr-9858: LUCENE-7707: add explicit boolean to TopDocs.merge to govern whether incoming or implicit shard index should be used

LUCENE-7707: add explicit boolean to TopDocs.merge to govern whether incoming or implicit shard index should be used


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/2e56c0e5
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/2e56c0e5
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/2e56c0e5

Branch: refs/heads/jira/solr-9858
Commit: 2e56c0e50564c8feeeb2831dd36cff1e9b23a00f
Parents: 471d842
Author: Mike McCandless <mi...@apache.org>
Authored: Fri Feb 24 17:00:45 2017 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Fri Feb 24 17:01:19 2017 -0500

----------------------------------------------------------------------
 lucene/CHANGES.txt                              | 10 +--
 .../java/org/apache/lucene/search/FieldDoc.java |  6 +-
 .../org/apache/lucene/search/IndexSearcher.java |  4 +-
 .../java/org/apache/lucene/search/TopDocs.java  | 68 +++++++++-----------
 .../apache/lucene/search/TestTopDocsMerge.java  | 38 ++++++-----
 5 files changed, 62 insertions(+), 64 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2e56c0e5/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 741418a..5d3a077 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -97,6 +97,12 @@ API Changes
 
 * LUCENE-7702: Removed GraphQuery in favour of simple boolean query. (Matt Webber via Jim Ferenczi)
 
+* LUCENE-7707: TopDocs.merge now takes a boolean option telling it
+  when to use the incoming shard index versus when to assign the shard
+  index itself, allowing users to merge shard responses incrementally
+  instead of once all shard responses are present. (Simon Willnauer,
+  Mike McCandless)
+
 New Features
 
 * LUCENE-7449: Add CROSSES relation support to RangeFieldQuery. (Nick Knize)
@@ -172,10 +178,6 @@ Improvements
   earlier than regular queries in order to improve cache efficiency.
   (Adrien Grand)
 
-* LUCENE-7707: Use predefined shard index when mergeing top docs if present. This 
-  allows to use TopDoc#merge to merge shard responses incrementally instead of
-  once all shard responses are present. (Simon Willnauer)
-
 Optimizations
 
 * LUCENE-7641: Optimized point range queries to compute documents that do not

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2e56c0e5/lucene/core/src/java/org/apache/lucene/search/FieldDoc.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldDoc.java b/lucene/core/src/java/org/apache/lucene/search/FieldDoc.java
index 31f7175..6125404 100644
--- a/lucene/core/src/java/org/apache/lucene/search/FieldDoc.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FieldDoc.java
@@ -52,18 +52,18 @@ public class FieldDoc extends ScoreDoc {
 
   /** Expert: Creates one of these objects with empty sort information. */
   public FieldDoc(int doc, float score) {
-    super (doc, score);
+    super(doc, score);
   }
 
   /** Expert: Creates one of these objects with the given sort information. */
   public FieldDoc(int doc, float score, Object[] fields) {
-    super (doc, score);
+    super(doc, score);
     this.fields = fields;
   }
   
   /** Expert: Creates one of these objects with the given sort information. */
   public FieldDoc(int doc, float score, Object[] fields, int shardIndex) {
-    super (doc, score, shardIndex);
+    super(doc, score, shardIndex);
     this.fields = fields;
   }
   

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2e56c0e5/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
index 5cae122..5ab16c2 100644
--- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
@@ -432,7 +432,7 @@ public class IndexSearcher {
         for (TopScoreDocCollector collector : collectors) {
           topDocs[i++] = collector.topDocs();
         }
-        return TopDocs.merge(cappedNumHits, topDocs);
+        return TopDocs.merge(0, cappedNumHits, topDocs, true);
       }
 
     };
@@ -559,7 +559,7 @@ public class IndexSearcher {
         for (TopFieldCollector collector : collectors) {
           topDocs[i++] = collector.topDocs();
         }
-        return TopDocs.merge(sort, cappedNumHits, topDocs);
+        return TopDocs.merge(sort, 0, cappedNumHits, topDocs, true);
       }
 
     };

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2e56c0e5/lucene/core/src/java/org/apache/lucene/search/TopDocs.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/TopDocs.java b/lucene/core/src/java/org/apache/lucene/search/TopDocs.java
index 2913cb2..3b66fca 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TopDocs.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TopDocs.java
@@ -16,7 +16,6 @@
  */
 package org.apache.lucene.search;
 
-
 import org.apache.lucene.util.PriorityQueue;
 
 /** Represents hits returned by {@link
@@ -60,6 +59,8 @@ public class TopDocs {
   private final static class ShardRef {
     // Which shard (index into shardHits[]):
     final int shardIndex;
+
+    // True if we should use the incoming ScoreDoc.shardIndex for sort order
     final boolean useScoreDocIndex;
 
     // Which hit within the shard:
@@ -77,10 +78,12 @@ public class TopDocs {
 
     int getShardIndex(ScoreDoc scoreDoc) {
       if (useScoreDocIndex) {
-        assert scoreDoc.shardIndex != -1 : "scoreDoc shardIndex must be predefined set but wasn't";
+        if (scoreDoc.shardIndex == -1) {
+          throw new IllegalArgumentException("setShardIndex is false but TopDocs[" + shardIndex + "].scoreDocs[" + hitIndex + "] is not set");
+        }
         return scoreDoc.shardIndex;
       } else {
-        assert scoreDoc.shardIndex == -1 : "scoreDoc shardIndex must be undefined but wasn't";
+        // NOTE: we don't assert that shardIndex is -1 here, because caller could in fact have set it but asked us to ignore it now
         return shardIndex;
       }
     }
@@ -201,23 +204,25 @@ public class TopDocs {
    *  the provided TopDocs, sorting by score. Each {@link TopDocs}
    *  instance must be sorted.
    *
-   *  @see #merge(int, int, TopDocs[])
+   *  @see #merge(int, int, TopDocs[], boolean)
    *  @lucene.experimental */
   public static TopDocs merge(int topN, TopDocs[] shardHits) {
-    return merge(0, topN, shardHits);
+    return merge(0, topN, shardHits, true);
   }
 
   /**
    * Same as {@link #merge(int, TopDocs[])} but also ignores the top
    * {@code start} top docs. This is typically useful for pagination.
    *
-   * Note: This method will fill the {@link ScoreDoc#shardIndex} on all score docs returned iff all ScoreDocs passed
-   * to this have it's shard index set to <tt>-1</tt>. Otherwise the shard index is not set. This allows to predefine
-   * the shard index in order to incrementally merge shard responses without losing the original shard index.
+   * Note: If {@code setShardIndex} is true, this method will assume the incoming order of {@code shardHits} reflects
+   * each shard's index and will fill the {@link ScoreDoc#shardIndex}, otherwise
+   * it must already be set for all incoming {@code ScoreDoc}s, which can be useful when doing multiple reductions
+   * (merges) of TopDocs.
+   *
    * @lucene.experimental
    */
-  public static TopDocs merge(int start, int topN, TopDocs[] shardHits) {
-    return mergeAux(null, start, topN, shardHits);
+  public static TopDocs merge(int start, int topN, TopDocs[] shardHits, boolean setShardIndex) {
+    return mergeAux(null, start, topN, shardHits, setShardIndex);
   }
 
   /** Returns a new TopFieldDocs, containing topN results across
@@ -226,31 +231,34 @@ public class TopDocs {
    *  the same Sort, and sort field values must have been
    *  filled (ie, <code>fillFields=true</code> must be
    *  passed to {@link TopFieldCollector#create}).
-   *  @see #merge(Sort, int, int, TopFieldDocs[])
+   *  @see #merge(Sort, int, int, TopFieldDocs[], boolean)
    * @lucene.experimental */
   public static TopFieldDocs merge(Sort sort, int topN, TopFieldDocs[] shardHits) {
-    return merge(sort, 0, topN, shardHits);
+    return merge(sort, 0, topN, shardHits, true);
   }
 
   /**
    * Same as {@link #merge(Sort, int, TopFieldDocs[])} but also ignores the top
    * {@code start} top docs. This is typically useful for pagination.
    *
-   * Note: This method will fill the {@link ScoreDoc#shardIndex} on all score docs returned iff all ScoreDocs passed
-   * to this have it's shard index set to <tt>-1</tt>. Otherwise the shard index is not set. This allows to predefine
-   * the shard index in order to incrementally merge shard responses without losing the original shard index.
+   * Note: If {@code setShardIndex} is true, this method will assume the incoming order of {@code shardHits} reflects
+   * each shard's index and will fill the {@link ScoreDoc#shardIndex}, otherwise
+   * it must already be set for all incoming {@code ScoreDoc}s, which can be useful when doing multiple reductions
+   * (merges) of TopDocs.
+   *
    * @lucene.experimental
    */
-  public static TopFieldDocs merge(Sort sort, int start, int topN, TopFieldDocs[] shardHits) {
+  public static TopFieldDocs merge(Sort sort, int start, int topN, TopFieldDocs[] shardHits, boolean setShardIndex) {
     if (sort == null) {
       throw new IllegalArgumentException("sort must be non-null when merging field-docs");
     }
-    return (TopFieldDocs) mergeAux(sort, start, topN, shardHits);
+    return (TopFieldDocs) mergeAux(sort, start, topN, shardHits, setShardIndex);
   }
 
   /** Auxiliary method used by the {@link #merge} impls. A sort value of null
    *  is used to indicate that docs should be sorted by score. */
-  private static TopDocs mergeAux(Sort sort, int start, int size, TopDocs[] shardHits) {
+  private static TopDocs mergeAux(Sort sort, int start, int size, TopDocs[] shardHits, boolean setShardIndex) {
+
     final PriorityQueue<ShardRef> queue;
     if (sort == null) {
       queue = new ScoreMergeSortQueue(shardHits);
@@ -261,28 +269,15 @@ public class TopDocs {
     int totalHitCount = 0;
     int availHitCount = 0;
     float maxScore = Float.MIN_VALUE;
-    Boolean setShardIndex = null;
     for(int shardIDX=0;shardIDX<shardHits.length;shardIDX++) {
       final TopDocs shard = shardHits[shardIDX];
       // totalHits can be non-zero even if no hits were
       // collected, when searchAfter was used:
       totalHitCount += shard.totalHits;
       if (shard.scoreDocs != null && shard.scoreDocs.length > 0) {
-        if (shard.scoreDocs[0].shardIndex == -1) {
-          if (setShardIndex != null && setShardIndex == false) {
-            throw new IllegalStateException("scoreDocs at index " + shardIDX + " has undefined shard indices but previous scoreDocs were predefined");
-          }
-          setShardIndex = true;
-        } else {
-          if (setShardIndex != null && setShardIndex) {
-            throw new IllegalStateException("scoreDocs at index " + shardIDX + " has predefined shard indices but previous scoreDocs were undefined");
-          }
-          setShardIndex = false;
-        }
         availHitCount += shard.scoreDocs.length;
         queue.add(new ShardRef(shardIDX, setShardIndex == false));
         maxScore = Math.max(maxScore, shard.getMaxScore());
-        //System.out.println("  maxScore now " + maxScore + " vs " + shard.getMaxScore());
       }
     }
 
@@ -303,19 +298,16 @@ public class TopDocs {
         ShardRef ref = queue.top();
         final ScoreDoc hit = shardHits[ref.shardIndex].scoreDocs[ref.hitIndex++];
         if (setShardIndex) {
-          // unless this index is already initialized potentially due to multiple merge phases, or explicitly by the user
-          // we set the shard index to the index of the TopDocs array this hit is coming from.
-          // this allows multiple merge phases if needed but requires extra accounting on the users end.
-          // at the same time this is fully backwards compatible since the value was initialize to -1 from the beginning
+          // caller asked us to record shardIndex (index of the TopDocs array) this hit is coming from:
           hit.shardIndex = ref.shardIndex;
+        } else if (hit.shardIndex == -1) {
+          throw new IllegalArgumentException("setShardIndex is false but TopDocs[" + ref.shardIndex + "].scoreDocs[" + (ref.hitIndex-1) + "] is not set");
         }
+          
         if (hitUpto >= start) {
           hits[hitUpto - start] = hit;
         }
 
-        //System.out.println("  hitUpto=" + hitUpto);
-        //System.out.println("    doc=" + hits[hitUpto].doc + " score=" + hits[hitUpto].score);
-
         hitUpto++;
 
         if (ref.hitIndex < shardHits[ref.shardIndex].scoreDocs.length) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2e56c0e5/lucene/core/src/test/org/apache/lucene/search/TestTopDocsMerge.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsMerge.java b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsMerge.java
index 37c61a4..0372c2a 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsMerge.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsMerge.java
@@ -17,15 +17,22 @@
 package org.apache.lucene.search;
 
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FloatDocValuesField;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedDocValuesField;
-import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.CompositeReaderContext;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReaderContext;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.ReaderUtil;
 import org.apache.lucene.index.Term;
@@ -35,13 +42,6 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
 public class TestTopDocsMerge extends LuceneTestCase {
 
   private static class ShardSearcher extends IndexSearcher {
@@ -77,18 +77,18 @@ public class TestTopDocsMerge extends LuceneTestCase {
 
   public void testInconsistentTopDocsFail() {
     TopDocs[] topDocs = new TopDocs[] {
-        new TopDocs(1, new ScoreDoc[] { new ScoreDoc(1, 1.0f, 1) }),
+        new TopDocs(1, new ScoreDoc[] { new ScoreDoc(1, 1.0f) }),
         new TopDocs(1, new ScoreDoc[] { new ScoreDoc(1, 1.0f, -1) })
     };
     if (random().nextBoolean()) {
       ArrayUtil.swap(topDocs, 0, 1);
     }
-    expectThrows(IllegalStateException.class, () -> {
-      TopDocs.merge(0, 1, topDocs);
+    expectThrows(IllegalArgumentException.class, () -> {
+        TopDocs.merge(0, 1, topDocs, false);
     });
   }
 
-  public void testAssignShardIndex() {
+  public void testPreAssignedShardIndex() {
     boolean useConstantScore = random().nextBoolean();
     int numTopDocs = 2 + random().nextInt(10);
     ArrayList<TopDocs> topDocs = new ArrayList<>(numTopDocs);
@@ -100,8 +100,8 @@ public class TestTopDocsMerge extends LuceneTestCase {
       ScoreDoc[] scoreDocs = new ScoreDoc[numHits];
       for (int j = 0; j < scoreDocs.length; j++) {
         float score = useConstantScore ? 1.0f : random().nextFloat();
-        scoreDocs[j] = new ScoreDoc((100 * i) + j, score , i);
         // we set the shard index to index in the list here but shuffle the entire list below
+        scoreDocs[j] = new ScoreDoc((100 * i) + j, score , i);
       }
       topDocs.add(new TopDocs(numHits, scoreDocs));
       shardResultMapping.put(i, topDocs.get(i));
@@ -111,7 +111,11 @@ public class TestTopDocsMerge extends LuceneTestCase {
     Collections.shuffle(topDocs, random());
     final int from = random().nextInt(numHitsTotal-1);
     final int size = 1 + random().nextInt(numHitsTotal - from);
-    TopDocs merge = TopDocs.merge(from, size, topDocs.toArray(new TopDocs[0]));
+
+    // passing false here means TopDocs.merge uses the incoming ScoreDoc.shardIndex
+    // that we already set, instead of the position of that TopDocs in the array:
+    TopDocs merge = TopDocs.merge(from, size, topDocs.toArray(new TopDocs[0]), false);
+    
     assertTrue(merge.scoreDocs.length > 0);
     for (ScoreDoc scoreDoc : merge.scoreDocs) {
       assertTrue(scoreDoc.shardIndex != -1);
@@ -129,7 +133,7 @@ public class TestTopDocsMerge extends LuceneTestCase {
 
     // now ensure merge is stable even if we use our own shard IDs
     Collections.shuffle(topDocs, random());
-    TopDocs merge2 = TopDocs.merge(from, size, topDocs.toArray(new TopDocs[0]));
+    TopDocs merge2 = TopDocs.merge(from, size, topDocs.toArray(new TopDocs[0]), false);
     assertArrayEquals(merge.scoreDocs, merge2.scoreDocs);
   }
 
@@ -346,9 +350,9 @@ public class TestTopDocsMerge extends LuceneTestCase {
       final TopDocs mergedHits;
       if (useFrom) {
         if (sort == null) {
-          mergedHits = TopDocs.merge(from, size, shardHits);
+          mergedHits = TopDocs.merge(from, size, shardHits, true);
         } else {
-          mergedHits = TopDocs.merge(sort, from, size, (TopFieldDocs[]) shardHits);
+          mergedHits = TopDocs.merge(sort, from, size, (TopFieldDocs[]) shardHits, true);
         }
       } else {
         if (sort == null) {