You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2017/03/01 09:27:29 UTC
[21/50] [abbrv] lucene-solr:jira/solr-9858: LUCENE-7707: add explicit
boolean to TopDocs.merge to govern whether incoming or implicit shard index
should be used
LUCENE-7707: add explicit boolean to TopDocs.merge to govern whether incoming or implicit shard index should be used
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/2e56c0e5
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/2e56c0e5
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/2e56c0e5
Branch: refs/heads/jira/solr-9858
Commit: 2e56c0e50564c8feeeb2831dd36cff1e9b23a00f
Parents: 471d842
Author: Mike McCandless <mi...@apache.org>
Authored: Fri Feb 24 17:00:45 2017 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Fri Feb 24 17:01:19 2017 -0500
----------------------------------------------------------------------
lucene/CHANGES.txt | 10 +--
.../java/org/apache/lucene/search/FieldDoc.java | 6 +-
.../org/apache/lucene/search/IndexSearcher.java | 4 +-
.../java/org/apache/lucene/search/TopDocs.java | 68 +++++++++-----------
.../apache/lucene/search/TestTopDocsMerge.java | 38 ++++++-----
5 files changed, 62 insertions(+), 64 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2e56c0e5/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 741418a..5d3a077 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -97,6 +97,12 @@ API Changes
* LUCENE-7702: Removed GraphQuery in favour of simple boolean query. (Matt Webber via Jim Ferenczi)
+* LUCENE-7707: TopDocs.merge now takes a boolean option telling it
+ when to use the incoming shard index versus when to assign the shard
+ index itself, allowing users to merge shard responses incrementally
+ instead of once all shard responses are present. (Simon Willnauer,
+ Mike McCandless)
+
New Features
* LUCENE-7449: Add CROSSES relation support to RangeFieldQuery. (Nick Knize)
@@ -172,10 +178,6 @@ Improvements
earlier than regular queries in order to improve cache efficiency.
(Adrien Grand)
-* LUCENE-7707: Use predefined shard index when mergeing top docs if present. This
- allows to use TopDoc#merge to merge shard responses incrementally instead of
- once all shard responses are present. (Simon Willnauer)
-
Optimizations
* LUCENE-7641: Optimized point range queries to compute documents that do not
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2e56c0e5/lucene/core/src/java/org/apache/lucene/search/FieldDoc.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/FieldDoc.java b/lucene/core/src/java/org/apache/lucene/search/FieldDoc.java
index 31f7175..6125404 100644
--- a/lucene/core/src/java/org/apache/lucene/search/FieldDoc.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FieldDoc.java
@@ -52,18 +52,18 @@ public class FieldDoc extends ScoreDoc {
/** Expert: Creates one of these objects with empty sort information. */
public FieldDoc(int doc, float score) {
- super (doc, score);
+ super(doc, score);
}
/** Expert: Creates one of these objects with the given sort information. */
public FieldDoc(int doc, float score, Object[] fields) {
- super (doc, score);
+ super(doc, score);
this.fields = fields;
}
/** Expert: Creates one of these objects with the given sort information. */
public FieldDoc(int doc, float score, Object[] fields, int shardIndex) {
- super (doc, score, shardIndex);
+ super(doc, score, shardIndex);
this.fields = fields;
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2e56c0e5/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
index 5cae122..5ab16c2 100644
--- a/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
+++ b/lucene/core/src/java/org/apache/lucene/search/IndexSearcher.java
@@ -432,7 +432,7 @@ public class IndexSearcher {
for (TopScoreDocCollector collector : collectors) {
topDocs[i++] = collector.topDocs();
}
- return TopDocs.merge(cappedNumHits, topDocs);
+ return TopDocs.merge(0, cappedNumHits, topDocs, true);
}
};
@@ -559,7 +559,7 @@ public class IndexSearcher {
for (TopFieldCollector collector : collectors) {
topDocs[i++] = collector.topDocs();
}
- return TopDocs.merge(sort, cappedNumHits, topDocs);
+ return TopDocs.merge(sort, 0, cappedNumHits, topDocs, true);
}
};
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2e56c0e5/lucene/core/src/java/org/apache/lucene/search/TopDocs.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/TopDocs.java b/lucene/core/src/java/org/apache/lucene/search/TopDocs.java
index 2913cb2..3b66fca 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TopDocs.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TopDocs.java
@@ -16,7 +16,6 @@
*/
package org.apache.lucene.search;
-
import org.apache.lucene.util.PriorityQueue;
/** Represents hits returned by {@link
@@ -60,6 +59,8 @@ public class TopDocs {
private final static class ShardRef {
// Which shard (index into shardHits[]):
final int shardIndex;
+
+ // True if we should use the incoming ScoreDoc.shardIndex for sort order
final boolean useScoreDocIndex;
// Which hit within the shard:
@@ -77,10 +78,12 @@ public class TopDocs {
int getShardIndex(ScoreDoc scoreDoc) {
if (useScoreDocIndex) {
- assert scoreDoc.shardIndex != -1 : "scoreDoc shardIndex must be predefined set but wasn't";
+ if (scoreDoc.shardIndex == -1) {
+ throw new IllegalArgumentException("setShardIndex is false but TopDocs[" + shardIndex + "].scoreDocs[" + hitIndex + "] is not set");
+ }
return scoreDoc.shardIndex;
} else {
- assert scoreDoc.shardIndex == -1 : "scoreDoc shardIndex must be undefined but wasn't";
+ // NOTE: we don't assert that shardIndex is -1 here, because caller could in fact have set it but asked us to ignore it now
return shardIndex;
}
}
@@ -201,23 +204,25 @@ public class TopDocs {
* the provided TopDocs, sorting by score. Each {@link TopDocs}
* instance must be sorted.
*
- * @see #merge(int, int, TopDocs[])
+ * @see #merge(int, int, TopDocs[], boolean)
* @lucene.experimental */
public static TopDocs merge(int topN, TopDocs[] shardHits) {
- return merge(0, topN, shardHits);
+ return merge(0, topN, shardHits, true);
}
/**
* Same as {@link #merge(int, TopDocs[])} but also ignores the top
* {@code start} top docs. This is typically useful for pagination.
*
- * Note: This method will fill the {@link ScoreDoc#shardIndex} on all score docs returned iff all ScoreDocs passed
- * to this have it's shard index set to <tt>-1</tt>. Otherwise the shard index is not set. This allows to predefine
- * the shard index in order to incrementally merge shard responses without losing the original shard index.
+ * Note: If {@code setShardIndex} is true, this method will assume the incoming order of {@code shardHits} reflects
+ * each shard's index and will fill the {@link ScoreDoc#shardIndex}, otherwise
+ * it must already be set for all incoming {@code ScoreDoc}s, which can be useful when doing multiple reductions
+ * (merges) of TopDocs.
+ *
* @lucene.experimental
*/
- public static TopDocs merge(int start, int topN, TopDocs[] shardHits) {
- return mergeAux(null, start, topN, shardHits);
+ public static TopDocs merge(int start, int topN, TopDocs[] shardHits, boolean setShardIndex) {
+ return mergeAux(null, start, topN, shardHits, setShardIndex);
}
/** Returns a new TopFieldDocs, containing topN results across
@@ -226,31 +231,34 @@ public class TopDocs {
* the same Sort, and sort field values must have been
* filled (ie, <code>fillFields=true</code> must be
* passed to {@link TopFieldCollector#create}).
- * @see #merge(Sort, int, int, TopFieldDocs[])
+ * @see #merge(Sort, int, int, TopFieldDocs[], boolean)
* @lucene.experimental */
public static TopFieldDocs merge(Sort sort, int topN, TopFieldDocs[] shardHits) {
- return merge(sort, 0, topN, shardHits);
+ return merge(sort, 0, topN, shardHits, true);
}
/**
* Same as {@link #merge(Sort, int, TopFieldDocs[])} but also ignores the top
* {@code start} top docs. This is typically useful for pagination.
*
- * Note: This method will fill the {@link ScoreDoc#shardIndex} on all score docs returned iff all ScoreDocs passed
- * to this have it's shard index set to <tt>-1</tt>. Otherwise the shard index is not set. This allows to predefine
- * the shard index in order to incrementally merge shard responses without losing the original shard index.
+ * Note: If {@code setShardIndex} is true, this method will assume the incoming order of {@code shardHits} reflects
+ * each shard's index and will fill the {@link ScoreDoc#shardIndex}, otherwise
+ * it must already be set for all incoming {@code ScoreDoc}s, which can be useful when doing multiple reductions
+ * (merges) of TopDocs.
+ *
* @lucene.experimental
*/
- public static TopFieldDocs merge(Sort sort, int start, int topN, TopFieldDocs[] shardHits) {
+ public static TopFieldDocs merge(Sort sort, int start, int topN, TopFieldDocs[] shardHits, boolean setShardIndex) {
if (sort == null) {
throw new IllegalArgumentException("sort must be non-null when merging field-docs");
}
- return (TopFieldDocs) mergeAux(sort, start, topN, shardHits);
+ return (TopFieldDocs) mergeAux(sort, start, topN, shardHits, setShardIndex);
}
/** Auxiliary method used by the {@link #merge} impls. A sort value of null
* is used to indicate that docs should be sorted by score. */
- private static TopDocs mergeAux(Sort sort, int start, int size, TopDocs[] shardHits) {
+ private static TopDocs mergeAux(Sort sort, int start, int size, TopDocs[] shardHits, boolean setShardIndex) {
+
final PriorityQueue<ShardRef> queue;
if (sort == null) {
queue = new ScoreMergeSortQueue(shardHits);
@@ -261,28 +269,15 @@ public class TopDocs {
int totalHitCount = 0;
int availHitCount = 0;
float maxScore = Float.MIN_VALUE;
- Boolean setShardIndex = null;
for(int shardIDX=0;shardIDX<shardHits.length;shardIDX++) {
final TopDocs shard = shardHits[shardIDX];
// totalHits can be non-zero even if no hits were
// collected, when searchAfter was used:
totalHitCount += shard.totalHits;
if (shard.scoreDocs != null && shard.scoreDocs.length > 0) {
- if (shard.scoreDocs[0].shardIndex == -1) {
- if (setShardIndex != null && setShardIndex == false) {
- throw new IllegalStateException("scoreDocs at index " + shardIDX + " has undefined shard indices but previous scoreDocs were predefined");
- }
- setShardIndex = true;
- } else {
- if (setShardIndex != null && setShardIndex) {
- throw new IllegalStateException("scoreDocs at index " + shardIDX + " has predefined shard indices but previous scoreDocs were undefined");
- }
- setShardIndex = false;
- }
availHitCount += shard.scoreDocs.length;
queue.add(new ShardRef(shardIDX, setShardIndex == false));
maxScore = Math.max(maxScore, shard.getMaxScore());
- //System.out.println(" maxScore now " + maxScore + " vs " + shard.getMaxScore());
}
}
@@ -303,19 +298,16 @@ public class TopDocs {
ShardRef ref = queue.top();
final ScoreDoc hit = shardHits[ref.shardIndex].scoreDocs[ref.hitIndex++];
if (setShardIndex) {
- // unless this index is already initialized potentially due to multiple merge phases, or explicitly by the user
- // we set the shard index to the index of the TopDocs array this hit is coming from.
- // this allows multiple merge phases if needed but requires extra accounting on the users end.
- // at the same time this is fully backwards compatible since the value was initialize to -1 from the beginning
+ // caller asked us to record shardIndex (index of the TopDocs array) this hit is coming from:
hit.shardIndex = ref.shardIndex;
+ } else if (hit.shardIndex == -1) {
+ throw new IllegalArgumentException("setShardIndex is false but TopDocs[" + ref.shardIndex + "].scoreDocs[" + (ref.hitIndex-1) + "] is not set");
}
+
if (hitUpto >= start) {
hits[hitUpto - start] = hit;
}
- //System.out.println(" hitUpto=" + hitUpto);
- //System.out.println(" doc=" + hits[hitUpto].doc + " score=" + hits[hitUpto].score);
-
hitUpto++;
if (ref.hitIndex < shardHits[ref.shardIndex].scoreDocs.length) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2e56c0e5/lucene/core/src/test/org/apache/lucene/search/TestTopDocsMerge.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsMerge.java b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsMerge.java
index 37c61a4..0372c2a 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestTopDocsMerge.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestTopDocsMerge.java
@@ -17,15 +17,22 @@
package org.apache.lucene.search;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
-import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.CompositeReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
+import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.Term;
@@ -35,13 +42,6 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
public class TestTopDocsMerge extends LuceneTestCase {
private static class ShardSearcher extends IndexSearcher {
@@ -77,18 +77,18 @@ public class TestTopDocsMerge extends LuceneTestCase {
public void testInconsistentTopDocsFail() {
TopDocs[] topDocs = new TopDocs[] {
- new TopDocs(1, new ScoreDoc[] { new ScoreDoc(1, 1.0f, 1) }),
+ new TopDocs(1, new ScoreDoc[] { new ScoreDoc(1, 1.0f) }),
new TopDocs(1, new ScoreDoc[] { new ScoreDoc(1, 1.0f, -1) })
};
if (random().nextBoolean()) {
ArrayUtil.swap(topDocs, 0, 1);
}
- expectThrows(IllegalStateException.class, () -> {
- TopDocs.merge(0, 1, topDocs);
+ expectThrows(IllegalArgumentException.class, () -> {
+ TopDocs.merge(0, 1, topDocs, false);
});
}
- public void testAssignShardIndex() {
+ public void testPreAssignedShardIndex() {
boolean useConstantScore = random().nextBoolean();
int numTopDocs = 2 + random().nextInt(10);
ArrayList<TopDocs> topDocs = new ArrayList<>(numTopDocs);
@@ -100,8 +100,8 @@ public class TestTopDocsMerge extends LuceneTestCase {
ScoreDoc[] scoreDocs = new ScoreDoc[numHits];
for (int j = 0; j < scoreDocs.length; j++) {
float score = useConstantScore ? 1.0f : random().nextFloat();
- scoreDocs[j] = new ScoreDoc((100 * i) + j, score , i);
// we set the shard index to index in the list here but shuffle the entire list below
+ scoreDocs[j] = new ScoreDoc((100 * i) + j, score , i);
}
topDocs.add(new TopDocs(numHits, scoreDocs));
shardResultMapping.put(i, topDocs.get(i));
@@ -111,7 +111,11 @@ public class TestTopDocsMerge extends LuceneTestCase {
Collections.shuffle(topDocs, random());
final int from = random().nextInt(numHitsTotal-1);
final int size = 1 + random().nextInt(numHitsTotal - from);
- TopDocs merge = TopDocs.merge(from, size, topDocs.toArray(new TopDocs[0]));
+
+ // passing false here means TopDocs.merge uses the incoming ScoreDoc.shardIndex
+ // that we already set, instead of the position of that TopDocs in the array:
+ TopDocs merge = TopDocs.merge(from, size, topDocs.toArray(new TopDocs[0]), false);
+
assertTrue(merge.scoreDocs.length > 0);
for (ScoreDoc scoreDoc : merge.scoreDocs) {
assertTrue(scoreDoc.shardIndex != -1);
@@ -129,7 +133,7 @@ public class TestTopDocsMerge extends LuceneTestCase {
// now ensure merge is stable even if we use our own shard IDs
Collections.shuffle(topDocs, random());
- TopDocs merge2 = TopDocs.merge(from, size, topDocs.toArray(new TopDocs[0]));
+ TopDocs merge2 = TopDocs.merge(from, size, topDocs.toArray(new TopDocs[0]), false);
assertArrayEquals(merge.scoreDocs, merge2.scoreDocs);
}
@@ -346,9 +350,9 @@ public class TestTopDocsMerge extends LuceneTestCase {
final TopDocs mergedHits;
if (useFrom) {
if (sort == null) {
- mergedHits = TopDocs.merge(from, size, shardHits);
+ mergedHits = TopDocs.merge(from, size, shardHits, true);
} else {
- mergedHits = TopDocs.merge(sort, from, size, (TopFieldDocs[]) shardHits);
+ mergedHits = TopDocs.merge(sort, from, size, (TopFieldDocs[]) shardHits, true);
}
} else {
if (sort == null) {