You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2016/10/26 16:57:15 UTC
lucene-solr:branch_6x: SOLR-4164: fix group.limit=-1 in distributed
mode
Repository: lucene-solr
Updated Branches:
refs/heads/branch_6x 49d6332be -> 65f9b4dc4
SOLR-4164: fix group.limit=-1 in distributed mode
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/65f9b4dc
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/65f9b4dc
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/65f9b4dc
Branch: refs/heads/branch_6x
Commit: 65f9b4dc4c209dfa06aa72386f6a9bbb67a5a667
Parents: 49d6332
Author: yonik <yo...@apache.org>
Authored: Wed Oct 26 11:56:45 2016 -0400
Committer: yonik <yo...@apache.org>
Committed: Wed Oct 26 11:57:15 2016 -0400
----------------------------------------------------------------------
solr/CHANGES.txt | 3 +
.../solr/handler/component/QueryComponent.java | 7 ++-
.../java/org/apache/solr/search/Grouping.java | 4 +-
.../TopGroupsShardResponseProcessor.java | 9 ++-
.../apache/solr/TestDistributedGrouping.java | 59 ++++++++++----------
5 files changed, 48 insertions(+), 34 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/65f9b4dc/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 07e6f9d..4973d7d 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -235,6 +235,9 @@ Bug Fixes
* SOLR-2039: Multivalued fields with dynamic names does not work properly with DIH.
(K A, ruslan.shv, Cao Manh Dat via shalin)
+
+* SOLR-4164: group.limit=-1 was not supported for grouping in distributed mode.
+ (Cao Manh Dat, Lance Norskog, Webster Homer, hossman, yonik)
Optimizations
----------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/65f9b4dc/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
index d8c8a74..7418ebe 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
@@ -415,6 +415,9 @@ public class QueryComponent extends SearchComponent
.setTruncateGroups(groupingSpec.isTruncateGroups() && groupingSpec.getFields().length > 0)
.setSearcher(searcher);
+ int docsToCollect = Grouping.getMax(groupingSpec.getGroupOffset(), groupingSpec.getGroupLimit(), searcher.maxDoc());
+ docsToCollect = Math.max(docsToCollect, 1);
+
for (String field : groupingSpec.getFields()) {
SchemaField schemaField = schema.getField(field);
String[] topGroupsParam = params.getParams(GroupParams.GROUP_DISTRIBUTED_TOPGROUPS_PREFIX + field);
@@ -437,7 +440,7 @@ public class QueryComponent extends SearchComponent
.setGroupSort(groupingSpec.getGroupSort())
.setSortWithinGroup(groupingSpec.getSortWithinGroup())
.setFirstPhaseGroups(topGroups)
- .setMaxDocPerGroup(groupingSpec.getGroupOffset() + groupingSpec.getGroupLimit())
+ .setMaxDocPerGroup(docsToCollect)
.setNeedScores(needScores)
.setNeedMaxScore(needScores)
.build()
@@ -446,7 +449,7 @@ public class QueryComponent extends SearchComponent
for (String query : groupingSpec.getQueries()) {
secondPhaseBuilder.addCommandField(new Builder()
- .setDocsToCollect(groupingSpec.getOffset() + groupingSpec.getLimit())
+ .setDocsToCollect(docsToCollect)
.setSort(groupingSpec.getGroupSort())
.setQuery(query, rb.req)
.setDocSet(searcher)
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/65f9b4dc/solr/core/src/java/org/apache/solr/search/Grouping.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/Grouping.java b/solr/core/src/java/org/apache/solr/search/Grouping.java
index 80a6aeb..8d6f3ca 100644
--- a/solr/core/src/java/org/apache/solr/search/Grouping.java
+++ b/solr/core/src/java/org/apache/solr/search/Grouping.java
@@ -459,10 +459,10 @@ public class Grouping {
*
* @param offset The offset
* @param len The number of documents to return
- * @param max The number of document to return if len < 0 or if offset + len < 0
+ * @param max The number of document to return if len < 0 or if offset + len > 0
* @return offset + len if len equals zero or higher. Otherwise returns max
*/
- int getMax(int offset, int len, int max) {
+ public static int getMax(int offset, int len, int max) {
int v = len < 0 ? max : offset + len;
if (v < 0 || v > max) v = max;
return v;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/65f9b4dc/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java
index d0a06c5..688a6c3 100644
--- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java
+++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java
@@ -162,7 +162,14 @@ public class TopGroupsShardResponseProcessor implements ShardResponseProcessor {
}
TopGroups<BytesRef>[] topGroupsArr = new TopGroups[topGroups.size()];
- rb.mergedTopGroups.put(groupField, TopGroups.merge(topGroups.toArray(topGroupsArr), groupSort, sortWithinGroup, groupOffsetDefault, docsPerGroupDefault, TopGroups.ScoreMergeMode.None));
+ int docsPerGroup = docsPerGroupDefault;
+ if (docsPerGroup < 0) {
+ docsPerGroup = 0;
+ for (TopGroups subTopGroups : topGroups) {
+ docsPerGroup += subTopGroups.totalGroupedHitCount;
+ }
+ }
+ rb.mergedTopGroups.put(groupField, TopGroups.merge(topGroups.toArray(topGroupsArr), groupSort, sortWithinGroup, groupOffsetDefault, docsPerGroup, TopGroups.ScoreMergeMode.None));
}
for (String query : commandTopDocs.keySet()) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/65f9b4dc/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java b/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java
index af42ff4..ad62fcc 100644
--- a/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java
+++ b/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java
@@ -58,12 +58,12 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
handle.put("grouped", UNORDERED); // distrib grouping doesn't guarantee order of top level group commands
// Test distributed grouping with empty indices
- query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc");
- query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "hl","true","hl.fl",t1);
- query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
- query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "stats", "true", "stats.field", i1);
- query("q", "kings", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "spellcheck", "true", "spellcheck.build", "true", "qt", "spellCheckCompRH");
- query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1);
+ query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc");
+ query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "hl","true","hl.fl",t1);
+ query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
+ query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "stats", "true", "stats.field", i1);
+ query("q", "kings", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "spellcheck", "true", "spellcheck.build", "true", "qt", "spellCheckCompRH");
+ query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1);
indexr(id,1, i1, 100, tlong, 100, i1dv, 100, t1,"now is the time for all good men",
tdate_a, "2010-04-20T11:00:00Z",
@@ -154,23 +154,24 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
// test grouping
// The second sort = id asc . The sorting behaviour is different in dist mode. See TopDocs#merge
// The shard the result came from matters in the order if both document sortvalues are equal
- query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc");
- query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", "id asc, _docid_ asc");
- query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", "{!func}add(" + i1 + ",5) asc, id asc");
- query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
- query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "stats", "true", "stats.field", tlong);
- query("q", "kings", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "spellcheck", "true", "spellcheck.build", "true", "qt", "spellCheckCompRH");
- query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "hl","true","hl.fl",t1);
- query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.sort", "id desc");
-
- query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.offset", 5, "group.limit", 5, "sort", i1 + " asc, id asc");
- query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "offset", 5, "rows", 5, "group.offset", 5, "group.limit", 5, "sort", i1 + " asc, id asc");
+ query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc");
+ query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 0, "sort", i1 + " asc, id asc");
+ query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", "id asc, _docid_ asc");
+ query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", "{!func}add(" + i1 + ",5) asc, id asc");
+ query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
+ query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "stats", "true", "stats.field", tlong);
+ query("q", "kings", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "spellcheck", "true", "spellcheck.build", "true", "qt", "spellCheckCompRH");
+ query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "facet", "true", "hl","true","hl.fl",t1);
+ query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "group.sort", "id desc");
+
+ query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.offset", 5, "group.limit", -1, "sort", i1 + " asc, id asc");
+ query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "offset", 5, "rows", 5, "group.offset", 5, "group.limit", -1, "sort", i1 + " asc, id asc");
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "offset", 5, "rows", 5, "sort", i1 + " asc, id asc", "group.format", "simple");
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "offset", 5, "rows", 5, "sort", i1 + " asc, id asc", "group.main", "true");
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.offset", 5, "group.limit", 5, "sort", i1 + " asc, id asc", "group.format", "simple", "offset", 5, "rows", 5);
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.offset", 5, "group.limit", 5, "sort", i1 + " asc, id asc", "group.main", "true", "offset", 5, "rows", 5);
- query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.query", t1 + ":kings OR " + t1 + ":eggs", "group.limit", 10, "sort", i1 + " asc, id asc");
+ query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.query", t1 + ":kings OR " + t1 + ":eggs", "group.limit", -1, "sort", i1 + " asc, id asc");
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.query", t1 + ":kings OR " + t1 + ":eggs", "group.limit", 10, "sort", i1 + " asc, id asc");
query("q", "*:*", "fl", "id," + i1dv, "group", "true", "group.field", i1dv, "group.limit", 10, "sort", i1 + " asc, id asc");
@@ -180,7 +181,7 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true",
"group.query", t1 + ":kings OR " + t1 + ":eggs",
"group.query", "id:5", // single doc, so only one shard will have it
- "group.limit", 10, "sort", i1 + " asc, id asc");
+ "group.limit", -1, "sort", i1 + " asc, id asc");
handle.put(t1 + ":this_will_never_match", SKIP); // :TODO: SOLR-4181
query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true",
"group.query", t1 + ":kings OR " + t1 + ":eggs",
@@ -220,8 +221,8 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
}
// SOLR-3316
- query("q", "*:*", "fq", s1 + ":a", "rows", 0, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
- query("q", "*:*", "fq", s1 + ":a", "rows", 0, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1);
+ query("q", "*:*", "fq", s1 + ":a", "rows", 0, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
+ query("q", "*:*", "fq", s1 + ":a", "rows", 0, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1);
// SOLR-3436
query("q", "*:*", "fq", s1 + ":a", "fl", "id," + i1, "group", "true", "group.field", i1, "sort", i1 + " asc, id asc", "group.ngroups", "true");
@@ -241,7 +242,7 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
}
ModifiableSolrParams params = new ModifiableSolrParams();
- Object[] q = {"q", "*:*", "fq", s1 + ":a", "rows", 1, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "group.ngroups", "true"};
+ Object[] q = {"q", "*:*", "fq", s1 + ":a", "rows", 1, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "group.ngroups", "true"};
for (int i = 0; i < q.length; i += 2) {
params.add(q[i].toString(), q[i + 1].toString());
@@ -263,25 +264,25 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
// We validate distributed grouping with scoring as first sort.
// note: this 'q' matches all docs and returns the 'id' as the score, which is unique and so our results should be deterministic.
handle.put("maxScore", SKIP);// TODO see SOLR-6612
- query("q", "{!func}id", "rows", 100, "fl", "score,id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " desc", "group.sort", "score desc"); // SOLR-2955
- query("q", "{!func}id", "rows", 100, "fl", "score,id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", "score desc, _docid_ asc, id asc");
- query("q", "{!func}id", "rows", 100, "fl", "score,id," + i1, "group", "true", "group.field", i1, "group.limit", 10);
+ query("q", "{!func}id", "rows", 100, "fl", "score,id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " desc", "group.sort", "score desc"); // SOLR-2955
+ query("q", "{!func}id", "rows", 100, "fl", "score,id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", "score desc, _docid_ asc, id asc");
+ query("q", "{!func}id", "rows", 100, "fl", "score,id," + i1, "group", "true", "group.field", i1, "group.limit", -1);
// some explicit checks of non default sorting, and sort/group.sort with diff clauses
query("q", "{!func}id", "rows", 100, "fl", tlong + ",id," + i1, "group", "true",
- "group.field", i1, "group.limit", 10,
+ "group.field", i1, "group.limit", -1,
"sort", tlong+" asc, id desc");
query("q", "{!func}id", "rows", 100, "fl", tlong + ",id," + i1, "group", "true",
- "group.field", i1, "group.limit", 10,
+ "group.field", i1, "group.limit", -1,
"sort", "id asc",
"group.sort", tlong+" asc, id desc");
query("q", "{!func}id", "rows", 100, "fl", tlong + ",id," + i1, "group", "true",
- "group.field", i1, "group.limit", 10,
+ "group.field", i1, "group.limit", -1,
"sort", tlong+" asc, id desc",
"group.sort", "id asc");
rsp = query("q", "{!func}id", "fq", oddField+":[* TO *]",
"rows", 100, "fl", tlong + ",id," + i1, "group", "true",
- "group.field", i1, "group.limit", 10,
+ "group.field", i1, "group.limit", -1,
"sort", tlong+" asc",
"group.sort", oddField+" asc");
nl = (NamedList<?>) rsp.getResponse().get("grouped");