You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2016/10/26 16:57:15 UTC

lucene-solr:branch_6x: SOLR-4164: fix group.limit=-1 in distributed mode

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6x 49d6332be -> 65f9b4dc4


SOLR-4164: fix group.limit=-1 in distributed mode


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/65f9b4dc
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/65f9b4dc
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/65f9b4dc

Branch: refs/heads/branch_6x
Commit: 65f9b4dc4c209dfa06aa72386f6a9bbb67a5a667
Parents: 49d6332
Author: yonik <yo...@apache.org>
Authored: Wed Oct 26 11:56:45 2016 -0400
Committer: yonik <yo...@apache.org>
Committed: Wed Oct 26 11:57:15 2016 -0400

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  3 +
 .../solr/handler/component/QueryComponent.java  |  7 ++-
 .../java/org/apache/solr/search/Grouping.java   |  4 +-
 .../TopGroupsShardResponseProcessor.java        |  9 ++-
 .../apache/solr/TestDistributedGrouping.java    | 59 ++++++++++----------
 5 files changed, 48 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/65f9b4dc/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 07e6f9d..4973d7d 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -235,6 +235,9 @@ Bug Fixes
 
 * SOLR-2039: Multivalued fields with dynamic names does not work properly with DIH.
   (K A, ruslan.shv, Cao Manh Dat via shalin)
+
+* SOLR-4164: group.limit=-1 was not supported for grouping in distributed mode.
+  (Cao Manh Dat, Lance Norskog, Webster Homer, hossman, yonik)
  
 Optimizations
 ----------------------

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/65f9b4dc/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
index d8c8a74..7418ebe 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
@@ -415,6 +415,9 @@ public class QueryComponent extends SearchComponent
               .setTruncateGroups(groupingSpec.isTruncateGroups() && groupingSpec.getFields().length > 0)
               .setSearcher(searcher);
 
+          int docsToCollect = Grouping.getMax(groupingSpec.getGroupOffset(), groupingSpec.getGroupLimit(), searcher.maxDoc());
+          docsToCollect = Math.max(docsToCollect, 1);
+
           for (String field : groupingSpec.getFields()) {
             SchemaField schemaField = schema.getField(field);
             String[] topGroupsParam = params.getParams(GroupParams.GROUP_DISTRIBUTED_TOPGROUPS_PREFIX + field);
@@ -437,7 +440,7 @@ public class QueryComponent extends SearchComponent
                     .setGroupSort(groupingSpec.getGroupSort())
                     .setSortWithinGroup(groupingSpec.getSortWithinGroup())
                     .setFirstPhaseGroups(topGroups)
-                    .setMaxDocPerGroup(groupingSpec.getGroupOffset() + groupingSpec.getGroupLimit())
+                    .setMaxDocPerGroup(docsToCollect)
                     .setNeedScores(needScores)
                     .setNeedMaxScore(needScores)
                     .build()
@@ -446,7 +449,7 @@ public class QueryComponent extends SearchComponent
 
           for (String query : groupingSpec.getQueries()) {
             secondPhaseBuilder.addCommandField(new Builder()
-                .setDocsToCollect(groupingSpec.getOffset() + groupingSpec.getLimit())
+                .setDocsToCollect(docsToCollect)
                 .setSort(groupingSpec.getGroupSort())
                 .setQuery(query, rb.req)
                 .setDocSet(searcher)

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/65f9b4dc/solr/core/src/java/org/apache/solr/search/Grouping.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/Grouping.java b/solr/core/src/java/org/apache/solr/search/Grouping.java
index 80a6aeb..8d6f3ca 100644
--- a/solr/core/src/java/org/apache/solr/search/Grouping.java
+++ b/solr/core/src/java/org/apache/solr/search/Grouping.java
@@ -459,10 +459,10 @@ public class Grouping {
    *
    * @param offset The offset
    * @param len    The number of documents to return
-   * @param max    The number of document to return if len < 0 or if offset + len < 0
+   * @param max    The number of document to return if len &lt; 0 or if offset + len &gt; 0
    * @return offset + len if len equals zero or higher. Otherwise returns max
    */
-  int getMax(int offset, int len, int max) {
+  public static int getMax(int offset, int len, int max) {
     int v = len < 0 ? max : offset + len;
     if (v < 0 || v > max) v = max;
     return v;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/65f9b4dc/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java
index d0a06c5..688a6c3 100644
--- a/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java
+++ b/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/TopGroupsShardResponseProcessor.java
@@ -162,7 +162,14 @@ public class TopGroupsShardResponseProcessor implements ShardResponseProcessor {
         }
 
         TopGroups<BytesRef>[] topGroupsArr = new TopGroups[topGroups.size()];
-        rb.mergedTopGroups.put(groupField, TopGroups.merge(topGroups.toArray(topGroupsArr), groupSort, sortWithinGroup, groupOffsetDefault, docsPerGroupDefault, TopGroups.ScoreMergeMode.None));
+        int docsPerGroup = docsPerGroupDefault;
+        if (docsPerGroup < 0) {
+          docsPerGroup = 0;
+          for (TopGroups subTopGroups : topGroups) {
+            docsPerGroup += subTopGroups.totalGroupedHitCount;
+          }
+        }
+        rb.mergedTopGroups.put(groupField, TopGroups.merge(topGroups.toArray(topGroupsArr), groupSort, sortWithinGroup, groupOffsetDefault, docsPerGroup, TopGroups.ScoreMergeMode.None));
       }
 
       for (String query : commandTopDocs.keySet()) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/65f9b4dc/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java b/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java
index af42ff4..ad62fcc 100644
--- a/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java
+++ b/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java
@@ -58,12 +58,12 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
     handle.put("grouped", UNORDERED);   // distrib grouping doesn't guarantee order of top level group commands
 
     // Test distributed grouping with empty indices
-    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc");
-    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "hl","true","hl.fl",t1);
-    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
-    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "stats", "true", "stats.field", i1);
-    query("q", "kings", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "spellcheck", "true", "spellcheck.build", "true", "qt", "spellCheckCompRH");
-    query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1);
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc");
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "hl","true","hl.fl",t1);
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "stats", "true", "stats.field", i1);
+    query("q", "kings", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "spellcheck", "true", "spellcheck.build", "true", "qt", "spellCheckCompRH");
+    query("q", "*:*", "fq", s1 + ":a", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1);
 
     indexr(id,1, i1, 100, tlong, 100, i1dv, 100, t1,"now is the time for all good men",
            tdate_a, "2010-04-20T11:00:00Z",
@@ -154,23 +154,24 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
     // test grouping
     // The second sort = id asc . The sorting behaviour is different in dist mode. See TopDocs#merge
     // The shard the result came from matters in the order if both document sortvalues are equal
-    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc");
-    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", "id asc, _docid_ asc");
-    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", "{!func}add(" + i1 + ",5) asc, id asc");
-    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
-    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "stats", "true", "stats.field", tlong);
-    query("q", "kings", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "spellcheck", "true", "spellcheck.build", "true", "qt", "spellCheckCompRH");
-    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "hl","true","hl.fl",t1);
-    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.sort", "id desc");
-
-    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.offset", 5, "group.limit", 5, "sort", i1 + " asc, id asc");
-    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "offset", 5, "rows", 5, "group.offset", 5, "group.limit", 5, "sort", i1 + " asc, id asc");
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc");
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 0, "sort", i1 + " asc, id asc");
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", "id asc, _docid_ asc");
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", "{!func}add(" + i1 + ",5) asc, id asc");
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "stats", "true", "stats.field", tlong);
+    query("q", "kings", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "spellcheck", "true", "spellcheck.build", "true", "qt", "spellCheckCompRH");
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "facet", "true", "hl","true","hl.fl",t1);
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "group.sort", "id desc");
+
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.offset", 5, "group.limit", -1, "sort", i1 + " asc, id asc");
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "offset", 5, "rows", 5, "group.offset", 5, "group.limit", -1, "sort", i1 + " asc, id asc");
     query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "offset", 5, "rows", 5, "sort", i1 + " asc, id asc", "group.format", "simple");
     query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "offset", 5, "rows", 5, "sort", i1 + " asc, id asc", "group.main", "true");
     query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.offset", 5, "group.limit", 5, "sort", i1 + " asc, id asc", "group.format", "simple", "offset", 5, "rows", 5);
     query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.offset", 5, "group.limit", 5, "sort", i1 + " asc, id asc", "group.main", "true", "offset", 5, "rows", 5);
 
-    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.query", t1 + ":kings OR " + t1 + ":eggs", "group.limit", 10, "sort", i1 + " asc, id asc");
+    query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.query", t1 + ":kings OR " + t1 + ":eggs", "group.limit", -1, "sort", i1 + " asc, id asc");
     query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", "group.field", i1, "group.query", t1 + ":kings OR " + t1 + ":eggs", "group.limit", 10, "sort", i1 + " asc, id asc");
 
     query("q", "*:*", "fl", "id," + i1dv, "group", "true", "group.field", i1dv, "group.limit", 10, "sort", i1 + " asc, id asc");
@@ -180,7 +181,7 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
     query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", 
           "group.query", t1 + ":kings OR " + t1 + ":eggs", 
           "group.query", "id:5", // single doc, so only one shard will have it
-          "group.limit", 10, "sort", i1 + " asc, id asc");
+          "group.limit", -1, "sort", i1 + " asc, id asc");
     handle.put(t1 + ":this_will_never_match", SKIP); // :TODO: SOLR-4181
     query("q", "*:*", "rows", 100, "fl", "id," + i1, "group", "true", 
           "group.query", t1 + ":kings OR " + t1 + ":eggs", 
@@ -220,8 +221,8 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
     }
 
     // SOLR-3316
-    query("q", "*:*", "fq", s1 + ":a", "rows", 0, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
-    query("q", "*:*", "fq", s1 + ":a", "rows", 0, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1);
+    query("q", "*:*", "fq", s1 + ":a", "rows", 0, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "facet", "true", "facet.field", t1);
+    query("q", "*:*", "fq", s1 + ":a", "rows", 0, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " asc, id asc", "group.truncate", "true", "facet", "true", "facet.field", t1);
 
     // SOLR-3436
     query("q", "*:*", "fq", s1 + ":a", "fl", "id," + i1, "group", "true", "group.field", i1, "sort", i1 + " asc, id asc", "group.ngroups", "true");
@@ -241,7 +242,7 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
     }
 
     ModifiableSolrParams params = new ModifiableSolrParams();
-    Object[] q =  {"q", "*:*", "fq", s1 + ":a", "rows", 1, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "group.ngroups", "true"};
+    Object[] q =  {"q", "*:*", "fq", s1 + ":a", "rows", 1, "fl", "id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "group.ngroups", "true"};
 
     for (int i = 0; i < q.length; i += 2) {
       params.add(q[i].toString(), q[i + 1].toString());
@@ -263,25 +264,25 @@ public class TestDistributedGrouping extends BaseDistributedSearchTestCase {
     // We validate distributed grouping with scoring as first sort.
     // note: this 'q' matches all docs and returns the 'id' as the score, which is unique and so our results should be deterministic.
     handle.put("maxScore", SKIP);// TODO see SOLR-6612
-    query("q", "{!func}id", "rows", 100, "fl", "score,id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", i1 + " desc", "group.sort", "score desc"); // SOLR-2955
-    query("q", "{!func}id", "rows", 100, "fl", "score,id," + i1, "group", "true", "group.field", i1, "group.limit", 10, "sort", "score desc, _docid_ asc, id asc");
-    query("q", "{!func}id", "rows", 100, "fl", "score,id," + i1, "group", "true", "group.field", i1, "group.limit", 10);
+    query("q", "{!func}id", "rows", 100, "fl", "score,id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", i1 + " desc", "group.sort", "score desc"); // SOLR-2955
+    query("q", "{!func}id", "rows", 100, "fl", "score,id," + i1, "group", "true", "group.field", i1, "group.limit", -1, "sort", "score desc, _docid_ asc, id asc");
+    query("q", "{!func}id", "rows", 100, "fl", "score,id," + i1, "group", "true", "group.field", i1, "group.limit", -1);
 
     // some explicit checks of non default sorting, and sort/group.sort with diff clauses
     query("q", "{!func}id", "rows", 100, "fl", tlong + ",id," + i1, "group", "true",
-          "group.field", i1, "group.limit", 10,
+          "group.field", i1, "group.limit", -1,
           "sort", tlong+" asc, id desc");
     query("q", "{!func}id", "rows", 100, "fl", tlong + ",id," + i1, "group", "true",
-          "group.field", i1, "group.limit", 10,
+          "group.field", i1, "group.limit", -1,
           "sort", "id asc",
           "group.sort", tlong+" asc, id desc");
     query("q", "{!func}id", "rows", 100, "fl", tlong + ",id," + i1, "group", "true",
-          "group.field", i1, "group.limit", 10,
+          "group.field", i1, "group.limit", -1,
           "sort", tlong+" asc, id desc",
           "group.sort", "id asc");
     rsp = query("q", "{!func}id", "fq", oddField+":[* TO *]",
                 "rows", 100, "fl", tlong + ",id," + i1, "group", "true",
-                "group.field", i1, "group.limit", 10,
+                "group.field", i1, "group.limit", -1,
                 "sort", tlong+" asc",
                 "group.sort", oddField+" asc");
     nl = (NamedList<?>) rsp.getResponse().get("grouped");