You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mv...@apache.org on 2012/02/13 01:00:45 UTC
svn commit: r1243374 - in /lucene/dev/branches/branch_3x/solr: ./
core/src/java/org/apache/solr/handler/component/
core/src/java/org/apache/solr/search/grouping/distributed/requestfactory/
core/src/java/org/apache/solr/search/grouping/distributed/respo...
Author: mvg
Date: Mon Feb 13 00:00:45 2012
New Revision: 1243374
URL: http://svn.apache.org/viewvc?rev=1243374&view=rev
Log:
SOLR-3109: Fixed numerous redundant shard requests when using distributed grouping.
Modified:
lucene/dev/branches/branch_3x/solr/CHANGES.txt
lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java
lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/search/grouping/distributed/requestfactory/SearchGroupsRequestFactory.java
lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/search/grouping/distributed/requestfactory/TopGroupsShardRequestFactory.java
lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SearchGroupShardResponseProcessor.java
lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java
Modified: lucene/dev/branches/branch_3x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/CHANGES.txt?rev=1243374&r1=1243373&r2=1243374&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/solr/CHANGES.txt Mon Feb 13 00:00:45 2012
@@ -1,4 +1,3 @@
-
Apache Solr Release Notes
Introduction
@@ -191,6 +190,8 @@ Bug Fixes
* SOLR-3084: Fixed initialization error when using
<queryResponseWriter default="true" ... /> (Bernd Fehling and hossman)
+* SOLR-3109: Fixed numerous redundant shard requests when using distributed grouping.
+ (rblack via Martijn van Groningen)
Other Changes
----------------------
Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java?rev=1243374&r1=1243373&r2=1243374&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/handler/component/QueryComponent.java Mon Feb 13 00:00:45 2012
@@ -251,7 +251,7 @@ public class QueryComponent extends Sear
if (groupingSpec != null) {
try {
boolean needScores = (cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0;
- if (params.getBool("group.distibuted.first", false)) {
+ if (params.getBool("group.distributed.first", false)) {
CommandHandler.Builder topsGroupsActionBuilder = new CommandHandler.Builder()
.setQueryCommand(cmd)
.setNeedDocSet(false) // Order matters here
@@ -272,7 +272,7 @@ public class QueryComponent extends Sear
rsp.add("firstPhase", commandHandler.processResult(result, serializer));
rb.setResult(result);
return;
- } else if (params.getBool("group.distibuted.second", false)) {
+ } else if (params.getBool("group.distributed.second", false)) {
CommandHandler.Builder secondPhaseBuilder = new CommandHandler.Builder()
.setQueryCommand(cmd)
.setTruncateGroups(groupingSpec.isTruncateGroups() && groupingSpec.getFields().length > 0)
Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java?rev=1243374&r1=1243373&r2=1243374&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/handler/component/ResponseBuilder.java Mon Feb 13 00:00:45 2012
@@ -40,6 +40,7 @@ import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.Set;
/**
* This class is experimental and will be changing in the future.
@@ -158,7 +159,7 @@ public class ResponseBuilder
// Context fields for grouping
public final Map<String, Collection<SearchGroup<String>>> mergedSearchGroups = new HashMap<String, Collection<SearchGroup<String>>>();
- public final Map<String, Map<SearchGroup<String>, String>> searchGroupToShard = new HashMap<String, Map<SearchGroup<String>, String>>();
+ public final Map<String, Map<SearchGroup<String>, Set<String>>> searchGroupToShards = new HashMap<String, Map<SearchGroup<String>, Set<String>>>();
public final Map<String, TopGroups<String>> mergedTopGroups = new HashMap<String, TopGroups<String>>();
public final Map<String, QueryCommandResult> mergedQueryCommandResults = new HashMap<String, QueryCommandResult>();
public final Map<Object, SolrDocument> retrievedDocuments = new HashMap<Object, SolrDocument>();
Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/search/grouping/distributed/requestfactory/SearchGroupsRequestFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/search/grouping/distributed/requestfactory/SearchGroupsRequestFactory.java?rev=1243374&r1=1243373&r2=1243374&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/search/grouping/distributed/requestfactory/SearchGroupsRequestFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/search/grouping/distributed/requestfactory/SearchGroupsRequestFactory.java Mon Feb 13 00:00:45 2012
@@ -71,7 +71,7 @@ public class SearchGroupsRequestFactory
// in this first phase, request only the unique key field
// and any fields needed for merging.
- sreq.params.set("group.distibuted.first","true");
+ sreq.params.set("group.distributed.first","true");
if ( (rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES)!=0 || rb.getSortSpec().includesScore()) {
sreq.params.set(CommonParams.FL, rb.req.getSchema().getUniqueKeyField().getName() + ",score");
Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/search/grouping/distributed/requestfactory/TopGroupsShardRequestFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/search/grouping/distributed/requestfactory/TopGroupsShardRequestFactory.java?rev=1243374&r1=1243373&r2=1243374&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/search/grouping/distributed/requestfactory/TopGroupsShardRequestFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/search/grouping/distributed/requestfactory/TopGroupsShardRequestFactory.java Mon Feb 13 00:00:45 2012
@@ -64,102 +64,58 @@ public class TopGroupsShardRequestFactor
private ShardRequest[] createRequestForSpecificShards(ResponseBuilder rb) {
// Determine all unique shards to query for TopGroups
- Set<String> shards = new HashSet<String>();
- for (String command : rb.searchGroupToShard.keySet()) {
- Map<SearchGroup<String>, String> groupsToShard = rb.searchGroupToShard.get(command);
- shards.addAll(groupsToShard.values());
- }
-
- ShardRequest[] sreqs = new ShardRequest[shards.size()];
- int i = 0;
- for (String shard : shards) {
- ShardRequest sreq = new ShardRequest();
- sreq.purpose = ShardRequest.PURPOSE_GET_TOP_IDS;
- sreq.actualShards = new String[] {shard};
- sreq.params = new ModifiableSolrParams(rb.req.getParams());
-
- // If group.format=simple group.offset doesn't make sense
- Grouping.Format responseFormat = rb.getGroupingSpec().getResponseFormat();
- if (responseFormat == Grouping.Format.simple || rb.getGroupingSpec().isMain()) {
- sreq.params.remove(GroupParams.GROUP_OFFSET);
+ Set<String> uniqueShards = new HashSet<String>();
+ for (String command : rb.searchGroupToShards.keySet()) {
+ Map<SearchGroup<String>, Set<String>> shards = rb.searchGroupToShards.get(command);
+ for(Set<String> shardsForGroup: shards.values()) {
+ uniqueShards.addAll(shardsForGroup);
}
-
- sreq.params.remove(ShardParams.SHARDS);
-
- // set the start (offset) to 0 for each shard request so we can properly merge
- // results from the start.
- if(rb.shards_start > -1) {
- // if the client set shards.start set this explicitly
- sreq.params.set(CommonParams.START,rb.shards_start);
- } else {
- sreq.params.set(CommonParams.START, "0");
- }
- if(rb.shards_rows > -1) {
- // if the client set shards.rows set this explicity
- sreq.params.set(CommonParams.ROWS,rb.shards_rows);
- } else {
- sreq.params.set(CommonParams.ROWS, rb.getSortSpec().getOffset() + rb.getSortSpec().getCount());
- }
-
- sreq.params.set("group.distibuted.second","true");
- for (Map.Entry<String, Collection<SearchGroup<String>>> entry : rb.mergedSearchGroups.entrySet()) {
- for (SearchGroup<String> searchGroup : entry.getValue()) {
- String groupValue;
- if (searchGroup.groupValue != null) {
- String rawGroupValue = searchGroup.groupValue;
- FieldType fieldType = rb.req.getSearcher().getSchema().getField(entry.getKey()).getType();
- groupValue = fieldType.indexedToReadable(rawGroupValue);
- } else {
- groupValue = GROUP_NULL_VALUE;
- }
- sreq.params.add("group.topgroups." + entry.getKey(), groupValue);
- }
- }
-
- if ((rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES) != 0 || rb.getSortSpec().includesScore()) {
- sreq.params.set(CommonParams.FL, rb.req.getSchema().getUniqueKeyField().getName() + ",score");
- } else {
- sreq.params.set(CommonParams.FL, rb.req.getSchema().getUniqueKeyField().getName());
- }
- sreqs[i++] = sreq;
}
- return sreqs;
+ return createRequest(rb, uniqueShards.toArray(new String[uniqueShards.size()]));
+ }
+
+ private ShardRequest[] createRequestForAllShards(ResponseBuilder rb) {
+ return createRequest(rb, ShardRequest.ALL_SHARDS);
}
- private ShardRequest[] createRequestForAllShards(ResponseBuilder rb) {
+ private ShardRequest[] createRequest(ResponseBuilder rb, String[] shards)
+ {
ShardRequest sreq = new ShardRequest();
+ sreq.shards = shards;
sreq.purpose = ShardRequest.PURPOSE_GET_TOP_IDS;
sreq.params = new ModifiableSolrParams(rb.req.getParams());
+
// If group.format=simple group.offset doesn't make sense
Grouping.Format responseFormat = rb.getGroupingSpec().getResponseFormat();
if (responseFormat == Grouping.Format.simple || rb.getGroupingSpec().isMain()) {
sreq.params.remove(GroupParams.GROUP_OFFSET);
}
+
sreq.params.remove(ShardParams.SHARDS);
// set the start (offset) to 0 for each shard request so we can properly merge
// results from the start.
- if(rb.shards_start > -1) {
+ if (rb.shards_start > -1) {
// if the client set shards.start set this explicitly
- sreq.params.set(CommonParams.START,rb.shards_start);
+ sreq.params.set(CommonParams.START, rb.shards_start);
} else {
sreq.params.set(CommonParams.START, "0");
}
- if(rb.shards_rows > -1) {
+ if (rb.shards_rows > -1) {
// if the client set shards.rows set this explicitly
- sreq.params.set(CommonParams.ROWS,rb.shards_rows);
+ sreq.params.set(CommonParams.ROWS, rb.shards_rows);
} else {
sreq.params.set(CommonParams.ROWS, rb.getSortSpec().getOffset() + rb.getSortSpec().getCount());
}
- sreq.params.set("group.distibuted.second","true");
+ sreq.params.set("group.distributed.second", "true");
for (Map.Entry<String, Collection<SearchGroup<String>>> entry : rb.mergedSearchGroups.entrySet()) {
for (SearchGroup<String> searchGroup : entry.getValue()) {
String groupValue;
if (searchGroup.groupValue != null) {
- String rawGroupValue = searchGroup.groupValue;
+ String rawGroupValue = searchGroup.groupValue;
FieldType fieldType = rb.req.getSearcher().getSchema().getField(entry.getKey()).getType();
groupValue = fieldType.indexedToReadable(rawGroupValue);
} else {
@@ -169,7 +125,7 @@ public class TopGroupsShardRequestFactor
}
}
- if ( (rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES)!=0 || rb.getSortSpec().includesScore()) {
+ if ((rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES)!=0 || rb.getSortSpec().includesScore()) {
sreq.params.set(CommonParams.FL, rb.req.getSchema().getUniqueKeyField().getName() + ",score");
} else {
sreq.params.set(CommonParams.FL, rb.req.getSchema().getUniqueKeyField().getName());
Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SearchGroupShardResponseProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SearchGroupShardResponseProcessor.java?rev=1243374&r1=1243373&r2=1243374&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SearchGroupShardResponseProcessor.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/search/grouping/distributed/responseprocessor/SearchGroupShardResponseProcessor.java Mon Feb 13 00:00:45 2012
@@ -45,12 +45,12 @@ public class SearchGroupShardResponsePro
String[] fields = rb.getGroupingSpec().getFields();
Map<String, List<Collection<SearchGroup<String>>>> commandSearchGroups = new HashMap<String, List<Collection<SearchGroup<String>>>>();
- Map<String, Map<SearchGroup<String>, String>> tempSearchGroupToShard = new HashMap<String, Map<SearchGroup<String>, String>>();
+ Map<String, Map<SearchGroup<String>, Set<String>>> tempSearchGroupToShards = new HashMap<String, Map<SearchGroup<String>, Set<String>>>();
for (String field : fields) {
commandSearchGroups.put(field, new ArrayList<Collection<SearchGroup<String>>>(shardRequest.responses.size()));
- tempSearchGroupToShard.put(field, new HashMap<SearchGroup<String>, String>());
- if (!rb.searchGroupToShard.containsKey(field)) {
- rb.searchGroupToShard.put(field, new HashMap<SearchGroup<String>, String>());
+ tempSearchGroupToShards.put(field, new HashMap<SearchGroup<String>, Set<String>>());
+ if (!rb.searchGroupToShards.containsKey(field)) {
+ rb.searchGroupToShards.put(field, new HashMap<SearchGroup<String>, Set<String>>());
}
}
@@ -68,7 +68,13 @@ public class SearchGroupShardResponsePro
commandSearchGroups.get(field).add(searchGroups);
for (SearchGroup<String> searchGroup : searchGroups) {
- tempSearchGroupToShard.get(field).put(searchGroup, srsp.getShard());
+ Map<SearchGroup<String>, Set<String>> map = tempSearchGroupToShards.get(field);
+ Set<String> shards = map.get(searchGroup);
+ if (shards == null) {
+ shards = new HashSet<String>();
+ map.put(searchGroup, shards);
+ }
+ shards.add(srsp.getShard());
}
}
}
@@ -81,7 +87,7 @@ public class SearchGroupShardResponsePro
rb.mergedSearchGroups.put(groupField, mergedTopGroups);
for (SearchGroup<String> mergedTopGroup : mergedTopGroups) {
- rb.searchGroupToShard.get(groupField).put(mergedTopGroup, tempSearchGroupToShard.get(groupField).get(mergedTopGroup));
+ rb.searchGroupToShards.get(groupField).put(mergedTopGroup, tempSearchGroupToShards.get(groupField).get(mergedTopGroup));
}
}
} catch (IOException e) {
Modified: lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java?rev=1243374&r1=1243373&r2=1243374&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/TestDistributedGrouping.java Mon Feb 13 00:00:45 2012
@@ -18,14 +18,8 @@ package org.apache.solr;
*/
import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
/**
* TODO? perhaps use:
* http://docs.codehaus.org/display/JETTY/ServletTester
@@ -68,11 +62,11 @@ public class TestDistributedGrouping ext
indexr(id,3, s1, 2, tlong, 2,t1,"how now brown cow",
tdate_a, "2010-05-03T11:00:00Z");
indexr(id,4, s1, -100 ,tlong, 101,
- t1,"the quick fox jumped over the lazy dog",
+ t1,"the quick fox jumped over the lazy dog",
tdate_a, "2010-05-03T11:00:00Z",
tdate_b, "2010-05-03T11:00:00Z");
indexr(id,5, s1, 500, tlong, 500 ,
- t1,"the quick fox jumped way over the lazy dog",
+ t1,"the quick fox jumped way over the lazy dog",
tdate_a, "2010-05-05T11:00:00Z");
indexr(id,6, s1, -600, tlong, 600 ,t1,"humpty dumpy sat on a wall");
indexr(id,7, s1, 123, tlong, 123 ,t1,"humpty dumpy had a great fall");
@@ -98,8 +92,39 @@ public class TestDistributedGrouping ext
}
indexr(id, 17, "SubjectTerms_mfacet", vals);
- for (int i=100; i<150; i++) {
- indexr(id, i);
+ indexr(
+ id, 18, s1, "232", tlong, 332,
+ t1,"no eggs on wall, lesson learned",
+ oddField, "odd man out"
+ );
+ indexr(
+ id, 19, s1, "232", tlong, 432,
+ t1, "many eggs on wall",
+ oddField, "odd man in"
+ );
+ indexr(
+ id, 20, s1, "232", tlong, 532,
+ t1, "some eggs on wall",
+ oddField, "odd man between"
+ );
+ indexr(
+ id, 21, s1, "232", tlong, 632,
+ t1, "a few eggs on wall",
+ oddField, "odd man under"
+ );
+ indexr(
+ id, 22, s1, "232", tlong, 732,
+ t1, "any eggs on wall",
+ oddField, "odd man above"
+ );
+ indexr(
+ id, 23, s1, "233", tlong, 734,
+ t1, "dirty eggs",
+ oddField, "odd eggs"
+ );
+
+ for (int i = 100; i < 150; i++) {
+ indexr(id, i);
}
int[] values = new int[]{9999, 99999, 999999, 9999999};
@@ -134,6 +159,10 @@ public class TestDistributedGrouping ext
query("q", "*:*", "rows", 100, "fl", "id," + s1, "group", "true", "group.query", t1 + ":kings OR " + t1 + ":eggs", "group.limit", 10, "sort", s1 + " asc, id asc");
query("q", "*:*", "rows", 100, "fl", "id," + s1, "group", "true", "group.field", s1, "group.query", t1 + ":kings OR " + t1 + ":eggs", "group.limit", 10, "sort", s1 + " asc, id asc");
+ // SOLR-3109
+ query("q", t1 + ":eggs", "rows", 100, "fl", "id," + s1, "group", "true", "group.field", s1, "group.limit", 10, "sort", tlong + " asc, id asc");
+ query("q", s1 + ":232", "rows", 100, "fl", "id," + s1, "group", "true", "group.field", s1, "group.limit", 10, "sort", tlong + " asc, id asc");
+
// To validate this, we need to make sure during indexing that all documents of one group occur only on the same shard
query("q", "*:*", "fq", s2 + ":a", "rows", 100, "fl", "id," + s1, "group", "true", "group.field", s1, "group.limit", 10, "sort", s1 + " asc, id asc", "group.ngroups", "true");
query("q", "*:*", "fq", s2 + ":a", "rows", 100, "fl", "id," + s1, "group", "true", "group.field", s1, "group.limit", 10, "sort", s1 + " asc, id asc", "group.truncate", "true");