You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2013/05/10 09:12:23 UTC
svn commit: r1480898 - in /lucene/dev/branches/lucene_solr_4_3: ./ solr/
solr/core/ solr/core/src/java/org/apache/solr/cloud/
solr/core/src/test/org/apache/solr/cloud/
Author: shalin
Date: Fri May 10 07:12:23 2013
New Revision: 1480898
URL: http://svn.apache.org/r1480898
Log:
SOLR-4797: Shard splitting creates sub shards which have the wrong hash range in cluster state. This happens when numShards is not a power of two and router is compositeId
Modified:
lucene/dev/branches/lucene_solr_4_3/ (props changed)
lucene/dev/branches/lucene_solr_4_3/solr/ (props changed)
lucene/dev/branches/lucene_solr_4_3/solr/CHANGES.txt (contents, props changed)
lucene/dev/branches/lucene_solr_4_3/solr/core/ (props changed)
lucene/dev/branches/lucene_solr_4_3/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java
lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyShardSplitTest.java
lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/cloud/ShardSplitTest.java
Modified: lucene/dev/branches/lucene_solr_4_3/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_3/solr/CHANGES.txt?rev=1480898&r1=1480897&r2=1480898&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_3/solr/CHANGES.txt (original)
+++ lucene/dev/branches/lucene_solr_4_3/solr/CHANGES.txt Fri May 10 07:12:23 2013
@@ -45,6 +45,9 @@ Bug Fixes
may be placed in the wrong shard when the default compositeId router
is used in conjunction with IDs containing "!". (yonik)
+* SOLR-4797: Shard splitting creates sub shards which have the wrong hash
+ range in cluster state. This happens when numShards is not a power of two
+ and router is compositeId. (shalin)
Other Changes
Modified: lucene/dev/branches/lucene_solr_4_3/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_3/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java?rev=1480898&r1=1480897&r2=1480898&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_3/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java (original)
+++ lucene/dev/branches/lucene_solr_4_3/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java Fri May 10 07:12:23 2013
@@ -319,21 +319,22 @@ public class OverseerCollectionProcessor
private boolean splitShard(ClusterState clusterState, ZkNodeProps message, NamedList results) {
log.info("Split shard invoked");
- String collection = message.getStr("collection");
+ String collectionName = message.getStr("collection");
String slice = message.getStr(ZkStateReader.SHARD_ID_PROP);
- Slice parentSlice = clusterState.getSlice(collection, slice);
+ Slice parentSlice = clusterState.getSlice(collectionName, slice);
if (parentSlice == null) {
- if(clusterState.getCollections().contains(collection)) {
+ if(clusterState.getCollections().contains(collectionName)) {
throw new SolrException(ErrorCode.BAD_REQUEST, "No shard with the specified name exists: " + slice);
} else {
- throw new SolrException(ErrorCode.BAD_REQUEST, "No collection with the specified name exists: " + collection);
+ throw new SolrException(ErrorCode.BAD_REQUEST, "No collection with the specified name exists: " + collectionName);
}
}
// find the leader for the shard
- Replica parentShardLeader = clusterState.getLeader(collection, slice);
-
+ Replica parentShardLeader = clusterState.getLeader(collectionName, slice);
+ DocCollection collection = clusterState.getCollection(collectionName);
+ DocRouter router = collection.getRouter() != null ? collection.getRouter() : DocRouter.DEFAULT;
DocRouter.Range range = parentSlice.getRange();
if (range == null) {
range = new PlainIdRouter().fullRange();
@@ -341,8 +342,7 @@ public class OverseerCollectionProcessor
// todo: fixed to two partitions?
// todo: accept the range as a param to api?
- // todo: handle randomizing subshard name in case a shard with the same name already exists.
- List<DocRouter.Range> subRanges = new PlainIdRouter().partitionRange(2, range);
+ List<DocRouter.Range> subRanges = router.partitionRange(2, range);
try {
List<String> subSlices = new ArrayList<String>(subRanges.size());
List<String> subShardNames = new ArrayList<String>(subRanges.size());
@@ -350,10 +350,10 @@ public class OverseerCollectionProcessor
for (int i = 0; i < subRanges.size(); i++) {
String subSlice = slice + "_" + i;
subSlices.add(subSlice);
- String subShardName = collection + "_" + subSlice + "_replica1";
+ String subShardName = collectionName + "_" + subSlice + "_replica1";
subShardNames.add(subShardName);
- Slice oSlice = clusterState.getSlice(collection, subSlice);
+ Slice oSlice = clusterState.getSlice(collectionName, subSlice);
if (oSlice != null) {
if (Slice.ACTIVE.equals(oSlice.getState())) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Sub-shard: " + subSlice + " exists in active state. Aborting split shard.");
@@ -385,14 +385,14 @@ public class OverseerCollectionProcessor
DocRouter.Range subRange = subRanges.get(i);
log.info("Creating shard " + subShardName + " as part of slice "
- + subSlice + " of collection " + collection + " on "
+ + subSlice + " of collection " + collectionName + " on "
+ nodeName);
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CoreAdminParams.ACTION, CoreAdminAction.CREATE.toString());
params.set(CoreAdminParams.NAME, subShardName);
- params.set(CoreAdminParams.COLLECTION, collection);
+ params.set(CoreAdminParams.COLLECTION, collectionName);
params.set(CoreAdminParams.SHARD, subSlice);
params.set(CoreAdminParams.SHARD_RANGE, subRange.toString());
params.set(CoreAdminParams.SHARD_STATE, Slice.CONSTRUCTION);
@@ -420,10 +420,10 @@ public class OverseerCollectionProcessor
} while (srsp != null);
log.info("Successfully created all sub-shards for collection "
- + collection + " parent shard: " + slice + " on: " + parentShardLeader);
+ + collectionName + " parent shard: " + slice + " on: " + parentShardLeader);
log.info("Splitting shard " + parentShardLeader.getName() + " as part of slice "
- + slice + " of collection " + collection + " on "
+ + slice + " of collection " + collectionName + " on "
+ parentShardLeader);
ModifiableSolrParams params = new ModifiableSolrParams();
@@ -473,7 +473,7 @@ public class OverseerCollectionProcessor
// TODO: Have replication factor decided in some other way instead of numShards for the parent
- int repFactor = clusterState.getSlice(collection, slice).getReplicas().size();
+ int repFactor = clusterState.getSlice(collectionName, slice).getReplicas().size();
// we need to look at every node and see how many cores it serves
// add our new cores to existing nodes serving the least number of cores
@@ -500,10 +500,10 @@ public class OverseerCollectionProcessor
String sliceName = subSlices.get(i - 1);
for (int j = 2; j <= repFactor; j++) {
String subShardNodeName = nodeList.get((repFactor * (i - 1) + (j - 2)) % nodeList.size());
- String shardName = collection + "_" + sliceName + "_replica" + (j);
+ String shardName = collectionName + "_" + sliceName + "_replica" + (j);
log.info("Creating replica shard " + shardName + " as part of slice "
- + sliceName + " of collection " + collection + " on "
+ + sliceName + " of collection " + collectionName + " on "
+ subShardNodeName);
// Need to create new params for each request
@@ -511,7 +511,7 @@ public class OverseerCollectionProcessor
params.set(CoreAdminParams.ACTION, CoreAdminAction.CREATE.toString());
params.set(CoreAdminParams.NAME, shardName);
- params.set(CoreAdminParams.COLLECTION, collection);
+ params.set(CoreAdminParams.COLLECTION, collectionName);
params.set(CoreAdminParams.SHARD, sliceName);
// TODO: Figure the config used by the parent shard and use it.
//params.set("collection.configName", configName);
@@ -551,7 +551,7 @@ public class OverseerCollectionProcessor
for (String subSlice : subSlices) {
propMap.put(subSlice, Slice.ACTIVE);
}
- propMap.put(ZkStateReader.COLLECTION_PROP, collection);
+ propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
ZkNodeProps m = new ZkNodeProps(propMap);
inQueue.offer(ZkStateReader.toJSON(m));
@@ -559,7 +559,7 @@ public class OverseerCollectionProcessor
} catch (SolrException e) {
throw e;
} catch (Exception e) {
- log.error("Error executing split operation for collection: " + collection + " parent shard: " + slice, e);
+ log.error("Error executing split operation for collection: " + collectionName + " parent shard: " + slice, e);
throw new SolrException(ErrorCode.SERVER_ERROR, null, e);
}
}
Modified: lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyShardSplitTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyShardSplitTest.java?rev=1480898&r1=1480897&r2=1480898&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyShardSplitTest.java (original)
+++ lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyShardSplitTest.java Fri May 10 07:12:23 2013
@@ -63,7 +63,7 @@ public class ChaosMonkeyShardSplitTest e
waitForThingsToLevelOut(15);
ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
- DocRouter router = clusterState.getCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION).getRouter();
+ final DocRouter router = clusterState.getCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION).getRouter();
Slice shard1 = clusterState.getSlice(AbstractDistribZkTestBase.DEFAULT_COLLECTION, SHARD1);
DocRouter.Range shard1Range = shard1.getRange() != null ? shard1.getRange() : router.fullRange();
final List<DocRouter.Range> ranges = router.partitionRange(2, shard1Range);
@@ -78,7 +78,7 @@ public class ChaosMonkeyShardSplitTest e
try {
del("*:*");
for (int id = 0; id < 100; id++) {
- indexAndUpdateCount(ranges, docCounts, id);
+ indexAndUpdateCount(router, ranges, docCounts, String.valueOf(id));
}
commit();
@@ -88,7 +88,7 @@ public class ChaosMonkeyShardSplitTest e
int max = atLeast(401);
for (int id = 101; id < max; id++) {
try {
- indexAndUpdateCount(ranges, docCounts, id);
+ indexAndUpdateCount(router, ranges, docCounts, String.valueOf(id));
Thread.sleep(atLeast(25));
} catch (Exception e) {
log.error("Exception while adding doc", e);
Modified: lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/cloud/ShardSplitTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/cloud/ShardSplitTest.java?rev=1480898&r1=1480897&r2=1480898&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/cloud/ShardSplitTest.java (original)
+++ lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/cloud/ShardSplitTest.java Fri May 10 07:12:23 2013
@@ -28,13 +28,15 @@ import org.apache.solr.client.solrj.requ
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.cloud.ClusterState;
+import org.apache.solr.common.cloud.CompositeIdRouter;
import org.apache.solr.common.cloud.DocRouter;
+import org.apache.solr.common.cloud.HashBasedRouter;
+import org.apache.solr.common.cloud.PlainIdRouter;
import org.apache.solr.common.cloud.Slice;
import org.apache.solr.common.cloud.ZkCoreNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.params.CollectionParams;
import org.apache.solr.common.params.ModifiableSolrParams;
-import org.apache.solr.common.util.Hash;
import org.apache.solr.handler.admin.CollectionsHandler;
import org.apache.solr.update.DirectUpdateHandler2;
import org.apache.zookeeper.KeeperException;
@@ -52,6 +54,10 @@ public class ShardSplitTest extends Basi
public static final String SHARD1_0 = SHARD1 + "_0";
public static final String SHARD1_1 = SHARD1 + "_1";
+ public ShardSplitTest() {
+ schemaString = "schema15.xml"; // we need a string id
+ }
+
@Override
@Before
public void setUp() throws Exception {
@@ -94,7 +100,7 @@ public class ShardSplitTest extends Basi
waitForThingsToLevelOut(15);
ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
- DocRouter router = clusterState.getCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION).getRouter();
+ final DocRouter router = clusterState.getCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION).getRouter();
Slice shard1 = clusterState.getSlice(AbstractDistribZkTestBase.DEFAULT_COLLECTION, SHARD1);
DocRouter.Range shard1Range = shard1.getRange() != null ? shard1.getRange() : router.fullRange();
final List<DocRouter.Range> ranges = router.partitionRange(2, shard1Range);
@@ -102,8 +108,9 @@ public class ShardSplitTest extends Basi
int numReplicas = shard1.getReplicas().size();
del("*:*");
- for (int id = 0; id < 100; id++) {
- indexAndUpdateCount(ranges, docCounts, id);
+ for (int id = 0; id <= 100; id++) {
+ String shardKey = "" + (char)('a' + (id % 26)); // See comment in ShardRoutingTest for hash distribution
+ indexAndUpdateCount(router, ranges, docCounts, shardKey + "!" + String.valueOf(id));
}
commit();
@@ -113,7 +120,7 @@ public class ShardSplitTest extends Basi
int max = atLeast(401);
for (int id = 101; id < max; id++) {
try {
- indexAndUpdateCount(ranges, docCounts, id);
+ indexAndUpdateCount(router, ranges, docCounts, String.valueOf(id));
Thread.sleep(atLeast(25));
} catch (Exception e) {
log.error("Exception while adding doc", e);
@@ -201,12 +208,14 @@ public class ShardSplitTest extends Basi
baseServer.request(request);
}
- protected void indexAndUpdateCount(List<DocRouter.Range> ranges, int[] docCounts, int id) throws Exception {
- indexr("id", id);
+ protected void indexAndUpdateCount(DocRouter router, List<DocRouter.Range> ranges, int[] docCounts, String id) throws Exception {
+ index("id", id);
- // todo - hook in custom hashing
- byte[] bytes = String.valueOf(id).getBytes("UTF-8");
- int hash = Hash.murmurhash3_x86_32(bytes, 0, bytes.length, 0);
+ int hash = 0;
+ if (router instanceof HashBasedRouter) {
+ HashBasedRouter hashBasedRouter = (HashBasedRouter) router;
+ hash = hashBasedRouter.sliceHash(id, null, null);
+ }
for (int i = 0; i < ranges.size(); i++) {
DocRouter.Range range = ranges.get(i);
if (range.includes(hash))