You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2013/05/10 09:12:23 UTC

svn commit: r1480898 - in /lucene/dev/branches/lucene_solr_4_3: ./ solr/ solr/core/ solr/core/src/java/org/apache/solr/cloud/ solr/core/src/test/org/apache/solr/cloud/

Author: shalin
Date: Fri May 10 07:12:23 2013
New Revision: 1480898

URL: http://svn.apache.org/r1480898
Log:
SOLR-4797: Shard splitting creates sub shards which have the wrong hash range in cluster state. This happens when numShards is not a power of two and router is compositeId

Modified:
    lucene/dev/branches/lucene_solr_4_3/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/core/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java
    lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyShardSplitTest.java
    lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/cloud/ShardSplitTest.java

Modified: lucene/dev/branches/lucene_solr_4_3/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_3/solr/CHANGES.txt?rev=1480898&r1=1480897&r2=1480898&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_3/solr/CHANGES.txt (original)
+++ lucene/dev/branches/lucene_solr_4_3/solr/CHANGES.txt Fri May 10 07:12:23 2013
@@ -45,6 +45,9 @@ Bug Fixes
   may be placed in the wrong shard when the default compositeId router
   is used in conjunction with IDs containing "!". (yonik)
 
+* SOLR-4797: Shard splitting creates sub shards which have the wrong hash
+  range in cluster state. This happens when numShards is not a power of two
+  and router is compositeId. (shalin)
 
 
 Other Changes

Modified: lucene/dev/branches/lucene_solr_4_3/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_3/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java?rev=1480898&r1=1480897&r2=1480898&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_3/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java (original)
+++ lucene/dev/branches/lucene_solr_4_3/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java Fri May 10 07:12:23 2013
@@ -319,21 +319,22 @@ public class OverseerCollectionProcessor
   
   private boolean splitShard(ClusterState clusterState, ZkNodeProps message, NamedList results) {
     log.info("Split shard invoked");
-    String collection = message.getStr("collection");
+    String collectionName = message.getStr("collection");
     String slice = message.getStr(ZkStateReader.SHARD_ID_PROP);
-    Slice parentSlice = clusterState.getSlice(collection, slice);
+    Slice parentSlice = clusterState.getSlice(collectionName, slice);
     
     if (parentSlice == null) {
-      if(clusterState.getCollections().contains(collection)) {
+      if(clusterState.getCollections().contains(collectionName)) {
         throw new SolrException(ErrorCode.BAD_REQUEST, "No shard with the specified name exists: " + slice);
       } else {
-        throw new SolrException(ErrorCode.BAD_REQUEST, "No collection with the specified name exists: " + collection);
+        throw new SolrException(ErrorCode.BAD_REQUEST, "No collection with the specified name exists: " + collectionName);
       }      
     }
     
     // find the leader for the shard
-    Replica parentShardLeader = clusterState.getLeader(collection, slice);
-    
+    Replica parentShardLeader = clusterState.getLeader(collectionName, slice);
+    DocCollection collection = clusterState.getCollection(collectionName);
+    DocRouter router = collection.getRouter() != null ? collection.getRouter() : DocRouter.DEFAULT;
     DocRouter.Range range = parentSlice.getRange();
     if (range == null) {
       range = new PlainIdRouter().fullRange();
@@ -341,8 +342,7 @@ public class OverseerCollectionProcessor
 
     // todo: fixed to two partitions?
     // todo: accept the range as a param to api?
-    // todo: handle randomizing subshard name in case a shard with the same name already exists.
-    List<DocRouter.Range> subRanges = new PlainIdRouter().partitionRange(2, range);
+    List<DocRouter.Range> subRanges = router.partitionRange(2, range);
     try {
       List<String> subSlices = new ArrayList<String>(subRanges.size());
       List<String> subShardNames = new ArrayList<String>(subRanges.size());
@@ -350,10 +350,10 @@ public class OverseerCollectionProcessor
       for (int i = 0; i < subRanges.size(); i++) {
         String subSlice = slice + "_" + i;
         subSlices.add(subSlice);
-        String subShardName = collection + "_" + subSlice + "_replica1";
+        String subShardName = collectionName + "_" + subSlice + "_replica1";
         subShardNames.add(subShardName);
 
-        Slice oSlice = clusterState.getSlice(collection, subSlice);
+        Slice oSlice = clusterState.getSlice(collectionName, subSlice);
         if (oSlice != null) {
           if (Slice.ACTIVE.equals(oSlice.getState())) {
             throw new SolrException(ErrorCode.BAD_REQUEST, "Sub-shard: " + subSlice + " exists in active state. Aborting split shard.");
@@ -385,14 +385,14 @@ public class OverseerCollectionProcessor
         DocRouter.Range subRange = subRanges.get(i);
 
         log.info("Creating shard " + subShardName + " as part of slice "
-            + subSlice + " of collection " + collection + " on "
+            + subSlice + " of collection " + collectionName + " on "
             + nodeName);
 
         ModifiableSolrParams params = new ModifiableSolrParams();
         params.set(CoreAdminParams.ACTION, CoreAdminAction.CREATE.toString());
 
         params.set(CoreAdminParams.NAME, subShardName);
-        params.set(CoreAdminParams.COLLECTION, collection);
+        params.set(CoreAdminParams.COLLECTION, collectionName);
         params.set(CoreAdminParams.SHARD, subSlice);
         params.set(CoreAdminParams.SHARD_RANGE, subRange.toString());
         params.set(CoreAdminParams.SHARD_STATE, Slice.CONSTRUCTION);
@@ -420,10 +420,10 @@ public class OverseerCollectionProcessor
       } while (srsp != null);
       
       log.info("Successfully created all sub-shards for collection "
-          + collection + " parent shard: " + slice + " on: " + parentShardLeader);
+          + collectionName + " parent shard: " + slice + " on: " + parentShardLeader);
 
       log.info("Splitting shard " + parentShardLeader.getName() + " as part of slice "
-          + slice + " of collection " + collection + " on "
+          + slice + " of collection " + collectionName + " on "
           + parentShardLeader);
 
       ModifiableSolrParams params = new ModifiableSolrParams();
@@ -473,7 +473,7 @@ public class OverseerCollectionProcessor
 
       // TODO: Have replication factor decided in some other way instead of numShards for the parent
 
-      int repFactor = clusterState.getSlice(collection, slice).getReplicas().size();
+      int repFactor = clusterState.getSlice(collectionName, slice).getReplicas().size();
 
       // we need to look at every node and see how many cores it serves
       // add our new cores to existing nodes serving the least number of cores
@@ -500,10 +500,10 @@ public class OverseerCollectionProcessor
         String sliceName = subSlices.get(i - 1);
         for (int j = 2; j <= repFactor; j++) {
           String subShardNodeName = nodeList.get((repFactor * (i - 1) + (j - 2)) % nodeList.size());
-          String shardName = collection + "_" + sliceName + "_replica" + (j);
+          String shardName = collectionName + "_" + sliceName + "_replica" + (j);
 
           log.info("Creating replica shard " + shardName + " as part of slice "
-              + sliceName + " of collection " + collection + " on "
+              + sliceName + " of collection " + collectionName + " on "
               + subShardNodeName);
 
           // Need to create new params for each request
@@ -511,7 +511,7 @@ public class OverseerCollectionProcessor
           params.set(CoreAdminParams.ACTION, CoreAdminAction.CREATE.toString());
 
           params.set(CoreAdminParams.NAME, shardName);
-          params.set(CoreAdminParams.COLLECTION, collection);
+          params.set(CoreAdminParams.COLLECTION, collectionName);
           params.set(CoreAdminParams.SHARD, sliceName);
           // TODO:  Figure the config used by the parent shard and use it.
           //params.set("collection.configName", configName);
@@ -551,7 +551,7 @@ public class OverseerCollectionProcessor
       for (String subSlice : subSlices) {
         propMap.put(subSlice, Slice.ACTIVE);
       }
-      propMap.put(ZkStateReader.COLLECTION_PROP, collection);
+      propMap.put(ZkStateReader.COLLECTION_PROP, collectionName);
       ZkNodeProps m = new ZkNodeProps(propMap);
       inQueue.offer(ZkStateReader.toJSON(m));
 
@@ -559,7 +559,7 @@ public class OverseerCollectionProcessor
     } catch (SolrException e) {
       throw e;
     } catch (Exception e) {
-      log.error("Error executing split operation for collection: " + collection + " parent shard: " + slice, e);
+      log.error("Error executing split operation for collection: " + collectionName + " parent shard: " + slice, e);
       throw new SolrException(ErrorCode.SERVER_ERROR, null, e);
     }
   }

Modified: lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyShardSplitTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyShardSplitTest.java?rev=1480898&r1=1480897&r2=1480898&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyShardSplitTest.java (original)
+++ lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeyShardSplitTest.java Fri May 10 07:12:23 2013
@@ -63,7 +63,7 @@ public class ChaosMonkeyShardSplitTest e
     waitForThingsToLevelOut(15);
 
     ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
-    DocRouter router = clusterState.getCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION).getRouter();
+    final DocRouter router = clusterState.getCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION).getRouter();
     Slice shard1 = clusterState.getSlice(AbstractDistribZkTestBase.DEFAULT_COLLECTION, SHARD1);
     DocRouter.Range shard1Range = shard1.getRange() != null ? shard1.getRange() : router.fullRange();
     final List<DocRouter.Range> ranges = router.partitionRange(2, shard1Range);
@@ -78,7 +78,7 @@ public class ChaosMonkeyShardSplitTest e
     try {
       del("*:*");
       for (int id = 0; id < 100; id++) {
-        indexAndUpdateCount(ranges, docCounts, id);
+        indexAndUpdateCount(router, ranges, docCounts, String.valueOf(id));
       }
       commit();
 
@@ -88,7 +88,7 @@ public class ChaosMonkeyShardSplitTest e
           int max = atLeast(401);
           for (int id = 101; id < max; id++) {
             try {
-              indexAndUpdateCount(ranges, docCounts, id);
+              indexAndUpdateCount(router, ranges, docCounts, String.valueOf(id));
               Thread.sleep(atLeast(25));
             } catch (Exception e) {
               log.error("Exception while adding doc", e);

Modified: lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/cloud/ShardSplitTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/cloud/ShardSplitTest.java?rev=1480898&r1=1480897&r2=1480898&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/cloud/ShardSplitTest.java (original)
+++ lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/cloud/ShardSplitTest.java Fri May 10 07:12:23 2013
@@ -28,13 +28,15 @@ import org.apache.solr.client.solrj.requ
 import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.cloud.ClusterState;
+import org.apache.solr.common.cloud.CompositeIdRouter;
 import org.apache.solr.common.cloud.DocRouter;
+import org.apache.solr.common.cloud.HashBasedRouter;
+import org.apache.solr.common.cloud.PlainIdRouter;
 import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.cloud.ZkCoreNodeProps;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.params.CollectionParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
-import org.apache.solr.common.util.Hash;
 import org.apache.solr.handler.admin.CollectionsHandler;
 import org.apache.solr.update.DirectUpdateHandler2;
 import org.apache.zookeeper.KeeperException;
@@ -52,6 +54,10 @@ public class ShardSplitTest extends Basi
   public static final String SHARD1_0 = SHARD1 + "_0";
   public static final String SHARD1_1 = SHARD1 + "_1";
 
+  public ShardSplitTest() {
+    schemaString = "schema15.xml";      // we need a string id
+  }
+
   @Override
   @Before
   public void setUp() throws Exception {
@@ -94,7 +100,7 @@ public class ShardSplitTest extends Basi
     waitForThingsToLevelOut(15);
 
     ClusterState clusterState = cloudClient.getZkStateReader().getClusterState();
-    DocRouter router = clusterState.getCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION).getRouter();
+    final DocRouter router = clusterState.getCollection(AbstractDistribZkTestBase.DEFAULT_COLLECTION).getRouter();
     Slice shard1 = clusterState.getSlice(AbstractDistribZkTestBase.DEFAULT_COLLECTION, SHARD1);
     DocRouter.Range shard1Range = shard1.getRange() != null ? shard1.getRange() : router.fullRange();
     final List<DocRouter.Range> ranges = router.partitionRange(2, shard1Range);
@@ -102,8 +108,9 @@ public class ShardSplitTest extends Basi
     int numReplicas = shard1.getReplicas().size();
 
     del("*:*");
-    for (int id = 0; id < 100; id++) {
-      indexAndUpdateCount(ranges, docCounts, id);
+    for (int id = 0; id <= 100; id++) {
+      String shardKey = "" + (char)('a' + (id % 26)); // See comment in ShardRoutingTest for hash distribution
+      indexAndUpdateCount(router, ranges, docCounts, shardKey + "!" + String.valueOf(id));
     }
     commit();
 
@@ -113,7 +120,7 @@ public class ShardSplitTest extends Basi
         int max = atLeast(401);
         for (int id = 101; id < max; id++) {
           try {
-            indexAndUpdateCount(ranges, docCounts, id);
+            indexAndUpdateCount(router, ranges, docCounts, String.valueOf(id));
             Thread.sleep(atLeast(25));
           } catch (Exception e) {
             log.error("Exception while adding doc", e);
@@ -201,12 +208,14 @@ public class ShardSplitTest extends Basi
     baseServer.request(request);
   }
 
-  protected void indexAndUpdateCount(List<DocRouter.Range> ranges, int[] docCounts, int id) throws Exception {
-    indexr("id", id);
+  protected void indexAndUpdateCount(DocRouter router, List<DocRouter.Range> ranges, int[] docCounts, String id) throws Exception {
+    index("id", id);
 
-    // todo - hook in custom hashing
-    byte[] bytes = String.valueOf(id).getBytes("UTF-8");
-    int hash = Hash.murmurhash3_x86_32(bytes, 0, bytes.length, 0);
+    int hash = 0;
+    if (router instanceof HashBasedRouter) {
+      HashBasedRouter hashBasedRouter = (HashBasedRouter) router;
+      hash = hashBasedRouter.sliceHash(id, null, null);
+    }
     for (int i = 0; i < ranges.size(); i++) {
       DocRouter.Range range = ranges.get(i);
       if (range.includes(hash))