You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2013/05/10 21:01:20 UTC

svn commit: r1481145 - in /lucene/dev/trunk/solr: CHANGES.txt core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java

Author: shalin
Date: Fri May 10 19:01:20 2013
New Revision: 1481145

URL: http://svn.apache.org/r1481145
Log:
SOLR-4806: Shard splitting does not abort if WaitForState times out

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1481145&r1=1481144&r2=1481145&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Fri May 10 19:01:20 2013
@@ -162,6 +162,8 @@ Bug Fixes
 * SOLR-4791: solr.xml sharedLib does not work in 4.3.0 (Ryan Ernst, Jan Høydahl via 
   Erick Erickson)
 
+* SOLR-4806: Shard splitting does not abort if WaitForState times out (shalin)
+
 Other Changes
 ----------------------
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java?rev=1481145&r1=1481144&r2=1481145&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java Fri May 10 19:01:20 2013
@@ -372,13 +372,10 @@ public class OverseerCollectionProcessor
         }
       }
 
-      ShardResponse srsp;
-      do {
-        srsp = shardHandler.takeCompletedOrError();
-        if (srsp != null) {
-          processResponse(results, srsp);
-        }
-      } while (srsp != null);
+      // do not abort splitshard if the unloading fails
+      // this can happen because the replicas created previously may be down
+      // the only side effect of this is that the sub shard may end up having more replicas than we want
+      collectShardResponses(results, false, null);
 
       for (int i=0; i<subRanges.size(); i++)  {
         String subSlice = subSlices.get(i);
@@ -413,12 +410,8 @@ public class OverseerCollectionProcessor
         sendShardRequest(nodeName, new ModifiableSolrParams(cmd.getParams()));
       }
 
-      do {
-        srsp = shardHandler.takeCompletedOrError();
-        if (srsp != null) {
-          processResponse(results, srsp);
-        }
-      } while (srsp != null);
+      collectShardResponses(results, true,
+          "SPLITSHARD failed to create subshard leaders or timed out waiting for them to come up");
       
       log.info("Successfully created all sub-shards for collection "
           + collectionName + " parent shard: " + slice + " on: " + parentShardLeader);
@@ -436,12 +429,7 @@ public class OverseerCollectionProcessor
       }
 
       sendShardRequest(parentShardLeader.getNodeName(), params);
-      do {
-        srsp = shardHandler.takeCompletedOrError();
-        if (srsp != null) {
-          processResponse(results, srsp);
-        }
-      } while (srsp != null);
+      collectShardResponses(results, true, "SPLITSHARD failed to invoke SPLIT core admin command");
 
       log.info("Index on shard: " + nodeName + " split into two successfully");
 
@@ -458,12 +446,8 @@ public class OverseerCollectionProcessor
         sendShardRequest(nodeName, params);
       }
 
-      do {
-        srsp = shardHandler.takeCompletedOrError();
-        if (srsp != null) {
-          processResponse(results, srsp);
-        }
-      } while (srsp != null);
+      collectShardResponses(results, true,
+          "SPLITSHARD failed while asking sub shard leaders to apply buffered updates");
 
       log.info("Successfully applied buffered updates on : " + subShardNames);
 
@@ -535,12 +519,9 @@ public class OverseerCollectionProcessor
         }
       }
 
-      do {
-        srsp = shardHandler.takeCompletedOrError();
-        if (srsp != null) {
-          processResponse(results, srsp);
-        }
-      } while (srsp != null);
+      collectShardResponses(results, true,
+          "SPLITSHARD failed to create subshard replicas or timed out waiting for them to come up");
+
       log.info("Successfully created all replica shards for all sub-slices "
           + subSlices);
 
@@ -565,6 +546,19 @@ public class OverseerCollectionProcessor
     }
   }
 
+  // Takes responses from shardHandler until none remain, recording each one in results.
+  // When abortOnError is set, the first response carrying an exception is rethrown with msgOnError.
+  private void collectShardResponses(NamedList results, boolean abortOnError, String msgOnError) {
+    ShardResponse response;
+    while ((response = shardHandler.takeCompletedOrError()) != null) {
+      processResponse(results, response);
+      if (!abortOnError || response.getException() == null) {
+        continue;
+      }
+      throw new SolrException(ErrorCode.SERVER_ERROR, msgOnError, response.getException());
+    }
+  }
+
   private void sendShardRequest(String nodeName, ModifiableSolrParams params) {
     ShardRequest sreq = new ShardRequest();
     params.set("qt", adminPath);