You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2012/01/20 17:02:55 UTC

svn commit: r1233976 - in /lucene/dev/branches/solrcloud/solr: core/src/java/org/apache/solr/cloud/ core/src/java/org/apache/solr/core/ core/src/java/org/apache/solr/handler/ core/src/test/org/apache/solr/cloud/ solrj/src/java/org/apache/solr/common/cl...

Author: markrmiller
Date: Fri Jan 20 16:02:54 2012
New Revision: 1233976

URL: http://svn.apache.org/viewvc?rev=1233976&view=rev
Log:
more refactoring and fixing

Modified:
    lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java
    lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/Overseer.java
    lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
    lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/ZkController.java
    lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/CoreContainer.java
    lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/handler/SnapPuller.java
    lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java
    lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java
    lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java
    lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
    lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java
    lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/DefaultConnectionStrategy.java
    lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ZkCmdExecutor.java

Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java?rev=1233976&r1=1233975&r2=1233976&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java Fri Jan 20 16:02:54 2012
@@ -121,7 +121,6 @@ public  class LeaderElector {
                   checkIfIamLeader(leaderSeqPath, seq, context, true);
                 } catch (KeeperException e) {
                   log.warn("", e);
-                  
                 } catch (InterruptedException e) {
                   // Restore the interrupted status
                   Thread.currentThread().interrupt();
@@ -135,7 +134,6 @@ public  class LeaderElector {
       } catch (KeeperException.SessionExpiredException e) {
         throw e;
       } catch (KeeperException e) {
-        e.printStackTrace(System.out);
         // we couldn't set our watch - the node before us may already be down?
         // we need to check if we are the leader again
         checkIfIamLeader(leaderSeqPath, seq, context, true);

Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/Overseer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/Overseer.java?rev=1233976&r1=1233975&r2=1233976&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/Overseer.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/Overseer.java Fri Jan 20 16:02:54 2012
@@ -355,13 +355,11 @@ public class Overseer implements NodeSta
     newStates.putAll(state.getCollectionStates());
     
     if (!newStates.containsKey(collection)) {
-      log.info("New collection");
       newStates.put(collection, new LinkedHashMap<String,Slice>());
     }
     
     final Map<String, Slice> slices = newStates.get(collection);
     if (!slices.containsKey(slice.getName())) {
-      log.info("New slice");
       slices.put(slice.getName(), slice);
     } else {
       final Map<String,ZkNodeProps> shards = new LinkedHashMap<String,ZkNodeProps>();

Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java?rev=1233976&r1=1233975&r2=1233976&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/RecoveryStrategy.java Fri Jan 20 16:02:54 2012
@@ -32,6 +32,7 @@ import org.apache.solr.common.cloud.ZkCo
 import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.core.CoreDescriptor;
 import org.apache.solr.core.RequestHandlers.LazyRequestHandlerWrapper;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.handler.ReplicationHandler;
@@ -43,6 +44,7 @@ import org.slf4j.LoggerFactory;
 
 public class RecoveryStrategy extends Thread {
   private static final int MAX_RETRIES = 100;
+  private static final int INTERRUPTED = 101;
   private static final int START_TIMEOUT = 100;
   
   private static final String REPLICATION_HANDLER = "/replication";
@@ -84,9 +86,9 @@ public class RecoveryStrategy extends Th
   
   private void recoveryFailed(final SolrCore core,
       final ZkController zkController, final String baseUrl,
-      final String shardZkNodeName, final CloudDescriptor cloudDesc) {
-    log.error("Recovery failed - I give up.");
-    zkController.publishAsRecoveryFailed(baseUrl, cloudDesc,
+      final String shardZkNodeName, final CoreDescriptor cd) {
+    SolrException.log(log, "Recovery failed - I give up.");
+    zkController.publishAsRecoveryFailed(baseUrl, cd,
         shardZkNodeName, core.getName());
     close = true;
   }
@@ -132,11 +134,11 @@ public class RecoveryStrategy extends Th
       ModifiableSolrParams solrParams = new ModifiableSolrParams();
       solrParams.set(ReplicationHandler.MASTER_URL, leaderUrl + "replication");
       
-      if (close) retries = MAX_RETRIES; 
+      if (close) retries = INTERRUPTED; 
       boolean success = replicationHandler.doFetch(solrParams, true); // TODO: look into making sure fore=true does not download files we already have
 
       if (!success) {
-        throw new RuntimeException("Replication for recovery failed.");
+        throw new SolrException(ErrorCode.SERVER_ERROR, "Replication for recovery failed.");
       }
       
       // nocommit
@@ -157,7 +159,6 @@ public class RecoveryStrategy extends Th
   
   @Override
   public void run() {
-    
     boolean replayed = false;
     boolean succesfulRecovery = false;
     
@@ -183,23 +184,22 @@ public class RecoveryStrategy extends Th
         replayed = true;
         
         // if there are pending recovery requests, don't advert as active
-        
-        zkController.publishAsActive(baseUrl, cloudDesc, coreZkNodeName,
+        zkController.publishAsActive(baseUrl, core.getCoreDescriptor(), coreZkNodeName,
             coreName);
         
         succesfulRecovery = true;
       } catch (InterruptedException e) {
         Thread.currentThread().interrupt();
         log.warn("Recovery was interrupted", e);
-        retries = MAX_RETRIES;
+        retries = INTERRUPTED;
       } catch (Throwable t) {
         SolrException.log(log, "Error while trying to recover", t);
       } finally {
         if (!replayed) {
           try {
             ulog.dropBufferedUpdates();
-          } catch (Exception e) {
-            log.error("", e);
+          } catch (Throwable t) {
+            SolrException.log(log, "", t);
           }
         }
         
@@ -214,9 +214,13 @@ public class RecoveryStrategy extends Th
           SolrException.log(log, "Recovery failed - trying again...");
           retries++;
           if (retries >= MAX_RETRIES) {
-            // TODO: for now, give up after 10 tries - should we do more?
-            recoveryFailed(core, zkController, baseUrl, coreZkNodeName,
-                cloudDesc);
+            if (retries == INTERRUPTED) {
+
+            } else {
+              // TODO: for now, give up after MAX_RETRIES tries - should we do more?
+              recoveryFailed(core, zkController, baseUrl, coreZkNodeName,
+                  core.getCoreDescriptor());
+            }
             break;
           }
           
@@ -233,7 +237,7 @@ public class RecoveryStrategy extends Th
         } catch (InterruptedException e) {
           Thread.currentThread().interrupt();
           log.warn("Recovery was interrupted", e);
-          retries = MAX_RETRIES;
+          retries = INTERRUPTED;
         }
       }
       

Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/ZkController.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/ZkController.java?rev=1233976&r1=1233975&r2=1233976&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/ZkController.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/cloud/ZkController.java Fri Jan 20 16:02:54 2012
@@ -41,7 +41,6 @@ import org.apache.solr.common.cloud.Solr
 import org.apache.solr.common.cloud.ZkCmdExecutor;
 import org.apache.solr.common.cloud.ZkCoreNodeProps;
 import org.apache.solr.common.cloud.ZkNodeProps;
-import org.apache.solr.common.cloud.ZkOperation;
 import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.common.cloud.ZooKeeperException;
 import org.apache.solr.common.params.SolrParams;
@@ -166,7 +165,9 @@ public final class ZkController {
           public void command() {
             try {
               // we need to create all of our lost watches
-              Overseer.createClientNodes(zkClient, getNodeName());
+              
+              // seems we don't need to do this again...
+              //Overseer.createClientNodes(zkClient, getNodeName());
 
               ElectionContext context = new OverseerElectionContext(getNodeName(), zkClient, zkStateReader);
               overseerElector.joinElection(context);
@@ -180,7 +181,7 @@ public final class ZkController {
                 for (CoreDescriptor descriptor : descriptors) {
                   final String shardZkNodeName = getNodeName() + "_"
                       + descriptor.getName();
-                  publishAsDown(getBaseUrl(), descriptor.getCloudDescriptor(), shardZkNodeName,
+                  publishAsDown(getBaseUrl(), descriptor, shardZkNodeName,
                       descriptor.getName());
                 }
               }
@@ -470,9 +471,8 @@ public final class ZkController {
         + null);
     CloudState state = CloudState.load(zkClient, zkStateReader.getCloudState().getLiveNodes());
 
-    final String shardZkNodeName = getNodeName() + "_" + coreName;
+    final String coreZkNodeName = getNodeName() + "_" + coreName;
     
-    // checkRecovery will have updated the shardId if it already exists...
     String shardId = cloudDesc.getShardId();
 
     Map<String,String> props = new HashMap<String,String>();
@@ -481,21 +481,6 @@ public final class ZkController {
     props.put(ZkStateReader.NODE_NAME_PROP, getNodeName());
     props.put(ZkStateReader.ROLES_PROP, cloudDesc.getRoles());
     props.put(ZkStateReader.STATE_PROP, ZkStateReader.DOWN);
-    if(shardId!=null) {
-      props.put(ZkStateReader.SHARD_ID_PROP, shardId);
-    }
-
-    if (shardId == null && getShardId(desc, state, shardZkNodeName)) {
-      publishState(cloudDesc, shardZkNodeName, coreName, props); //need to publish state to get overseer assigned id
-      shardId = doGetShardIdProcess(coreName, cloudDesc);
-      cloudDesc.setShardId(shardId);
-      props.put(ZkStateReader.SHARD_ID_PROP, shardId);
-    } else {
-      // shard id was picked up in getShardId
-      props.put(ZkStateReader.SHARD_ID_PROP, cloudDesc.getShardId());
-      shardId = cloudDesc.getShardId();
-      publishState(cloudDesc, shardZkNodeName, coreName, props);
-    }
 
     if (log.isInfoEnabled()) {
         log.info("Register shard - core:" + coreName + " address:"
@@ -508,16 +493,26 @@ public final class ZkController {
         props.get(ZkStateReader.CORE_NAME_PROP), ZkStateReader.NODE_NAME_PROP,
         props.get(ZkStateReader.NODE_NAME_PROP));
     
-    SolrCore core = null;
 
+    joinElection(collection, coreZkNodeName, shardId, leaderProps);
+    
+    String leaderUrl = zkStateReader.getLeaderUrl(collection,
+        cloudDesc.getShardId(), 30000);
+    
+    String ourUrl = ZkCoreNodeProps.getCoreUrl(baseUrl, coreName);
+    log.info("We are " + ourUrl + " and leader is " + leaderUrl);
+    boolean isLeader = leaderUrl.equals(ourUrl);
+    
+
+    SolrCore core = null;
     if (cc != null) { // CoreContainer only null in tests
       try {
         core = cc.getCore(desc.getName());
-        joinElection(collection, shardZkNodeName, shardId, leaderProps);
-        boolean success = checkRecovery(coreName, desc, recoverReloadedCores, baseUrl, cloudDesc,
-            collection, shardZkNodeName, shardId, leaderProps, core, cc);
-        if (success) {
-          publishAsActive(baseUrl, cloudDesc, shardZkNodeName, coreName);
+
+        boolean startRecovery = checkRecovery(coreName, desc, recoverReloadedCores, isLeader, cloudDesc,
+            collection, coreZkNodeName, shardId, leaderProps, core, cc);
+        if (!startRecovery) {
+          publishAsActive(baseUrl, desc, coreZkNodeName, coreName);
         }
       } finally {
         if (core != null) {
@@ -525,8 +520,7 @@ public final class ZkController {
         }
       }
     } else {
-      joinElection(collection, shardZkNodeName, shardId, leaderProps);
-      publishAsActive(baseUrl, cloudDesc, shardZkNodeName, coreName);
+      publishAsActive(baseUrl, desc, coreZkNodeName, coreName);
     }
     
     // make sure we have an update cluster state right away
@@ -547,19 +541,17 @@ public final class ZkController {
 
 
   private boolean checkRecovery(String coreName, final CoreDescriptor desc,
-      boolean recoverReloadedCores, final String baseUrl,
+      boolean recoverReloadedCores, final boolean isLeader,
       final CloudDescriptor cloudDesc, final String collection,
       final String shardZkNodeName, String shardId, ZkNodeProps leaderProps,
       SolrCore core, CoreContainer cc) throws InterruptedException,
       KeeperException, IOException, ExecutionException {
-    
-    String leaderUrl = zkStateReader.getLeaderUrl(collection,
-        cloudDesc.getShardId(), 30000);
+
     
     boolean doRecovery = true;
-    String ourUrl = ZkCoreNodeProps.getCoreUrl(baseUrl, coreName);
-    log.info("We are " + ourUrl + " and leader is " + leaderUrl);
-    if (leaderUrl.equals(ourUrl)) {
+
+
+    if (isLeader) {
       doRecovery = false;
       
       // recover from local transaction log and wait for it to complete before
@@ -608,62 +600,59 @@ public final class ZkController {
 
 
   void publishAsActive(String shardUrl,
-      final CloudDescriptor cloudDesc, String shardZkNodeName, String coreName) {
+      final CoreDescriptor cd, String shardZkNodeName, String coreName) {
     Map<String,String> finalProps = new HashMap<String,String>();
     finalProps.put(ZkStateReader.BASE_URL_PROP, shardUrl);
     finalProps.put(ZkStateReader.CORE_NAME_PROP, coreName);
     finalProps.put(ZkStateReader.NODE_NAME_PROP, getNodeName());
     finalProps.put(ZkStateReader.STATE_PROP, ZkStateReader.ACTIVE);
-    finalProps.put(ZkStateReader.SHARD_ID_PROP, cloudDesc.getShardId());
-    publishState(cloudDesc, shardZkNodeName, coreName, finalProps);
+
+    publishState(cd, shardZkNodeName, coreName, finalProps);
   }
 
   public void publish(SolrCore core, String state) {
-    CloudDescriptor cloudDesc = core.getCoreDescriptor().getCloudDescriptor();
+    CoreDescriptor cd = core.getCoreDescriptor();
     Map<String,String> finalProps = new HashMap<String,String>();
     finalProps.put(ZkStateReader.BASE_URL_PROP, getBaseUrl());
     finalProps.put(ZkStateReader.CORE_NAME_PROP, core.getName());
     finalProps.put(ZkStateReader.NODE_NAME_PROP, getNodeName());
     finalProps.put(ZkStateReader.STATE_PROP, state);
-    finalProps.put(ZkStateReader.SHARD_ID_PROP, cloudDesc.getShardId());
-    publishState(cloudDesc, getNodeName() + "_" + core.getName(),
+    publishState(cd, getNodeName() + "_" + core.getName(),
         core.getName(), finalProps);
   }
   
   void publishAsDown(String baseUrl,
-      final CloudDescriptor cloudDesc, String shardZkNodeName, String coreName) {
+      final CoreDescriptor cd, String shardZkNodeName, String coreName) {
     Map<String,String> finalProps = new HashMap<String,String>();
     finalProps.put(ZkStateReader.BASE_URL_PROP, baseUrl);
     finalProps.put(ZkStateReader.CORE_NAME_PROP, coreName);
     finalProps.put(ZkStateReader.NODE_NAME_PROP, getNodeName());
     finalProps.put(ZkStateReader.STATE_PROP, ZkStateReader.DOWN);
-    finalProps.put(ZkStateReader.SHARD_ID_PROP, cloudDesc.getShardId());
-    publishState(cloudDesc, shardZkNodeName, coreName, finalProps);
+ 
+    publishState(cd, shardZkNodeName, coreName, finalProps);
   }
   
   void publishAsRecoveryFailed(String baseUrl,
-      final CloudDescriptor cloudDesc, String shardZkNodeName, String coreName) {
+      final CoreDescriptor cd, String shardZkNodeName, String coreName) {
     Map<String,String> finalProps = new HashMap<String,String>();
     finalProps.put(ZkStateReader.BASE_URL_PROP, baseUrl);
     finalProps.put(ZkStateReader.CORE_NAME_PROP, coreName);
     finalProps.put(ZkStateReader.NODE_NAME_PROP, getNodeName());
     finalProps.put(ZkStateReader.STATE_PROP, ZkStateReader.RECOVERY_FAILED);
-    finalProps.put(ZkStateReader.SHARD_ID_PROP, cloudDesc.getShardId());
-    publishState(cloudDesc, shardZkNodeName, coreName, finalProps);
+    publishState(cd, shardZkNodeName, coreName, finalProps);
   }
 
 
-  private boolean getShardId(final CoreDescriptor desc,
+  private boolean needsToBeAssignedShardId(final CoreDescriptor desc,
       final CloudState state, final String shardZkNodeName) {
 
     final CloudDescriptor cloudDesc = desc.getCloudDescriptor();
     
     final String shardId = state.getShardId(shardZkNodeName);
 
-    if(shardId!=null) {
-        // TODO: we where already registered - go into recovery mode
-        cloudDesc.setShardId(shardId);
-        return false;
+    if (shardId != null) {
+      cloudDesc.setShardId(shardId);
+      return false;
     }
     return true;
   }
@@ -815,44 +804,62 @@ public final class ZkController {
   }
 
   
-  private void publishState(CloudDescriptor cloudDesc, String shardZkNodeName, String coreName,
+  private void publishState(CoreDescriptor cd, String shardZkNodeName, String coreName,
       Map<String,String> props) {
+    CloudDescriptor cloudDesc = cd.getCloudDescriptor();
+    
+    if (cloudDesc.getShardId() == null && needsToBeAssignedShardId(cd, zkStateReader.getCloudState(), shardZkNodeName)) {
+      // publish with no shard id so we are assigned one, and then look for it
+      doPublish(shardZkNodeName, coreName, props, cloudDesc);
+      String shardId;
+      try {
+        shardId = doGetShardIdProcess(coreName, cloudDesc);
+      } catch (InterruptedException e) {
+        throw new SolrException(ErrorCode.SERVER_ERROR, "Interrupted");
+      }
+      cloudDesc.setShardId(shardId);
+    }
+   
+    
+    if (!props.containsKey(ZkStateReader.SHARD_ID_PROP) && cloudDesc.getShardId() != null) {
+      props.put(ZkStateReader.SHARD_ID_PROP, cloudDesc.getShardId());
+    }
+    
+    doPublish(shardZkNodeName, coreName, props, cloudDesc);
+  }
+
+
+  private void doPublish(String shardZkNodeName, String coreName,
+      Map<String,String> props, CloudDescriptor cloudDesc) {
+
     CoreState coreState = new CoreState(coreName,
         cloudDesc.getCollectionName(), props);
     coreStates.put(shardZkNodeName, coreState);
     final String nodePath = "/node_states/" + getNodeName();
 
     try {
-      cmdExecutor.retryOperation(new ZkOperation() {
-        
-        @Override
-        public Object execute() throws KeeperException, InterruptedException {
-          zkClient.setData(
-              nodePath,
-              ZkStateReader.toJSON(coreStates.values()), true);
-          return null;
-        }
-      });
-
+      zkClient.setData(nodePath, ZkStateReader.toJSON(coreStates.values()),
+          true);
+      
     } catch (KeeperException e) {
-      throw new ZooKeeperException(
-          SolrException.ErrorCode.SERVER_ERROR,
+      throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR,
           "could not publish node state", e);
     } catch (InterruptedException e) {
       // Restore the interrupted status
       Thread.currentThread().interrupt();
-      throw new ZooKeeperException(
-          SolrException.ErrorCode.SERVER_ERROR,
+      throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR,
           "could not publish node state", e);
     }
   }
 
-  private String doGetShardIdProcess(String coreName, CloudDescriptor descriptor) throws InterruptedException {
+  private String doGetShardIdProcess(String coreName, CloudDescriptor descriptor)
+      throws InterruptedException {
     final String shardZkNodeName = getNodeName() + "_" + coreName;
     int retryCount = 120;
-    while (retryCount-->0) {
-      final String shardId = zkStateReader.getCloudState().getShardId(shardZkNodeName);
-      if(shardId!=null) {
+    while (retryCount-- > 0) {
+      final String shardId = zkStateReader.getCloudState().getShardId(
+          shardZkNodeName);
+      if (shardId != null) {
         return shardId;
       }
       try {
@@ -861,7 +868,8 @@ public final class ZkController {
         Thread.currentThread().interrupt();
       }
     }
-    throw new SolrException(ErrorCode.SERVER_ERROR, "Could not get shard_id for core: " + coreName);
+    throw new SolrException(ErrorCode.SERVER_ERROR,
+        "Could not get shard_id for core: " + coreName);
   }
   
   public static void uploadToZK(SolrZkClient zkClient, File dir, String zkPath) throws IOException, KeeperException, InterruptedException {

Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/CoreContainer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/CoreContainer.java?rev=1233976&r1=1233975&r2=1233976&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/CoreContainer.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/core/CoreContainer.java Fri Jan 20 16:02:54 2012
@@ -477,6 +477,12 @@ public class CoreContainer 
       throw new RuntimeException( "Invalid core name: "+name );
     }
 
+    if (zkController != null) {
+      // before becoming available, make sure we are not live and active
+      // this also gets us our assigned shard id if it was not specified
+      zkController.publish(core, ZkStateReader.DOWN);
+    }
+    
     SolrCore old = null;
     synchronized (cores) {
       old = cores.put(name, core);

Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/handler/SnapPuller.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/handler/SnapPuller.java?rev=1233976&r1=1233975&r2=1233976&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/handler/SnapPuller.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/handler/SnapPuller.java Fri Jan 20 16:02:54 2012
@@ -244,7 +244,7 @@ public class SnapPuller {
   @SuppressWarnings("unchecked")
   boolean successfulInstall = false;
 
-  boolean fetchLatestIndex(SolrCore core, boolean force) throws IOException {
+  boolean fetchLatestIndex(SolrCore core, boolean force) throws IOException, InterruptedException {
     successfulInstall = false;
     replicationStartTime = System.currentTimeMillis();
     try {
@@ -354,6 +354,8 @@ public class SnapPuller {
         return false;
       } catch (SolrException e) {
         throw e;
+      } catch (InterruptedException e) {
+        throw new InterruptedException("Index fetch interrupted");
       } catch (Exception e) {
         throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Index fetch failed : ", e);
       } finally {

Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java?rev=1233976&r1=1233975&r2=1233976&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java Fri Jan 20 16:02:54 2012
@@ -292,7 +292,7 @@ public class ChaosMonkey {
       return null;
     }
     
-    System.out.println("num active:" + numActive + " for " + slice);
+    //System.out.println("num active:" + numActive + " for " + slice);
     
     int chance = random.nextInt(10);
     JettySolrRunner jetty;

Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java?rev=1233976&r1=1233975&r2=1233976&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySafeLeaderTest.java Fri Jan 20 16:02:54 2012
@@ -65,8 +65,8 @@ public class ChaosMonkeySafeLeaderTest e
   
   public ChaosMonkeySafeLeaderTest() {
     super();
-    shardCount = atLeast(3);
-    sliceCount = atLeast(2);
+    sliceCount = 2;
+    shardCount = 12;
   }
   
   @Override
@@ -81,7 +81,7 @@ public class ChaosMonkeySafeLeaderTest e
     //del("*:*");
     
     List<StopableIndexingThread> threads = new ArrayList<StopableIndexingThread>();
-    int threadCount = atLeast(2);
+    int threadCount = 2;
     for (int i = 0; i < threadCount; i++) {
       StopableIndexingThread indexThread = new StopableIndexingThread(i * 50000, true);
       threads.add(indexThread);
@@ -109,7 +109,6 @@ public class ChaosMonkeySafeLeaderTest e
     
     // try and wait for any replications and what not to finish...
     
-    // wait until there are no recoveries...
     waitForThingsToLevelOut();
 
     checkShardConsistency(true, true);
@@ -122,7 +121,7 @@ public class ChaosMonkeySafeLeaderTest e
     int cnt = 0;
     boolean retry = false;
     do {
-      waitForRecoveriesToFinish(VERBOSE);
+      waitForRecoveriesToFinish(false);
       
       commit();
       
@@ -135,11 +134,14 @@ public class ChaosMonkeySafeLeaderTest e
       }
       
       if (failMessage != null) {
-        retry  = true;
+        retry = true;
+      } else {
+        retry = false;
       }
+      
       cnt++;
       if (cnt > 10) break;
-      Thread.sleep(4000);
+      Thread.sleep(2000);
     } while (retry);
   }
   

Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java?rev=1233976&r1=1233975&r2=1233976&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java Fri Jan 20 16:02:54 2012
@@ -53,18 +53,19 @@ import org.junit.Before;
 import org.junit.BeforeClass;
 
 /**
- *
- * TODO: we should still test this works as a custom update chain as well as what we test now - the default update chain
+ * 
+ * TODO: we should still test this works as a custom update chain as well as
+ * what we test now - the default update chain
  * 
  */
 public class FullSolrCloudTest extends AbstractDistributedZkTestCase {
-
+  
   private static final String SHARD2 = "shard2";
-
+  
   protected static final String DEFAULT_COLLECTION = "collection1";
-
-  String t1="a_t";
-  String i1="a_si";
+  
+  String t1 = "a_t";
+  String i1 = "a_si";
   String nint = "n_i";
   String tint = "n_ti";
   String nfloat = "n_f";
@@ -76,9 +77,9 @@ public class FullSolrCloudTest extends A
   String ndate = "n_dt";
   String tdate = "n_tdt";
   
-  String oddField="oddField_s";
-  String missingField="ignore_exception__missing_but_valid_field_t";
-  String invalidField="ignore_exception__invalid_field_not_in_schema";
+  String oddField = "oddField_s";
+  String missingField = "ignore_exception__missing_but_valid_field_t";
+  String invalidField = "ignore_exception__invalid_field_not_in_schema";
   protected int sliceCount;
   
   protected volatile CloudSolrServer cloudClient;
@@ -90,10 +91,10 @@ public class FullSolrCloudTest extends A
   private AtomicInteger jettyIntCntr = new AtomicInteger(0);
   protected ChaosMonkey chaosMonkey;
   protected volatile ZkStateReader zkStateReader;
-
+  
   private Map<String,SolrServer> shardToLeaderClient = new HashMap<String,SolrServer>();
   private Map<String,CloudJettyRunner> shardToLeaderJetty = new HashMap<String,CloudJettyRunner>();
-
+  
   class CloudJettyRunner {
     JettySolrRunner jetty;
     String nodeName;
@@ -104,8 +105,7 @@ public class FullSolrCloudTest extends A
     SolrServer client;
     String shardName;
     
-    public CloudSolrServerClient() {
-    }
+    public CloudSolrServerClient() {}
     
     public CloudSolrServerClient(SolrServer client) {
       this.client = client;
@@ -118,6 +118,7 @@ public class FullSolrCloudTest extends A
       result = prime * result + ((client == null) ? 0 : client.hashCode());
       return result;
     }
+    
     @Override
     public boolean equals(Object obj) {
       if (this == obj) return true;
@@ -129,7 +130,7 @@ public class FullSolrCloudTest extends A
       } else if (!client.equals(other.client)) return false;
       return true;
     }
-
+    
   }
   
   @Before
@@ -141,7 +142,8 @@ public class FullSolrCloudTest extends A
   
   @BeforeClass
   public static void beforeClass() throws Exception {
-    System.setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory");
+    System
+        .setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory");
     System.setProperty("solrcloud.update.delay", "0");
     System.setProperty("enable.update.log", "true");
     System.setProperty("remove.version.field", "true");
@@ -160,7 +162,7 @@ public class FullSolrCloudTest extends A
     
     shardCount = 4;
     sliceCount = 2;
-    // TODO: for now, turn off stress because it uses regular clients, and we 
+    // TODO: for now, turn off stress because it uses regular clients, and we
     // need the cloud client because we kill servers
     stress = 0;
   }
@@ -178,28 +180,30 @@ public class FullSolrCloudTest extends A
       }
       
       chaosMonkey = new ChaosMonkey(zkServer, zkStateReader,
-          DEFAULT_COLLECTION, shardToJetty, shardToClient, shardToLeaderClient, shardToLeaderJetty,
-          random);
+          DEFAULT_COLLECTION, shardToJetty, shardToClient, shardToLeaderClient,
+          shardToLeaderJetty, random);
     }
-
+    
     // wait until shards have started registering...
-    while(!zkStateReader.getCloudState().getCollections().contains(DEFAULT_COLLECTION)) {
+    while (!zkStateReader.getCloudState().getCollections()
+        .contains(DEFAULT_COLLECTION)) {
       Thread.sleep(500);
     }
-    while(zkStateReader.getCloudState().getSlices(DEFAULT_COLLECTION).size() != sliceCount) {
+    while (zkStateReader.getCloudState().getSlices(DEFAULT_COLLECTION).size() != sliceCount) {
       Thread.sleep(500);
     }
     
     // use the distributed solrj client
     if (cloudClient == null) {
-      synchronized(this) {
+      synchronized (this) {
         if (cloudClient != null) {
           return;
         }
         try {
           CloudSolrServer server = new CloudSolrServer(zkServer.getZkAddress());
           server.setDefaultCollection(DEFAULT_COLLECTION);
-          server.getLbServer().getHttpClient().getParams().setConnectionManagerTimeout(5000);
+          server.getLbServer().getHttpClient().getParams()
+              .setConnectionManagerTimeout(5000);
           server.getLbServer().getHttpClient().getParams().setSoTimeout(5000);
           cloudClient = server;
         } catch (MalformedURLException e) {
@@ -213,14 +217,15 @@ public class FullSolrCloudTest extends A
   protected void createServers(int numServers) throws Exception {
     
     System.setProperty("collection", "control_collection");
-    controlJetty = createJetty(testDir, testDir + "/control/data", "control_shard");
+    controlJetty = createJetty(testDir, testDir + "/control/data",
+        "control_shard");
     System.clearProperty("collection");
     controlClient = createNewSolrServer(controlJetty.getLocalPort());
-
+    
     createJettys(numServers);
-
+    
   }
-
+  
   private List<JettySolrRunner> createJettys(int numJettys) throws Exception,
       InterruptedException, TimeoutException, IOException, KeeperException,
       URISyntaxException {
@@ -229,12 +234,13 @@ public class FullSolrCloudTest extends A
     StringBuilder sb = new StringBuilder();
     for (int i = 1; i <= numJettys; i++) {
       if (sb.length() > 0) sb.append(',');
-      JettySolrRunner j = createJetty(testDir, testDir + "/jetty" + this.jettyIntCntr.incrementAndGet(), null, "solrconfig.xml", null);
+      JettySolrRunner j = createJetty(testDir, testDir + "/jetty"
+          + this.jettyIntCntr.incrementAndGet(), null, "solrconfig.xml", null);
       jettys.add(j);
       SolrServer client = createNewSolrServer(j.getLocalPort());
       clients.add(client);
     }
-
+    
     initCloud();
     
     this.jettys.addAll(jettys);
@@ -243,9 +249,9 @@ public class FullSolrCloudTest extends A
     updateMappingsFromZk(this.jettys, this.clients);
     
     // build the shard string
-    for (int i = 1; i <= numJettys/2; i++) {
+    for (int i = 1; i <= numJettys / 2; i++) {
       JettySolrRunner j = this.jettys.get(i);
-      JettySolrRunner j2 = this.jettys.get(i + (numJettys/2 - 1));
+      JettySolrRunner j2 = this.jettys.get(i + (numJettys / 2 - 1));
       if (sb.length() > 0) sb.append(',');
       sb.append("localhost:").append(j.getLocalPort()).append(context);
       sb.append("|localhost:").append(j2.getLocalPort()).append(context);
@@ -255,19 +261,21 @@ public class FullSolrCloudTest extends A
     return jettys;
   }
   
-  public JettySolrRunner createJetty(String dataDir, String shardList, String solrConfigOverride) throws Exception {
-
-    JettySolrRunner jetty = new JettySolrRunner(getSolrHome(), "/solr", 0, solrConfigOverride, null, false);
+  public JettySolrRunner createJetty(String dataDir, String shardList,
+      String solrConfigOverride) throws Exception {
+    
+    JettySolrRunner jetty = new JettySolrRunner(getSolrHome(), "/solr", 0,
+        solrConfigOverride, null, false);
     jetty.setShards(shardList);
     jetty.setDataDir(dataDir);
     jetty.start();
-
+    
     return jetty;
   }
-
+  
   protected void updateMappingsFromZk(List<JettySolrRunner> jettys,
-      List<SolrServer> clients) throws Exception,
-      IOException, KeeperException, URISyntaxException {
+      List<SolrServer> clients) throws Exception, IOException, KeeperException,
+      URISyntaxException {
     zkStateReader.updateCloudState(true);
     shardToClient.clear();
     shardToJetty.clear();
@@ -275,26 +283,28 @@ public class FullSolrCloudTest extends A
     
     for (SolrServer client : clients) {
       // find info for this client in zk
-
+      
       CloudState cloudState = zkStateReader.getCloudState();
-      Map<String,Slice> slices = cloudState.getSlices(
-          DEFAULT_COLLECTION);
+      Map<String,Slice> slices = cloudState.getSlices(DEFAULT_COLLECTION);
       
       if (slices == null) {
-        throw new RuntimeException("No slices found for collection " + DEFAULT_COLLECTION + " in " + cloudState.getCollections());
+        throw new RuntimeException("No slices found for collection "
+            + DEFAULT_COLLECTION + " in " + cloudState.getCollections());
       }
       
       // we find ou state by simply matching ports...
       for (Map.Entry<String,Slice> slice : slices.entrySet()) {
         Map<String,ZkNodeProps> theShards = slice.getValue().getShards();
         for (Map.Entry<String,ZkNodeProps> shard : theShards.entrySet()) {
-          int port = new URI(((CommonsHttpSolrServer) client).getBaseURL()).getPort();
-
+          int port = new URI(((CommonsHttpSolrServer) client).getBaseURL())
+              .getPort();
+          
           if (shard.getKey().contains(":" + port + "_")) {
             CloudSolrServerClient csc = new CloudSolrServerClient();
             csc.client = client;
             csc.shardName = shard.getValue().get(ZkStateReader.NODE_NAME_PROP);
-            boolean isLeader = shard.getValue().containsKey(ZkStateReader.LEADER_PROP);
+            boolean isLeader = shard.getValue().containsKey(
+                ZkStateReader.LEADER_PROP);
             clientToInfo.put(csc, shard.getValue());
             List<SolrServer> list = shardToClient.get(slice.getKey());
             if (list == null) {
@@ -316,12 +326,11 @@ public class FullSolrCloudTest extends A
         DEFAULT_COLLECTION);
     
     for (JettySolrRunner jetty : jettys) {
-
       for (Map.Entry<String,Slice> slice : slices.entrySet()) {
         Map<String,ZkNodeProps> theShards = slice.getValue().getShards();
         for (Map.Entry<String,ZkNodeProps> shard : theShards.entrySet()) {
           int port = jetty.getLocalPort();
-
+         
           if (shard.getKey().contains(":" + port + "_")) {
             jettyToInfo.put(jetty, shard.getValue());
             List<CloudJettyRunner> list = shardToJetty.get(slice.getKey());
@@ -329,7 +338,8 @@ public class FullSolrCloudTest extends A
               list = new ArrayList<CloudJettyRunner>();
               shardToJetty.put(slice.getKey(), list);
             }
-            boolean isLeader = shard.getValue().containsKey(ZkStateReader.LEADER_PROP);
+            boolean isLeader = shard.getValue().containsKey(
+                ZkStateReader.LEADER_PROP);
             CloudJettyRunner cjr = new CloudJettyRunner();
             cjr.jetty = jetty;
             cjr.nodeName = shard.getValue().get(ZkStateReader.NODE_NAME_PROP);
@@ -346,68 +356,74 @@ public class FullSolrCloudTest extends A
   
   @Override
   protected void setDistributedParams(ModifiableSolrParams params) {
-
+    
     if (r.nextBoolean()) {
       // don't set shards, let that be figured out from the cloud state
     } else {
       // use shard ids rather than physical locations
       StringBuilder sb = new StringBuilder();
-      for (int i = 0; i < sliceCount ; i++) {
-        if (i > 0)
-          sb.append(',');
-        sb.append("shard" + (i+1));
+      for (int i = 0; i < sliceCount; i++) {
+        if (i > 0) sb.append(',');
+        sb.append("shard" + (i + 1));
       }
       params.set("shards", sb.toString());
     }
   }
   
   @Override
-  protected void indexDoc(SolrInputDocument doc) throws IOException, SolrServerException {
+  protected void indexDoc(SolrInputDocument doc) throws IOException,
+      SolrServerException {
     controlClient.add(doc);
-
-//    if we wanted to randomly pick a client - but sometimes they may be down...
     
-//    boolean pick = random.nextBoolean();
-//    
-//    int which = (doc.getField(id).toString().hashCode() & 0x7fffffff) % sliceCount;
-//    
-//    if (pick && sliceCount > 1) {
-//      which = which + ((shardCount / sliceCount) * random.nextInt(sliceCount-1));
-//    }
-//    
-//    CommonsHttpSolrServer client = (CommonsHttpSolrServer) clients.get(which);
-
+    // if we wanted to randomly pick a client - but sometimes they may be
+    // down...
+    
+    // boolean pick = random.nextBoolean();
+    //
+    // int which = (doc.getField(id).toString().hashCode() & 0x7fffffff) %
+    // sliceCount;
+    //
+    // if (pick && sliceCount > 1) {
+    // which = which + ((shardCount / sliceCount) *
+    // random.nextInt(sliceCount-1));
+    // }
+    //
+    // CommonsHttpSolrServer client = (CommonsHttpSolrServer)
+    // clients.get(which);
     
     UpdateRequest ureq = new UpdateRequest();
     ureq.add(doc);
-    //ureq.setParam(UpdateParams.UPDATE_CHAIN, DISTRIB_UPDATE_CHAIN);
+    // ureq.setParam(UpdateParams.UPDATE_CHAIN, DISTRIB_UPDATE_CHAIN);
     ureq.process(cloudClient);
   }
   
-  protected void index_specific(int serverNumber, Object... fields) throws Exception {
+  protected void index_specific(int serverNumber, Object... fields)
+      throws Exception {
     SolrInputDocument doc = new SolrInputDocument();
     for (int i = 0; i < fields.length; i += 2) {
       doc.addField((String) (fields[i]), fields[i + 1]);
     }
     controlClient.add(doc);
-
-    CommonsHttpSolrServer client = (CommonsHttpSolrServer) clients.get(serverNumber);
-
+    
+    CommonsHttpSolrServer client = (CommonsHttpSolrServer) clients
+        .get(serverNumber);
+    
     UpdateRequest ureq = new UpdateRequest();
     ureq.add(doc);
-    //ureq.setParam("update.chain", DISTRIB_UPDATE_CHAIN);
+    // ureq.setParam("update.chain", DISTRIB_UPDATE_CHAIN);
     ureq.process(client);
   }
   
-  protected void index_specific(SolrServer client, Object... fields) throws Exception {
+  protected void index_specific(SolrServer client, Object... fields)
+      throws Exception {
     SolrInputDocument doc = new SolrInputDocument();
     for (int i = 0; i < fields.length; i += 2) {
       doc.addField((String) (fields[i]), fields[i + 1]);
     }
-
+    
     UpdateRequest ureq = new UpdateRequest();
     ureq.add(doc);
-    //ureq.setParam("update.chain", DISTRIB_UPDATE_CHAIN);
+    // ureq.setParam("update.chain", DISTRIB_UPDATE_CHAIN);
     ureq.process(client);
     
     // add to control second in case adding to shards fails
@@ -418,12 +434,14 @@ public class FullSolrCloudTest extends A
     controlClient.deleteByQuery(q);
     for (SolrServer client : clients) {
       UpdateRequest ureq = new UpdateRequest();
-      //ureq.setParam("update.chain", DISTRIB_UPDATE_CHAIN);
+      // ureq.setParam("update.chain", DISTRIB_UPDATE_CHAIN);
       ureq.deleteByQuery(q).process(client);
     }
   }// serial commit...
   
-  /* (non-Javadoc)
+  /*
+   * (non-Javadoc)
+   * 
    * @see org.apache.solr.BaseDistributedSearchTestCase#doTest()
    * 
    * Create 3 shards, each with one replica
@@ -434,39 +452,40 @@ public class FullSolrCloudTest extends A
     handle.put("QTime", SKIPVAL);
     handle.put("timestamp", SKIPVAL);
     
-    indexr(id,1, i1, 100, tlong, 100,t1,"now is the time for all good men"
-            ,"foo_f", 1.414f, "foo_b", "true", "foo_d", 1.414d);
+    indexr(id, 1, i1, 100, tlong, 100, t1, "now is the time for all good men",
+        "foo_f", 1.414f, "foo_b", "true", "foo_d", 1.414d);
     
     // make sure we are in a steady state...
-    waitForRecoveriesToFinish(VERBOSE);
+    waitForRecoveriesToFinish(true);
     
     commit();
     
-    assertDocCounts(VERBOSE);
+    assertDocCounts(true);
     
     indexAbunchOfDocs();
-
+    
     commit();
     
     assertDocCounts(VERBOSE);
     checkQueries();
     
     assertDocCounts(VERBOSE);
-
+    
     query("q", "*:*", "sort", "n_tl1 desc");
     
     brindDownShardIndexSomeDocsAndRecover();
     
     query("q", "*:*", "sort", "n_tl1 desc");
     
-    // test adding another replica to a shard - it should do a recovery/replication to pick up the index from the leader
+    // test adding another replica to a shard - it should do a
+    // recovery/replication to pick up the index from the leader
     addNewReplica();
     
     long docId = testUpdateAndDelete();
     
     // index a bad doc...
     try {
-      indexr(t1,"a doc with no id");
+      indexr(t1, "a doc with no id");
       fail("this should fail");
     } catch (SolrException e) {
       // expected
@@ -474,7 +493,8 @@ public class FullSolrCloudTest extends A
     
     // TODO: bring this to it's own method?
     // try indexing to a leader that has no replicas up
-    ZkNodeProps leaderProps = zkStateReader.getLeaderProps(DEFAULT_COLLECTION, SHARD2);
+    ZkNodeProps leaderProps = zkStateReader.getLeaderProps(DEFAULT_COLLECTION,
+        SHARD2);
     
     String nodeName = leaderProps.get(ZkStateReader.NODE_NAME_PROP);
     chaosMonkey.stopShardExcept(SHARD2, nodeName);
@@ -494,7 +514,7 @@ public class FullSolrCloudTest extends A
     checkShardConsistency("shard1");
     
   }
-
+  
   private long testUpdateAndDelete() throws Exception, SolrServerException,
       IOException {
     long docId = 99999999L;
@@ -521,7 +541,7 @@ public class FullSolrCloudTest extends A
     assertEquals(1, results.getResults().getNumFound());
     
     UpdateRequest uReq = new UpdateRequest();
-    //uReq.setParam(UpdateParams.UPDATE_CHAIN, DISTRIB_UPDATE_CHAIN);
+    // uReq.setParam(UpdateParams.UPDATE_CHAIN, DISTRIB_UPDATE_CHAIN);
     uReq.deleteById(Long.toString(docId)).process(clients.get(0));
     
     commit();
@@ -530,7 +550,7 @@ public class FullSolrCloudTest extends A
     assertEquals(0, results.getResults().getNumFound());
     return docId;
   }
-
+  
   private void addNewReplica() throws Exception, InterruptedException,
       TimeoutException, IOException, KeeperException, URISyntaxException,
       SolrServerException {
@@ -541,21 +561,22 @@ public class FullSolrCloudTest extends A
     // new server should be part of first shard
     // how many docs are on the new shard?
     for (SolrServer client : shardToClient.get("shard1")) {
-      if (VERBOSE) System.out.println("total:" + client.query(new SolrQuery("*:*")).getResults().getNumFound());
+      if (VERBOSE) System.out.println("total:"
+          + client.query(new SolrQuery("*:*")).getResults().getNumFound());
     }
-
+    
     checkShardConsistency("shard1");
-
+    
     assertDocCounts(VERBOSE);
   }
-
-  protected void waitForRecoveriesToFinish(boolean verbose) throws KeeperException,
-      InterruptedException {
+  
+  protected void waitForRecoveriesToFinish(boolean verbose)
+      throws KeeperException, InterruptedException {
     boolean cont = true;
     int cnt = 0;
     
     while (cont) {
-      if (VERBOSE) System.out.println("-");
+      if (verbose) System.out.println("-");
       boolean sawLiveRecovering = false;
       zkStateReader.updateCloudState(true);
       CloudState cloudState = zkStateReader.getCloudState();
@@ -563,15 +584,14 @@ public class FullSolrCloudTest extends A
       for (Map.Entry<String,Slice> entry : slices.entrySet()) {
         Map<String,ZkNodeProps> shards = entry.getValue().getShards();
         for (Map.Entry<String,ZkNodeProps> shard : shards.entrySet()) {
-          if (VERBOSE) System.out.println("rstate:"
+          if (verbose) System.out.println("rstate:"
               + shard.getValue().get(ZkStateReader.STATE_PROP)
               + " live:"
               + cloudState.liveNodesContain(shard.getValue().get(
                   ZkStateReader.NODE_NAME_PROP)));
           String state = shard.getValue().get(ZkStateReader.STATE_PROP);
-          if ((state.equals(ZkStateReader.RECOVERING) || state
-              .equals(ZkStateReader.SYNC) || state
-              .equals(ZkStateReader.DOWN))
+          if ((state.equals(ZkStateReader.RECOVERING)
+              || state.equals(ZkStateReader.SYNC))
               && cloudState.liveNodesContain(shard.getValue().get(
                   ZkStateReader.NODE_NAME_PROP))) {
             sawLiveRecovering = true;
@@ -580,9 +600,10 @@ public class FullSolrCloudTest extends A
       }
       if (!sawLiveRecovering || cnt == 10) {
         if (!sawLiveRecovering) {
-          if (VERBOSE) System.out.println("no one is recoverying");
+          if (verbose) System.out.println("no one is recoverying");
         } else {
-          if (VERBOSE) System.out.println("gave up waiting for recovery to finish..");
+          if (verbose) System.out
+              .println("gave up waiting for recovery to finish..");
         }
         cont = false;
       } else {
@@ -591,7 +612,7 @@ public class FullSolrCloudTest extends A
       cnt++;
     }
   }
-
+  
   private void brindDownShardIndexSomeDocsAndRecover() throws Exception,
       SolrServerException, IOException, InterruptedException {
     
@@ -605,7 +626,7 @@ public class FullSolrCloudTest extends A
     try {
       // TODO: ignore fail
       index_specific(shardToClient.get(SHARD2).get(0), id, 999, i1, 107, t1,
-        "specific doc!");
+          "specific doc!");
       fail("This server should be down and this update should have failed");
     } catch (SolrServerException e) {
       // expected..
@@ -614,14 +635,15 @@ public class FullSolrCloudTest extends A
     commit();
     query("q", "*:*", "sort", "n_tl1 desc");
     
-//    long cloudClientDocs = cloudClient.query(new SolrQuery("*:*")).getResults().getNumFound();
-//    System.out.println("clouddocs:" + cloudClientDocs);
+    // long cloudClientDocs = cloudClient.query(new
+    // SolrQuery("*:*")).getResults().getNumFound();
+    // System.out.println("clouddocs:" + cloudClientDocs);
     
     // try to index to a living shard at shard2
     // TODO: this can fail with connection refused !????
     index_specific(shardToClient.get(SHARD2).get(1), id, 1000, i1, 108, t1,
         "specific doc!");
-
+    
     commit();
     
     checkShardConsistency(true, true);
@@ -637,10 +659,10 @@ public class FullSolrCloudTest extends A
     doc.addField("id", 1001);
     
     controlClient.add(doc);
-
+    
     UpdateRequest ureq = new UpdateRequest();
     ureq.add(doc);
-    //ureq.setParam("update.chain", DISTRIB_UPDATE_CHAIN);
+    // ureq.setParam("update.chain", DISTRIB_UPDATE_CHAIN);
     ureq.process(cloudClient);
     
     commit();
@@ -654,25 +676,26 @@ public class FullSolrCloudTest extends A
     
     // test debugging
     testDebugQueries();
-
     
     if (VERBOSE) {
-      System.out.println(controlClient.query(new SolrQuery("*:*")).getResults().getNumFound());
-    
-    for (SolrServer client : clients) {
-      try {
-        System.out.println(client.query(new SolrQuery("*:*")).getResults().getNumFound());
-      } catch(Exception e) {
-        
+      System.out.println(controlClient.query(new SolrQuery("*:*")).getResults()
+          .getNumFound());
+      
+      for (SolrServer client : clients) {
+        try {
+          System.out.println(client.query(new SolrQuery("*:*")).getResults()
+              .getNumFound());
+        } catch (Exception e) {
+          
+        }
       }
     }
-    }
     // TODO: This test currently fails because debug info is obtained only
     // on shards with matches.
     // query("q","matchesnothing","fl","*,score", "debugQuery", "true");
-
+    
     // this should trigger a recovery phase on deadShard
-
+    
     deadShard.start(true);
     
     // make sure we have published we are recoverying
@@ -681,173 +704,211 @@ public class FullSolrCloudTest extends A
     waitForRecoveriesToFinish(false);
     
     List<SolrServer> s2c = shardToClient.get(SHARD2);
-
+    
     // if we properly recovered, we should now have the couple missing docs that
     // came in while shard was down
     assertEquals(s2c.get(0).query(new SolrQuery("*:*")).getResults()
-        .getNumFound(), s2c.get(1).query(new SolrQuery("*:*"))
-        .getResults().getNumFound());
+        .getNumFound(), s2c.get(1).query(new SolrQuery("*:*")).getResults()
+        .getNumFound());
   }
-
+  
   private void testDebugQueries() throws Exception {
     handle.put("explain", UNORDERED);
     handle.put("debug", UNORDERED);
     handle.put("time", SKIPVAL);
-    query("q","now their fox sat had put","fl","*,score",CommonParams.DEBUG_QUERY, "true");
+    query("q", "now their fox sat had put", "fl", "*,score",
+        CommonParams.DEBUG_QUERY, "true");
     query("q", "id:[1 TO 5]", CommonParams.DEBUG_QUERY, "true");
     query("q", "id:[1 TO 5]", CommonParams.DEBUG, CommonParams.TIMING);
     query("q", "id:[1 TO 5]", CommonParams.DEBUG, CommonParams.RESULTS);
     query("q", "id:[1 TO 5]", CommonParams.DEBUG, CommonParams.QUERY);
   }
-
+  
   private void checkQueries() throws Exception {
     query("q", "*:*", "sort", "n_tl1 desc");
-
+    
     // random value sort
     for (String f : fieldNames) {
-      query("q","*:*", "sort",f+" desc");
-      query("q","*:*", "sort",f+" asc");
+      query("q", "*:*", "sort", f + " desc");
+      query("q", "*:*", "sort", f + " asc");
     }
-
+    
     // these queries should be exactly ordered and scores should exactly match
-    query("q","*:*", "sort",i1+" desc");
-    query("q","*:*", "sort",i1+" asc");
-    query("q","*:*", "sort",i1+" desc", "fl","*,score");
-    query("q","*:*", "sort","n_tl1 asc", "fl","score");  // test legacy behavior - "score"=="*,score"
-    query("q","*:*", "sort","n_tl1 desc");
+    query("q", "*:*", "sort", i1 + " desc");
+    query("q", "*:*", "sort", i1 + " asc");
+    query("q", "*:*", "sort", i1 + " desc", "fl", "*,score");
+    query("q", "*:*", "sort", "n_tl1 asc", "fl", "score"); // test legacy
+                                                           // behavior -
+                                                           // "score"=="*,score"
+    query("q", "*:*", "sort", "n_tl1 desc");
     handle.put("maxScore", SKIPVAL);
-    query("q","{!func}"+i1);// does not expect maxScore. So if it comes ,ignore it. JavaBinCodec.writeSolrDocumentList()
-    //is agnostic of request params.
+    query("q", "{!func}" + i1);// does not expect maxScore. So if it comes
+                               // ,ignore it.
+                               // JavaBinCodec.writeSolrDocumentList()
+    // is agnostic of request params.
     handle.remove("maxScore");
-    query("q","{!func}"+i1, "fl","*,score");  // even scores should match exactly here
-
+    query("q", "{!func}" + i1, "fl", "*,score"); // even scores should match
+                                                 // exactly here
+    
     handle.put("highlighting", UNORDERED);
     handle.put("response", UNORDERED);
-
+    
     handle.put("maxScore", SKIPVAL);
-    query("q","quick");
-    query("q","all","fl","id","start","0");
-    query("q","all","fl","foofoofoo","start","0");  // no fields in returned docs
-    query("q","all","fl","id","start","100");
-
+    query("q", "quick");
+    query("q", "all", "fl", "id", "start", "0");
+    query("q", "all", "fl", "foofoofoo", "start", "0"); // no fields in returned
+                                                        // docs
+    query("q", "all", "fl", "id", "start", "100");
+    
     handle.put("score", SKIPVAL);
-    query("q","quick","fl","*,score");
-    query("q","all","fl","*,score","start","1");
-    query("q","all","fl","*,score","start","100");
-
-    query("q","now their fox sat had put","fl","*,score",
-            "hl","true","hl.fl",t1);
-
-    query("q","now their fox sat had put","fl","foofoofoo",
-            "hl","true","hl.fl",t1);
-
-    query("q","matchesnothing","fl","*,score");  
-
-    query("q","*:*", "rows",100, "facet","true", "facet.field",t1);
-    query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.limit",-1, "facet.sort","count");
-    query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.limit",-1, "facet.sort","count", "facet.mincount",2);
-    query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.limit",-1, "facet.sort","index");
-    query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.limit",-1, "facet.sort","index", "facet.mincount",2);
-    query("q","*:*", "rows",100, "facet","true", "facet.field",t1,"facet.limit",1);
-    query("q","*:*", "rows",100, "facet","true", "facet.query","quick", "facet.query","all", "facet.query","*:*");
-    query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.offset",1);
-    query("q","*:*", "rows",100, "facet","true", "facet.field",t1, "facet.mincount",2);
-
+    query("q", "quick", "fl", "*,score");
+    query("q", "all", "fl", "*,score", "start", "1");
+    query("q", "all", "fl", "*,score", "start", "100");
+    
+    query("q", "now their fox sat had put", "fl", "*,score", "hl", "true",
+        "hl.fl", t1);
+    
+    query("q", "now their fox sat had put", "fl", "foofoofoo", "hl", "true",
+        "hl.fl", t1);
+    
+    query("q", "matchesnothing", "fl", "*,score");
+    
+    query("q", "*:*", "rows", 100, "facet", "true", "facet.field", t1);
+    query("q", "*:*", "rows", 100, "facet", "true", "facet.field", t1,
+        "facet.limit", -1, "facet.sort", "count");
+    query("q", "*:*", "rows", 100, "facet", "true", "facet.field", t1,
+        "facet.limit", -1, "facet.sort", "count", "facet.mincount", 2);
+    query("q", "*:*", "rows", 100, "facet", "true", "facet.field", t1,
+        "facet.limit", -1, "facet.sort", "index");
+    query("q", "*:*", "rows", 100, "facet", "true", "facet.field", t1,
+        "facet.limit", -1, "facet.sort", "index", "facet.mincount", 2);
+    query("q", "*:*", "rows", 100, "facet", "true", "facet.field", t1,
+        "facet.limit", 1);
+    query("q", "*:*", "rows", 100, "facet", "true", "facet.query", "quick",
+        "facet.query", "all", "facet.query", "*:*");
+    query("q", "*:*", "rows", 100, "facet", "true", "facet.field", t1,
+        "facet.offset", 1);
+    query("q", "*:*", "rows", 100, "facet", "true", "facet.field", t1,
+        "facet.mincount", 2);
+    
     // test faceting multiple things at once
-    query("q","*:*", "rows",100, "facet","true", "facet.query","quick", "facet.query","all", "facet.query","*:*"
-    ,"facet.field",t1);
-
-    // test filter tagging, facet exclusion, and naming (multi-select facet support)
-    query("q","*:*", "rows",100, "facet","true", "facet.query","{!key=myquick}quick", "facet.query","{!key=myall ex=a}all", "facet.query","*:*"
-    ,"facet.field","{!key=mykey ex=a}"+t1
-    ,"facet.field","{!key=other ex=b}"+t1
-    ,"facet.field","{!key=again ex=a,b}"+t1
-    ,"facet.field",t1
-    ,"fq","{!tag=a}id:[1 TO 7]", "fq","{!tag=b}id:[3 TO 9]"
-    );
-    query("q", "*:*", "facet", "true", "facet.field", "{!ex=t1}SubjectTerms_mfacet", "fq", "{!tag=t1}SubjectTerms_mfacet:(test 1)", "facet.limit", "10", "facet.mincount", "1");
-
+    query("q", "*:*", "rows", 100, "facet", "true", "facet.query", "quick",
+        "facet.query", "all", "facet.query", "*:*", "facet.field", t1);
+    
+    // test filter tagging, facet exclusion, and naming (multi-select facet
+    // support)
+    query("q", "*:*", "rows", 100, "facet", "true", "facet.query",
+        "{!key=myquick}quick", "facet.query", "{!key=myall ex=a}all",
+        "facet.query", "*:*", "facet.field", "{!key=mykey ex=a}" + t1,
+        "facet.field", "{!key=other ex=b}" + t1, "facet.field",
+        "{!key=again ex=a,b}" + t1, "facet.field", t1, "fq",
+        "{!tag=a}id:[1 TO 7]", "fq", "{!tag=b}id:[3 TO 9]");
+    query("q", "*:*", "facet", "true", "facet.field",
+        "{!ex=t1}SubjectTerms_mfacet", "fq",
+        "{!tag=t1}SubjectTerms_mfacet:(test 1)", "facet.limit", "10",
+        "facet.mincount", "1");
+    
     // test field that is valid in schema but missing in all shards
-    query("q","*:*", "rows",100, "facet","true", "facet.field",missingField, "facet.mincount",2);
+    query("q", "*:*", "rows", 100, "facet", "true", "facet.field",
+        missingField, "facet.mincount", 2);
     // test field that is valid in schema and missing in some shards
-    query("q","*:*", "rows",100, "facet","true", "facet.field",oddField, "facet.mincount",2);
-
-    query("q","*:*", "sort",i1+" desc", "stats", "true", "stats.field", i1);
-
-
-    // Try to get better coverage for refinement queries by turning off over requesting.
-    // This makes it much more likely that we may not get the top facet values and hence
+    query("q", "*:*", "rows", 100, "facet", "true", "facet.field", oddField,
+        "facet.mincount", 2);
+    
+    query("q", "*:*", "sort", i1 + " desc", "stats", "true", "stats.field", i1);
+    
+    // Try to get better coverage for refinement queries by turning off over
+    // requesting.
+    // This makes it much more likely that we may not get the top facet values
+    // and hence
     // we turn of that checking.
-    handle.put("facet_fields", SKIPVAL);    
-    query("q","*:*", "rows",0, "facet","true", "facet.field",t1,"facet.limit",5, "facet.shard.limit",5);
+    handle.put("facet_fields", SKIPVAL);
+    query("q", "*:*", "rows", 0, "facet", "true", "facet.field", t1,
+        "facet.limit", 5, "facet.shard.limit", 5);
     // check a complex key name
-    query("q","*:*", "rows",0, "facet","true", "facet.field","{!key='a b/c \\' \\} foo'}"+t1,"facet.limit",5, "facet.shard.limit",5);
+    query("q", "*:*", "rows", 0, "facet", "true", "facet.field",
+        "{!key='a b/c \\' \\} foo'}" + t1, "facet.limit", 5,
+        "facet.shard.limit", 5);
     handle.remove("facet_fields");
-
+    
     query("q", "*:*", "sort", "n_tl1 desc");
-
+    
     // index the same document to two shards and make sure things
     // don't blow up.
     // assumes first n clients are first n shards
-    if (clients.size()>=2) {
-      index(id,100, i1, 107 ,t1,"oh no, a duplicate!");
-      for (int i=0; i<shardCount; i++) {
-        index_specific(i, id,100, i1, 107 ,t1,"oh no, a duplicate!");
+    if (clients.size() >= 2) {
+      index(id, 100, i1, 107, t1, "oh no, a duplicate!");
+      for (int i = 0; i < shardCount; i++) {
+        index_specific(i, id, 100, i1, 107, t1, "oh no, a duplicate!");
       }
       commit();
-      query("q","duplicate", "hl","true", "hl.fl", t1);
-      query("q","fox duplicate horses", "hl","true", "hl.fl", t1);
-      query("q","*:*", "rows",100);
+      query("q", "duplicate", "hl", "true", "hl.fl", t1);
+      query("q", "fox duplicate horses", "hl", "true", "hl.fl", t1);
+      query("q", "*:*", "rows", 100);
     }
   }
-
+  
   private void indexAbunchOfDocs() throws Exception {
-    indexr(id,2, i1, 50 , tlong, 50,t1,"to come to the aid of their country."
-    );
-    indexr(id,3, i1, 2, tlong, 2,t1,"how now brown cow"
-    );
-    indexr(id,4, i1, -100 ,tlong, 101,t1,"the quick fox jumped over the lazy dog"
-    );
-    indexr(id,5, i1, 500, tlong, 500 ,t1,"the quick fox jumped way over the lazy dog"
-    );
-    indexr(id,6, i1, -600, tlong, 600 ,t1,"humpty dumpy sat on a wall");
-    indexr(id,7, i1, 123, tlong, 123 ,t1,"humpty dumpy had a great fall");
-    indexr(id,8, i1, 876, tlong, 876,t1,"all the kings horses and all the kings men");
-    indexr(id,9, i1, 7, tlong, 7,t1,"couldn't put humpty together again");
-    indexr(id,10, i1, 4321, tlong, 4321,t1,"this too shall pass");
-    indexr(id,11, i1, -987, tlong, 987,t1,"An eye for eye only ends up making the whole world blind.");
-    indexr(id,12, i1, 379, tlong, 379,t1,"Great works are performed, not by strength, but by perseverance.");
-    indexr(id,13, i1, 232, tlong, 232,t1,"no eggs on wall, lesson learned", oddField, "odd man out");
-
-    indexr(id, 14, "SubjectTerms_mfacet", new String[]  {"mathematical models", "mathematical analysis"});
-    indexr(id, 15, "SubjectTerms_mfacet", new String[]  {"test 1", "test 2", "test3"});
-    indexr(id, 16, "SubjectTerms_mfacet", new String[]  {"test 1", "test 2", "test3"});
+    indexr(id, 2, i1, 50, tlong, 50, t1, "to come to the aid of their country.");
+    indexr(id, 3, i1, 2, tlong, 2, t1, "how now brown cow");
+    indexr(id, 4, i1, -100, tlong, 101, t1,
+        "the quick fox jumped over the lazy dog");
+    indexr(id, 5, i1, 500, tlong, 500, t1,
+        "the quick fox jumped way over the lazy dog");
+    indexr(id, 6, i1, -600, tlong, 600, t1, "humpty dumpy sat on a wall");
+    indexr(id, 7, i1, 123, tlong, 123, t1, "humpty dumpy had a great fall");
+    indexr(id, 8, i1, 876, tlong, 876, t1,
+        "all the kings horses and all the kings men");
+    indexr(id, 9, i1, 7, tlong, 7, t1, "couldn't put humpty together again");
+    indexr(id, 10, i1, 4321, tlong, 4321, t1, "this too shall pass");
+    indexr(id, 11, i1, -987, tlong, 987, t1,
+        "An eye for eye only ends up making the whole world blind.");
+    indexr(id, 12, i1, 379, tlong, 379, t1,
+        "Great works are performed, not by strength, but by perseverance.");
+    indexr(id, 13, i1, 232, tlong, 232, t1, "no eggs on wall, lesson learned",
+        oddField, "odd man out");
+    
+    indexr(id, 14, "SubjectTerms_mfacet", new String[] {"mathematical models",
+        "mathematical analysis"});
+    indexr(id, 15, "SubjectTerms_mfacet", new String[] {"test 1", "test 2",
+        "test3"});
+    indexr(id, 16, "SubjectTerms_mfacet", new String[] {"test 1", "test 2",
+        "test3"});
     String[] vals = new String[100];
-    for (int i=0; i<100; i++) {
+    for (int i = 0; i < 100; i++) {
       vals[i] = "test " + i;
     }
     indexr(id, 17, "SubjectTerms_mfacet", vals);
-
-    for (int i=100; i<150; i++) {
-      indexr(id, i);      
+    
+    for (int i = 100; i < 150; i++) {
+      indexr(id, i);
     }
   }
-
+  
   protected void checkShardConsistency(String shard) throws Exception {
     checkShardConsistency(shard, false);
   }
   
-  protected String checkShardConsistency(String shard, boolean verbose) throws Exception {
+  protected String checkShardConsistency(String shard, boolean verbose)
+      throws Exception {
     
     List<SolrServer> solrClients = shardToClient.get(shard);
     if (solrClients == null) {
-      throw new RuntimeException("shard not found:" + shard + " keys:" + shardToClient.keySet());
+      throw new RuntimeException("shard not found:" + shard + " keys:"
+          + shardToClient.keySet());
     }
     long num = -1;
     long lastNum = -1;
     String failMessage = null;
     if (verbose) System.out.println("check const of " + shard);
     int cnt = 0;
+    
+    assertEquals(
+        "The client count does not match up with the shard count for slice:"
+            + shard,
+        zkStateReader.getCloudState().getSlice(DEFAULT_COLLECTION, shard)
+            .getShards().size(), solrClients.size());
+
     for (SolrServer client : solrClients) {
       ZkNodeProps props = clientToInfo.get(new CloudSolrServerClient(client));
       if (verbose) System.out.println("client" + cnt++);
@@ -858,20 +919,22 @@ public class FullSolrCloudTest extends A
         query.set("distrib", false);
         num = client.query(query).getResults().getNumFound();
       } catch (SolrServerException e) {
-        if (verbose) System.out.println("error contacting client: " + e.getMessage() + "\n");
+        if (verbose) System.out.println("error contacting client: "
+            + e.getMessage() + "\n");
         continue;
       }
-   
+      
       boolean live = false;
       String nodeName = props.get(ZkStateReader.NODE_NAME_PROP);
       if (zkStateReader.getCloudState().liveNodesContain(nodeName)) {
         live = true;
       }
       if (verbose) System.out.println(" live:" + live);
-
+      
       if (verbose) System.out.println(" num:" + num + "\n");
       
-      boolean active = props.get(ZkStateReader.STATE_PROP).equals(ZkStateReader.ACTIVE);
+      boolean active = props.get(ZkStateReader.STATE_PROP).equals(
+          ZkStateReader.ACTIVE);
       if (active && live) {
         if (lastNum > -1 && lastNum != num && failMessage == null) {
           failMessage = shard + " is not consistent, expected:" + lastNum
@@ -880,17 +943,19 @@ public class FullSolrCloudTest extends A
         lastNum = num;
       }
     }
-
+    
     return failMessage;
-   
+    
   }
   
   protected void checkShardConsistency() throws Exception {
     checkShardConsistency(true, false);
   }
   
-  protected void checkShardConsistency(boolean checkVsControl, boolean verbose) throws Exception {
-    long docs = controlClient.query(new SolrQuery("*:*")).getResults().getNumFound();
+  protected void checkShardConsistency(boolean checkVsControl, boolean verbose)
+      throws Exception {
+    long docs = controlClient.query(new SolrQuery("*:*")).getResults()
+        .getNumFound();
     if (verbose) System.out.println("Control Docs:" + docs);
     
     updateMappingsFromZk(jettys, clients);
@@ -902,7 +967,7 @@ public class FullSolrCloudTest extends A
       if (shardFailMessage != null && failMessage == null) {
         failMessage = shardFailMessage;
       }
-    }   
+    }
     
     if (failMessage != null) {
       fail(failMessage);
@@ -925,8 +990,10 @@ public class FullSolrCloudTest extends A
               SolrQuery query = new SolrQuery("*:*");
               query.set("distrib", false);
               long results = client.query(query).getResults().getNumFound();
-              if (verbose) System.out.println(new ZkCoreNodeProps(props).getCoreUrl() + " : " + results);
-              if (verbose) System.out.println("shard:" + props.get(ZkStateReader.SHARD_ID_PROP));
+              if (verbose) System.out.println(new ZkCoreNodeProps(props)
+                  .getCoreUrl() + " : " + results);
+              if (verbose) System.out.println("shard:"
+                  + props.get(ZkStateReader.SHARD_ID_PROP));
               cnt += results;
               break;
             }
@@ -946,7 +1013,7 @@ public class FullSolrCloudTest extends A
               + cloudClientDocs, docs, cnt);
     }
   }
-
+  
   private SolrServer getClient(String nodeName) {
     for (CloudSolrServerClient client : clientToInfo.keySet()) {
       if (client.shardName.equals(nodeName)) {
@@ -955,13 +1022,17 @@ public class FullSolrCloudTest extends A
     }
     return null;
   }
-
+  
   protected void assertDocCounts(boolean verbose) throws Exception {
-    // TODO: as we create the clients, we should build a map from shard to node/client
+    // TODO: as we create the clients, we should build a map from shard to
+    // node/client
     // and node/client to shard?
-    if (verbose) System.out.println("control docs:" + controlClient.query(new SolrQuery("*:*")).getResults().getNumFound() + "\n\n");
-    long controlCount = controlClient.query(new SolrQuery("*:*")).getResults().getNumFound();
-
+    if (verbose) System.out.println("control docs:"
+        + controlClient.query(new SolrQuery("*:*")).getResults().getNumFound()
+        + "\n\n");
+    long controlCount = controlClient.query(new SolrQuery("*:*")).getResults()
+        .getNumFound();
+    
     // do some really inefficient mapping...
     ZkStateReader zk = new ZkStateReader(zkServer.getZkAddress(), 10000,
         AbstractZkTestCase.TIMEOUT);
@@ -976,14 +1047,17 @@ public class FullSolrCloudTest extends A
     }
     
     if (slices == null) {
-      throw new RuntimeException("Could not find collection " + DEFAULT_COLLECTION + " in " + cloudState.getCollections());
+      throw new RuntimeException("Could not find collection "
+          + DEFAULT_COLLECTION + " in " + cloudState.getCollections());
     }
-
+    
     for (SolrServer client : clients) {
       for (Map.Entry<String,Slice> slice : slices.entrySet()) {
         Map<String,ZkNodeProps> theShards = slice.getValue().getShards();
         for (Map.Entry<String,ZkNodeProps> shard : theShards.entrySet()) {
-          String shardName = new URI(((CommonsHttpSolrServer)client).getBaseURL()).getPort() + "_solr_";
+          String shardName = new URI(
+              ((CommonsHttpSolrServer) client).getBaseURL()).getPort()
+              + "_solr_";
           if (verbose && shard.getKey().endsWith(shardName)) {
             System.out.println("shard:" + slice.getKey());
             System.out.println(shard.getValue());
@@ -992,26 +1066,30 @@ public class FullSolrCloudTest extends A
       }
       
       long count = 0;
-      String currentState = clientToInfo.get(new CloudSolrServerClient(client)).get(ZkStateReader.STATE_PROP);
+      String currentState = clientToInfo.get(new CloudSolrServerClient(client))
+          .get(ZkStateReader.STATE_PROP);
       if (currentState != null && currentState.equals(ZkStateReader.ACTIVE)) {
         SolrQuery query = new SolrQuery("*:*");
         query.set("distrib", false);
         count = client.query(query).getResults().getNumFound();
       }
-
+      
       if (verbose) System.out.println("client docs:" + count + "\n\n");
     }
-    if (verbose) System.out.println("control docs:" + controlClient.query(new SolrQuery("*:*")).getResults().getNumFound() + "\n\n");
+    if (verbose) System.out.println("control docs:"
+        + controlClient.query(new SolrQuery("*:*")).getResults().getNumFound()
+        + "\n\n");
     SolrQuery query = new SolrQuery("*:*");
-    assertEquals("Doc Counts do not add up", controlCount, cloudClient.query(query).getResults().getNumFound());
+    assertEquals("Doc Counts do not add up", controlCount,
+        cloudClient.query(query).getResults().getNumFound());
   }
-
+  
   @Override
-  protected QueryResponse queryServer(ModifiableSolrParams params) throws SolrServerException {  
+  protected QueryResponse queryServer(ModifiableSolrParams params)
+      throws SolrServerException {
+    
+    if (r.nextBoolean()) params.set("collection", DEFAULT_COLLECTION);
     
-    if (r.nextBoolean())
-      params.set("collection",DEFAULT_COLLECTION);
-
     QueryResponse rsp = cloudClient.query(params);
     return rsp;
   }
@@ -1021,7 +1099,7 @@ public class FullSolrCloudTest extends A
     protected final int startI;
     protected final List<Integer> deletes = new ArrayList<Integer>();
     protected final AtomicInteger fails = new AtomicInteger();
-    protected boolean doDeletes;  
+    protected boolean doDeletes;
     
     public StopableIndexingThread(int startI, boolean doDeletes) {
       super("StopableIndexingThread");
@@ -1035,7 +1113,7 @@ public class FullSolrCloudTest extends A
       int i = startI;
       int numDeletes = 0;
       int numAdds = 0;
-
+      
       while (true && !stop) {
         ++i;
         
@@ -1068,13 +1146,14 @@ public class FullSolrCloudTest extends A
         
       }
       
-      System.err.println("added docs:" + numAdds + " with " + fails + " fails" + " deletes:" + numDeletes);
+      System.err.println("added docs:" + numAdds + " with " + fails + " fails"
+          + " deletes:" + numDeletes);
     }
     
     public void safeStop() {
       stop = true;
     }
-
+    
     public int getFails() {
       return fails.get();
     }
@@ -1095,7 +1174,7 @@ public class FullSolrCloudTest extends A
       zkStateReader.close();
     }
     super.tearDown();
- 
+    
     System.clearProperty("zkHost");
   }
   
@@ -1120,15 +1199,15 @@ public class FullSolrCloudTest extends A
   protected SolrServer createNewSolrServer(int port) {
     try {
       // setup the server...
-      String url = "http://localhost:" + port + context + "/" + DEFAULT_COLLECTION;
+      String url = "http://localhost:" + port + context + "/"
+          + DEFAULT_COLLECTION;
       CommonsHttpSolrServer s = new CommonsHttpSolrServer(url);
       s.setConnectionTimeout(100); // 1/10th sec
       s.setSoTimeout(30000);
       s.setDefaultMaxConnectionsPerHost(100);
       s.setMaxTotalConnections(100);
       return s;
-    }
-    catch (Exception ex) {
+    } catch (Exception ex) {
       throw new RuntimeException(ex);
     }
   }

Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java?rev=1233976&r1=1233975&r2=1233976&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java Fri Jan 20 16:02:54 2012
@@ -92,9 +92,11 @@ public class OverseerTest extends SolrTe
       for (int i = 0; i < numShards; i++) {
         CloudDescriptor collection1Desc = new CloudDescriptor();
         collection1Desc.setCollectionName("collection1");
-        CoreDescriptor desc1 = new CoreDescriptor(null, "core"
-            + (i + 1), "");
+        CoreDescriptor desc1 = new CoreDescriptor(null, "core" + (i + 1), "");
         desc1.setCloudDescriptor(collection1Desc);
+        zkController.publishAsDown(zkController.getBaseUrl(), desc1,
+            zkController.getNodeName() + "_" + "core" + (i + 1), "core"
+                + (i + 1));
         ids[i] = zkController.register("core" + (i + 1), desc1);
       }
       
@@ -192,7 +194,12 @@ public class OverseerTest extends SolrTe
             final CoreDescriptor desc = new CoreDescriptor(null, coreName, "");
             desc.setCloudDescriptor(collection1Desc);
             try {
-              ids[slot] = controllers[slot % nodeCount].register(coreName, desc);
+              controllers[slot % nodeCount].publishAsDown(controllers[slot
+                  % nodeCount].getBaseUrl(), desc, controllers[slot
+                  % nodeCount].getNodeName()
+                  + "_" + coreName, coreName);
+              ids[slot] = controllers[slot % nodeCount]
+                  .register(coreName, desc);
             } catch (Throwable e) {
               e.printStackTrace();
               fail("register threw exception:" + e.getClass());

Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java?rev=1233976&r1=1233975&r2=1233976&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/RecoveryZkTest.java Fri Jan 20 16:02:54 2012
@@ -123,6 +123,7 @@ public class RecoveryZkTest extends Full
   
   @Override
   public void tearDown() throws Exception {
+    printLayout();
     // make sure threads have been stopped...
     indexThread.safeStop();
     indexThread2.safeStop();

Modified: lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/DefaultConnectionStrategy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/DefaultConnectionStrategy.java?rev=1233976&r1=1233975&r2=1233976&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/DefaultConnectionStrategy.java (original)
+++ lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/DefaultConnectionStrategy.java Fri Jan 20 16:02:54 2012
@@ -20,6 +20,7 @@ package org.apache.solr.common.cloud;
 import java.io.IOException;
 import java.util.concurrent.TimeoutException;
 
+import org.apache.solr.common.SolrException;
 import org.apache.zookeeper.SolrZooKeeper;
 import org.apache.zookeeper.Watcher;
 import org.slf4j.Logger;
@@ -47,7 +48,7 @@ public class DefaultConnectionStrategy e
           .update(new SolrZooKeeper(serverAddress, zkClientTimeout, watcher));
       log.info("Reconnected to ZooKeeper");
     } catch (Exception e) {
-      log.error("", e);
+      SolrException.log(log, "Reconnect to ZooKeeper failed", e);
       log.info("Reconnect to ZooKeeper failed");
     }
     

Modified: lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ZkCmdExecutor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ZkCmdExecutor.java?rev=1233976&r1=1233975&r2=1233976&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ZkCmdExecutor.java (original)
+++ lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/cloud/ZkCmdExecutor.java Fri Jan 20 16:02:54 2012
@@ -129,7 +129,7 @@ public class ZkCmdExecutor {
   protected void retryDelay(int attemptCount) {
     if (attemptCount > 0) {
       try {
-        Thread.sleep(Math.min(10000, attemptCount * retryDelay));
+        Thread.sleep(Math.min(10000, attemptCount * retryDelay)); // cap at 10s
       } catch (InterruptedException e) {
         LOG.debug("Failed to sleep: " + e, e);
       }