You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2012/01/05 07:03:03 UTC

svn commit: r1227497 - in /lucene/dev/branches/solrcloud/solr: core/src/java/org/apache/solr/client/solrj/embedded/ core/src/java/org/apache/solr/handler/admin/ core/src/java/org/apache/solr/update/ core/src/java/org/apache/solr/update/processor/ core/...

Author: markrmiller
Date: Thu Jan  5 06:03:02 2012
New Revision: 1227497

URL: http://svn.apache.org/viewvc?rev=1227497&view=rev
Log:
SOLR-3001 and other test stuff I was working on

Modified:
    lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java
    lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
    lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java
    lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
    lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/AbstractDistributedZkTestCase.java
    lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/BasicSolrCloudTest.java
    lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java
    lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySolrCloudTest.java
    lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java
    lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ZkTestServer.java
    lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/update/SolrCmdDistributorTest.java
    lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttpSolrServer.java
    lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/SolrException.java

Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java?rev=1227497&r1=1227496&r2=1227497&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/client/solrj/embedded/JettySolrRunner.java Thu Jan  5 06:03:02 2012
@@ -97,7 +97,7 @@ public class JettySolrRunner {
       connector.setReuseAddress(true);
       QueuedThreadPool threadPool = (QueuedThreadPool) connector.getThreadPool();
       if (threadPool != null) {
-        threadPool.setMaxStopTimeMs(100);
+        threadPool.setMaxStopTimeMs(1000);
       }
       server.setConnectors(new Connector[] { connector });
       server.setSessionIdManager(new HashSessionIdManager(new Random()));

Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java?rev=1227497&r1=1227496&r2=1227497&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java Thu Jan  5 06:03:02 2012
@@ -601,6 +601,9 @@ public class CoreAdminHandler extends Re
     String nodeName = params.get("nodeName");
 
     SolrCore core = coreContainer.getCore(cname);
+    if (core == null) {
+      throw new SolrException(ErrorCode.BAD_REQUEST, "core not found:" + cname);
+    }
     try {
       String state;
       while (true) {

Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java?rev=1227497&r1=1227496&r2=1227497&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/SolrCmdDistributor.java Thu Jan  5 06:03:02 2012
@@ -68,8 +68,8 @@ public class SolrCmdDistributor {
 
   private Response response = new Response();
   
-  private final Map<String,List<AddRequest>> adds = new HashMap<String,List<AddRequest>>();
-  private final Map<String,List<DeleteRequest>> deletes = new HashMap<String,List<DeleteRequest>>();
+  private final Map<Node,List<AddRequest>> adds = new HashMap<Node,List<AddRequest>>();
+  private final Map<Node,List<DeleteRequest>> deletes = new HashMap<Node,List<DeleteRequest>>();
   
   class AddRequest {
     AddUpdateCommand cmd;
@@ -81,11 +81,11 @@ public class SolrCmdDistributor {
     ModifiableSolrParams params;
   }
   
-  public void finish(List<Node> urls) {
+  public void finish() {
 
     // piggyback on any outstanding adds or deletes if possible.
-    flushAdds(1, null, null, urls);
-    flushDeletes(1, null, null, urls);
+    flushAdds(1, null, null);
+    flushDeletes(1, null, null);
 
     checkResponses(true);
   }
@@ -101,12 +101,11 @@ public class SolrCmdDistributor {
     }
   }
   
-  public void distribAdd(AddUpdateCommand cmd, List<Node> urls, ModifiableSolrParams params) throws IOException {
-    
+  public void distribAdd(AddUpdateCommand cmd, List<Node> nodes, ModifiableSolrParams commitParams) throws IOException {
     checkResponses(false);
     
     // make sure any pending deletes are flushed
-    flushDeletes(1, null, null, urls);
+    flushDeletes(1, null, null);
     
     // TODO: this is brittle
     // need to make a clone since these commands may be reused
@@ -118,77 +117,67 @@ public class SolrCmdDistributor {
     clone.setVersion(cmd.getVersion());
     AddRequest addRequest = new AddRequest();
     addRequest.cmd = clone;
-    addRequest.params = params;
+    addRequest.params = commitParams;
 
-    for (Node url : urls) {
-      List<AddRequest> alist = adds.get(url.getUrl());
+    for (Node node : nodes) {
+      List<AddRequest> alist = adds.get(node);
       if (alist == null) {
         alist = new ArrayList<AddRequest>(2);
-        adds.put(url.getUrl(), alist);
+        adds.put(node, alist);
       }
       alist.add(addRequest);
-     // System.out.println("buffer add to " + url);
     }
     
-    flushAdds(maxBufferedAddsPerServer, null, null, urls);
+    flushAdds(maxBufferedAddsPerServer, null, null);
   }
   
-  public void distribCommit(CommitUpdateCommand cmd, List<Node> urls, ModifiableSolrParams params)
-      throws IOException {
-    
-    // Wait for all outstanding repsonses to make sure that a commit
+  public void distribCommit(CommitUpdateCommand cmd, List<Node> nodes,
+      ModifiableSolrParams params) throws IOException {
+    // Wait for all outstanding responses to make sure that a commit
     // can't sneak in ahead of adds or deletes we already sent.
     // We could do this on a per-server basis, but it's more complex
     // and this solution will lead to commits happening closer together.
     checkResponses(true);
-
-    // piggyback on any outstanding adds or deletes if possible.
-    boolean piggied = false;
-    if (!flushAdds(1, cmd, params, urls)) {
-      if (flushDeletes(1, cmd, params, urls)) piggied = true;
-    } else {
-      piggied = true;
-    }
     
-    if (!piggied) {
-      UpdateRequestExt ureq = new UpdateRequestExt();
-      ureq.setParams(params);
-      
-      addCommit(ureq, cmd);
-      
-      for (Node url : urls) {
-        submit(ureq, url);
-      }
+    // currently, we don't try to piggy back on outstanding adds or deletes
+    
+    UpdateRequestExt ureq = new UpdateRequestExt();
+    ureq.setParams(params);
+    
+    addCommit(ureq, cmd);
+    
+    for (Node node : nodes) {
+      submit(ureq, node);
     }
     
     // if the command wanted to block until everything was committed,
     // then do that here.
-
+    
     if (cmd.waitSearcher) {
       checkResponses(true);
     }
   }
   
-  private void doDelete(DeleteUpdateCommand cmd, List<Node> urls,
+  private void doDelete(DeleteUpdateCommand cmd, List<Node> nodes,
       ModifiableSolrParams params) throws IOException {
     
-    flushAdds(1, null, null, urls);
+    flushAdds(1, null, null);
     
     DeleteUpdateCommand clonedCmd = clone(cmd);
     DeleteRequest deleteRequest = new DeleteRequest();
     deleteRequest.cmd = clonedCmd;
     deleteRequest.params = params;
-    for (Node url : urls) {
-      List<DeleteRequest> dlist = deletes.get(url.getUrl());
+    for (Node node : nodes) {
+      List<DeleteRequest> dlist = deletes.get(node);
       
       if (dlist == null) {
         dlist = new ArrayList<DeleteRequest>(2);
-        deletes.put(url.getUrl(), dlist);
+        deletes.put(node, dlist);
       }
       dlist.add(deleteRequest);
     }
     
-    flushDeletes(maxBufferedDeletesPerServer, null, null, urls);
+    flushDeletes(maxBufferedDeletesPerServer, null, null);
   }
   
   void addCommit(UpdateRequestExt ureq, CommitUpdateCommand cmd) {
@@ -197,69 +186,82 @@ public class SolrCmdDistributor {
         : AbstractUpdateRequest.ACTION.COMMIT, false, cmd.waitSearcher);
   }
   
-  boolean flushAdds(int limit, CommitUpdateCommand ccmd, ModifiableSolrParams params, List<Node> urls) {
+  boolean flushAdds(int limit, CommitUpdateCommand ccmd, ModifiableSolrParams commitParams) {
     // check for pending deletes
-    UpdateRequestExt ureq = null;
-    for (Node url : urls) {
-      List<AddRequest> alist = adds.get(url.getUrl());
+  
+    Set<Node> removeNodes = new HashSet<Node>();
+    Set<Node> nodes = adds.keySet();
+ 
+    for (Node node : nodes) {
+      List<AddRequest> alist = adds.get(node);
       if (alist == null || alist.size() < limit) return false;
-      if (ureq == null) {
-        ureq = new UpdateRequestExt();
-        
-        addCommit(ureq, ccmd);
-        
-        ModifiableSolrParams combinedParams = new ModifiableSolrParams();
-        
-        for (AddRequest aReq : alist) {
-          AddUpdateCommand cmd = aReq.cmd;
-          combinedParams.add(aReq.params);
-          ureq.add(cmd.solrDoc, cmd.commitWithin, cmd.overwrite);
-        }
-        
-        if (params != null) combinedParams.add(params);
-        if (ureq.getParams() == null) ureq.setParams(new ModifiableSolrParams());
-        ureq.getParams().add(combinedParams);
+  
+      UpdateRequestExt ureq = new UpdateRequestExt();
+      
+      addCommit(ureq, ccmd);
+      
+      ModifiableSolrParams combinedParams = new ModifiableSolrParams();
+      
+      for (AddRequest aReq : alist) {
+        AddUpdateCommand cmd = aReq.cmd;
+        combinedParams.add(aReq.params);
+       
+        ureq.add(cmd.solrDoc, cmd.commitWithin, cmd.overwrite);
       }
       
-      adds.remove(url.getUrl());
+      if (commitParams != null) combinedParams.add(commitParams);
+      if (ureq.getParams() == null) ureq.setParams(new ModifiableSolrParams());
+      ureq.getParams().add(combinedParams);
+
+      removeNodes.add(node);
       
-      submit(ureq, url);
+      submit(ureq, node);
+    }
+    
+    for (Node node : removeNodes) {
+      adds.remove(node);
     }
+    
     return true;
   }
   
-  boolean flushDeletes(int limit, CommitUpdateCommand ccmd, ModifiableSolrParams params, List<Node> urls) {
+  boolean flushDeletes(int limit, CommitUpdateCommand ccmd, ModifiableSolrParams commitParams) {
     // check for pending deletes
-    //System.out.println("flush deletes to " + urls);
-    UpdateRequestExt ureq = null;
-    for (Node url : urls) {
-      List<DeleteRequest> dlist = deletes.get(url.getUrl());
-      if (dlist == null || dlist.size() < limit) return false;
-      if (ureq == null) {
-        ureq = new UpdateRequestExt();
  
-        addCommit(ureq, ccmd);
-        
-        ModifiableSolrParams combinedParams = new ModifiableSolrParams();
-        
-        for (DeleteRequest dReq : dlist) {
-          DeleteUpdateCommand cmd = dReq.cmd;
-          combinedParams.add(dReq.params);
-          if (cmd.isDeleteById()) {
-            ureq.deleteById(cmd.getId(), cmd.getVersion());
-          } else {
-            ureq.deleteByQuery(cmd.query);
-          }
+    Set<Node> removeNodes = new HashSet<Node>();
+    Set<Node> nodes = deletes.keySet();
+    for (Node node : nodes) {
+      List<DeleteRequest> dlist = deletes.get(node);
+      if (dlist == null || dlist.size() < limit) return false;
+      UpdateRequestExt ureq = new UpdateRequestExt();
+      
+      addCommit(ureq, ccmd);
+      
+      ModifiableSolrParams combinedParams = new ModifiableSolrParams();
+      
+      for (DeleteRequest dReq : dlist) {
+        DeleteUpdateCommand cmd = dReq.cmd;
+        combinedParams.add(dReq.params);
+        if (cmd.isDeleteById()) {
+          ureq.deleteById(cmd.getId(), cmd.getVersion());
+        } else {
+          ureq.deleteByQuery(cmd.query);
         }
-
-        if (params != null) combinedParams.add(params);
-        if (ureq.getParams() == null) ureq.setParams(new ModifiableSolrParams());
+        
+        if (commitParams != null) combinedParams.add(commitParams);
+        if (ureq.getParams() == null) ureq
+            .setParams(new ModifiableSolrParams());
         ureq.getParams().add(combinedParams);
       }
       
-      deletes.remove(url.getUrl());
-      submit(ureq, url);
+      removeNodes.add(node);
+      submit(ureq, node);
     }
+    
+    for (Node node : removeNodes) {
+      deletes.remove(node);
+    }
+    
     return true;
   }
   
@@ -309,7 +311,7 @@ public class SolrCmdDistributor {
           } else {
             fullUrl = url;
           }
-          
+  
           CommonsHttpSolrServer server = new CommonsHttpSolrServer(fullUrl,
               client);
           
@@ -346,7 +348,8 @@ public class SolrCmdDistributor {
           if (sreq.rspCode != 0) {
             // error during request
             
-            // if there is a retry impl that returns true, we want to retry...
+            // if there is a retry url, we want to retry...
+            // TODO: but we really should only retry on connection errors...
             if (sreq.retries < 5 && sreq.node.checkRetry()) {
               sreq.retries++;
               sreq.rspCode = 0;
@@ -435,5 +438,33 @@ public class SolrCmdDistributor {
     public String getCoreName() {
       return coreName;
     }
+
+    @Override
+    public int hashCode() {
+      final int prime = 31;
+      int result = 1;
+      result = prime * result + ((baseUrl == null) ? 0 : baseUrl.hashCode());
+      result = prime * result + ((coreName == null) ? 0 : coreName.hashCode());
+      result = prime * result + ((url == null) ? 0 : url.hashCode());
+      return result;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      if (this == obj) return true;
+      if (obj == null) return false;
+      if (getClass() != obj.getClass()) return false;
+      StdNode other = (StdNode) obj;
+      if (baseUrl == null) {
+        if (other.baseUrl != null) return false;
+      } else if (!baseUrl.equals(other.baseUrl)) return false;
+      if (coreName == null) {
+        if (other.coreName != null) return false;
+      } else if (!coreName.equals(other.coreName)) return false;
+      if (url == null) {
+        if (other.url != null) return false;
+      } else if (!url.equals(other.url)) return false;
+      return true;
+    }
   }
 }

Modified: lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java?rev=1227497&r1=1227496&r2=1227497&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java Thu Jan  5 06:03:02 2012
@@ -97,8 +97,7 @@ public class DistributedUpdateProcessor 
   // method in this update processor
   private boolean isLeader = true;
   private boolean forwardToLeader = false;
-  private List<Node> urls;
-  private String shardId;
+  private List<Node> nodes;
 
   
   public DistributedUpdateProcessor(SolrQueryRequest req,
@@ -138,8 +137,7 @@ public class DistributedUpdateProcessor 
   }
 
   private List<Node> setupRequest(int hash) {
-    
-    List<Node> urls = null;
+    List<Node> nodes = null;
 
     // if we are in zk mode...
     if (zkEnabled) {
@@ -147,8 +145,7 @@ public class DistributedUpdateProcessor 
       // TODO: if there is no leader, wait and look again
       // TODO: we are reading the leader from zk every time - we should cache
       // this and watch for changes?? Just pull it from ZkController cluster state probably?
-
-      shardId = getShard(hash, collection, zkController.getCloudState()); // get the right shard based on the hash...
+      String shardId = getShard(hash, collection, zkController.getCloudState()); // get the right shard based on the hash...
 
       try {
         // TODO: if we find out we cannot talk to zk anymore, we should probably realize we are not
@@ -164,14 +161,16 @@ public class DistributedUpdateProcessor 
         
         if (req.getParams().getBool(SEEN_LEADER, false)) {
           // we are coming from the leader, just go local - add no urls
+          forwardToLeader = false;
         } else if (isLeader) {
           // that means I want to forward onto my replicas...
           // so get the replicas...
-          urls = getReplicaNodes(req, collection, shardId, nodeName);
+          forwardToLeader = false;
+          nodes = getReplicaNodes(req, collection, shardId, nodeName);
         } else {
           // I need to forward onto the leader...
-          urls = new ArrayList<Node>(1);
-          urls.add(new RetryNode(leaderProps, zkController.getZkStateReader(), collection, shardId));
+          nodes = new ArrayList<Node>(1);
+          nodes.add(new RetryNode(leaderProps, zkController.getZkStateReader(), collection, shardId));
           forwardToLeader = true;
         }
         
@@ -182,7 +181,7 @@ public class DistributedUpdateProcessor 
       }
     }
 
-    return urls;
+    return nodes;
   }
   
   private String getShard(int hash, String collection, CloudState cloudState) {
@@ -195,11 +194,10 @@ public class DistributedUpdateProcessor 
   @Override
   public void processAdd(AddUpdateCommand cmd) throws IOException {
     // TODO: check for id field?
-    
     int hash = 0;
     if (zkEnabled) {
       hash = hash(cmd);
-      urls = setupRequest(hash);
+      nodes = setupRequest(hash);
     } else {
       // even in non zk mode, tests simulate updates from a leader
       isLeader = !req.getParams().getBool(SEEN_LEADER, false);
@@ -216,12 +214,12 @@ public class DistributedUpdateProcessor 
     }
     
     ModifiableSolrParams params = null;
-    if (urls != null) {
+    if (nodes != null) {
       params = new ModifiableSolrParams(req.getParams());
       if (isLeader) {
         params.set(SEEN_LEADER, true);
       }
-      cmdDistrib.distribAdd(cmd, urls, params);
+      cmdDistrib.distribAdd(cmd, nodes, params);
     }
     
     // TODO: what to do when no idField?
@@ -247,46 +245,49 @@ public class DistributedUpdateProcessor 
     // TODO: if not a forward and replication req is not specified, we could
     // send in a background thread
 
-    cmdDistrib.finish(urls);
+    cmdDistrib.finish();
     Response response = cmdDistrib.getResponse();
     // nocommit - we may need to tell about more than one error...
     
     // if its a forward, any fail is a problem - 
     // otherwise we assume things are fine if we got it locally
     // until we start allowing min replication param
-    if (forwardToLeader && response.errors.size() > 0) {
-      rsp.setException(response.errors.get(0).e);
-    } else {
-      rsp.setException(null);
+    if (response.errors.size() > 0) {
+      // for now we don't error - we assume if it was added locally, we
+      // succeeded - nocommit: forwards should error
+      //rsp.setException(response.errors.get(0).e);
     }
    
     
     // if it is not a forward request, for each fail, try to tell them to
-    // recover
-    if (!forwardToLeader) {
-      for (SolrCmdDistributor.Error error : response.errors) {
+    // recover - nocommit: we would really like to only do this on connection problems
+
+    for (SolrCmdDistributor.Error error : response.errors) {
+      if (error.node instanceof RetryNode) {
+        continue;
+      }
+      // TODO: we should force their state to recovering ??
+      
+      // TODO: do retries??
+      // TODO: what if it is already recovering? Right now they line up -
+      // should they?
+      CommonsHttpSolrServer server;
+      try {
+        server = new CommonsHttpSolrServer(error.node.getBaseUrl());
         
-        // TODO: we should force their state to recovering ??
+        RequestRecovery recoverRequestCmd = new RequestRecovery();
+        recoverRequestCmd.setAction(CoreAdminAction.REQUESTRECOVERY);
+        recoverRequestCmd.setCoreName(error.node.getCoreName());
         
-        // TODO: do retries??
-        // TODO: what if its is already recovering? Right now they line up - should they?
-        CommonsHttpSolrServer server;
-        try {
-          server = new CommonsHttpSolrServer(error.node.getBaseUrl());
-          
-          RequestRecovery recoverRequestCmd = new RequestRecovery();
-          recoverRequestCmd.setAction(CoreAdminAction.REQUESTRECOVERY);
-          recoverRequestCmd.setCoreName(error.node.getCoreName());
-          
-          server.request(recoverRequestCmd);
-        } catch (MalformedURLException e) {
-          log.warn("Problem trying to tell a replica to recover", e);
-        } catch (SolrServerException e) {
-          log.warn("Problem trying to tell a replica to recover", e);
-        } catch (IOException e) {
-          log.warn("Problem trying to tell a replica to recover", e);
-        }
+        server.request(recoverRequestCmd);
+      } catch (MalformedURLException e) {
+        log.warn("Problem trying to tell a replica to recover", e);
+      } catch (SolrServerException e) {
+        log.warn("Problem trying to tell a replica to recover", e);
+      } catch (IOException e) {
+        log.warn("", e);
       }
+      
     }
   }
 
@@ -416,7 +417,7 @@ public class DistributedUpdateProcessor 
     int hash = 0;
     if (zkEnabled) {
       hash = hash(cmd);
-      urls = setupRequest(hash);
+      nodes = setupRequest(hash);
     } else {
       // even in non zk mode, tests simulate updates from a leader
       isLeader = !req.getParams().getBool(SEEN_LEADER, false);
@@ -433,12 +434,12 @@ public class DistributedUpdateProcessor 
     }
 
     ModifiableSolrParams params = null;
-    if (urls != null) {
+    if (nodes != null) {
       params = new ModifiableSolrParams(req.getParams());
       if (isLeader) {
         params.set(SEEN_LEADER, true);
       }
-      cmdDistrib.distribDelete(cmd, urls, params);
+      cmdDistrib.distribDelete(cmd, nodes, params);
     }
 
     // cmd.getIndexId == null when delete by query
@@ -613,11 +614,11 @@ public class DistributedUpdateProcessor 
         String nodeName = req.getCore().getCoreDescriptor().getCoreContainer()
             .getZkController().getNodeName();
         String shardZkNodeName = nodeName + "_" + req.getCore().getName();
-        urls = getReplicaUrls(req, req.getCore().getCoreDescriptor()
+        List<Node> nodes = getReplicaUrls(req, req.getCore().getCoreDescriptor()
             .getCloudDescriptor().getCollectionName(), shardZkNodeName);
 
-        if (urls != null) {
-          cmdDistrib.distribCommit(cmd, urls, params);
+        if (nodes != null) {
+          cmdDistrib.distribCommit(cmd, nodes, params);
           finish();
         }
       }
@@ -626,17 +627,16 @@ public class DistributedUpdateProcessor 
   
   @Override
   public void finish() throws IOException {
-    if (urls != null) {
-      doFinish();
-    }
-    if (next != null && urls == null) next.finish();
+    doFinish();
+    
+    if (next != null && nodes == null) next.finish();
   }
  
   private List<Node> getReplicaNodes(SolrQueryRequest req, String collection,
       String shardId, String thisNodeName) {
     CloudState cloudState = req.getCore().getCoreDescriptor()
         .getCoreContainer().getZkController().getCloudState();
-   
+
     Map<String,Slice> slices = cloudState.getSlices(collection);
     if (slices == null) {
       throw new ZooKeeperException(ErrorCode.BAD_REQUEST, "Could not find collection in zk: " + cloudState);
@@ -658,6 +658,7 @@ public class DistributedUpdateProcessor 
       }
     }
     if (nodes.size() == 0) {
+      // no replicas - go local
       return null;
     }
     return nodes;
@@ -733,12 +734,33 @@ public class DistributedUpdateProcessor 
         Thread.currentThread().interrupt();
         return false;
       }
-      String newUrl = leaderProps.getCoreUrl();
-      if (!this.url.equals(newUrl)) {
-        this.url = newUrl;
-        return true;
-      }
-      return false;
+      
+      this.url = leaderProps.getCoreUrl();
+
+      return true;
+    }
+
+    @Override
+    public int hashCode() {
+      final int prime = 31;
+      int result = super.hashCode();
+      result = prime * result
+          + ((collection == null) ? 0 : collection.hashCode());
+      result = prime * result + ((shardId == null) ? 0 : shardId.hashCode());
+      return result;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+      if (this == obj) return true;
+      if (!super.equals(obj)) return false;
+      if (getClass() != obj.getClass()) return false;
+      RetryNode other = (RetryNode) obj;
+      if (url == null) {
+        if (other.url != null) return false;
+      } else if (!url.equals(other.url)) return false;
+
+      return true;
     }
   }
   

Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/AbstractDistributedZkTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/AbstractDistributedZkTestCase.java?rev=1227497&r1=1227496&r2=1227497&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/AbstractDistributedZkTestCase.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/AbstractDistributedZkTestCase.java Thu Jan  5 06:03:02 2012
@@ -81,8 +81,8 @@ public abstract class AbstractDistribute
     System.clearProperty("collection");
     System.clearProperty("solr.test.sys.prop1");
     System.clearProperty("solr.test.sys.prop2");
-    super.tearDown();
     resetExceptionIgnores();
+    super.tearDown();
     SolrConfig.severeErrors.clear();
   }
   

Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/BasicSolrCloudTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/BasicSolrCloudTest.java?rev=1227497&r1=1227496&r2=1227497&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/BasicSolrCloudTest.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/BasicSolrCloudTest.java Thu Jan  5 06:03:02 2012
@@ -17,9 +17,13 @@ package org.apache.solr.cloud;
  * limitations under the License.
  */
 
+import java.net.MalformedURLException;
+
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.SolrServer;
 import org.apache.solr.client.solrj.SolrServerException;
+import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
+import org.apache.solr.client.solrj.impl.StreamingUpdateSolrServer;
 import org.apache.solr.client.solrj.request.UpdateRequest;
 import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.common.SolrInputDocument;
@@ -63,7 +67,6 @@ public class BasicSolrCloudTest extends 
     params.add("q", t1 + ":originalcontent");
     QueryResponse results = clients.get(0).query(params);
     assertEquals(1, results.getResults().getNumFound());
-    System.out.println("results:" + results);
     
     // update doc
     indexr("id", docId, t1, "updatedcontent");
@@ -73,7 +76,6 @@ public class BasicSolrCloudTest extends 
     assertDocCounts(VERBOSE);
     
     results = clients.get(0).query(params);
-    System.out.println("results1:" + results.getResults());
     assertEquals(0, results.getResults().getNumFound());
     
     params.set("q", t1 + ":updatedcontent");
@@ -87,8 +89,6 @@ public class BasicSolrCloudTest extends 
     
     commit();
     
-    System.out.println("results2:" + results.getResults());
-    
     results = clients.get(0).query(params);
     assertEquals(0, results.getResults().getNumFound());
     
@@ -97,11 +97,9 @@ public class BasicSolrCloudTest extends 
     //uReq.setParam(UpdateParams.UPDATE_CHAIN, DISTRIB_UPDATE_CHAIN);
     SolrInputDocument doc1 = new SolrInputDocument();
 
-    System.out.println("add doc1:" + doc1);
     addFields(doc1, "id", docId++);
     uReq.add(doc1);
     SolrInputDocument doc2 = new SolrInputDocument();
-    System.out.println("add doc2:" + doc2);
     addFields(doc2, "id", docId++);
     uReq.add(doc2);
     
@@ -112,16 +110,12 @@ public class BasicSolrCloudTest extends 
     
     checkShardConsistency();
     
-    System.out.println("controldocs: " + query(controlClient).getResults().getNumFound());
-    System.out.println("clouddocs: " + query(cloudClient).getResults().getNumFound());
-    
     assertDocCounts(VERBOSE);
     
     results = query(cloudClient);
     assertEquals(2, results.getResults().getNumFound());
     
     // two deletes
-    System.out.println("delete:" + Long.toString(docId-1));
     uReq = new UpdateRequest();
     uReq.deleteById(Long.toString(docId-1));
     uReq.deleteById(Long.toString(docId-2)).process(cloudClient);
@@ -143,18 +137,15 @@ public class BasicSolrCloudTest extends 
     doc1 = new SolrInputDocument();
 
     addFields(doc1, "id", docId++);
-    System.out.println("added doc:" + docId);
     uReq.add(doc1);
     doc2 = new SolrInputDocument();
     addFields(doc2, "id", docId++);
-    System.out.println("added doc:" + docId);
     uReq.add(doc2);
  
     uReq.process(cloudClient);
     uReq.process(controlClient);
     
     uReq = new UpdateRequest();
-    System.out.println("delete doc:" + (docId - 2));
     uReq.deleteById(Long.toString(docId - 2)).process(cloudClient);
     controlClient.deleteById(Long.toString(docId - 2));
     
@@ -169,6 +160,22 @@ public class BasicSolrCloudTest extends 
     
     results = query(cloudClient);
     assertEquals(2, results.getResults().getNumFound());
+    
+    testIndexingWithSuss();
+  }
+
+  private void testIndexingWithSuss() throws MalformedURLException, Exception {
+    StreamingUpdateSolrServer suss = new StreamingUpdateSolrServer(
+        ((CommonsHttpSolrServer) clients.get(0)).getBaseURL(), 3, 1);
+    
+    for (int i=100; i<150; i++) {
+      index_specific(suss, id, i);      
+    }
+    suss.blockUntilFinished();
+    
+    commit();
+    
+    checkShardConsistency(false);
   }
 
   private QueryResponse query(SolrServer server) throws SolrServerException {

Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java?rev=1227497&r1=1227496&r2=1227497&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkey.java Thu Jan  5 06:03:02 2012
@@ -17,6 +17,7 @@ package org.apache.solr.cloud;
  * limitations under the License.
  */
 
+import java.io.IOException;
 import java.net.BindException;
 import java.util.ArrayList;
 import java.util.List;
@@ -34,6 +35,7 @@ import org.apache.solr.common.cloud.ZkSt
 import org.apache.solr.core.CoreContainer;
 import org.apache.solr.servlet.SolrDispatchFilter;
 import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.ZooKeeper;
 import org.mortbay.jetty.servlet.FilterHolder;
 
 /**
@@ -49,6 +51,7 @@ public class ChaosMonkey {
   protected static final boolean EXPIRE_SESSIONS = false;
   protected static final boolean CAUSE_CONNECTION_LOSS = false;
   private Map<String,List<CloudJettyRunner>> shardToJetty;
+  
   private ZkTestServer zkServer;
   private ZkStateReader zkStateReader;
   private String collection;
@@ -81,7 +84,7 @@ public class ChaosMonkey {
   public void expireRandomSession() throws KeeperException, InterruptedException {
     String sliceName = getRandomSlice();
     
-    JettySolrRunner jetty = getRandomSacraficialJetty(sliceName, DONTKILLLEADER);
+    JettySolrRunner jetty = getRandomJetty(sliceName, DONTKILLLEADER);
     if (jetty != null) {
       expireSession(jetty);
     }
@@ -90,10 +93,11 @@ public class ChaosMonkey {
   public void randomConnectionLoss() throws KeeperException, InterruptedException {
     String sliceName = getRandomSlice();
     
-    JettySolrRunner jetty = getRandomSacraficialJetty(sliceName, DONTKILLLEADER);
+    JettySolrRunner jetty = getRandomJetty(sliceName, DONTKILLLEADER);
     if (jetty != null) {
       System.out.println("cause connection loss");
       causeConnectionLoss(jetty);
+      connloss.incrementAndGet();
     }
   }
   
@@ -103,9 +107,28 @@ public class ChaosMonkey {
     if (solrDispatchFilter != null) {
       CoreContainer cores = solrDispatchFilter.getCores();
       if (cores != null) {
+        ZkController zkController = cores.getZkController();
         SolrZkClient zkClient = cores.getZkController().getZkClient();
-        // over double tick time...
-        zkClient.getSolrZooKeeper().pauseCnxn(ZkTestServer.TICK_TIME * 3);
+        
+        // nocommit: two ways to try to force connectionloss...
+        // must be at least double tick time...
+        // zkClient.getSolrZooKeeper().pauseCnxn(ZkTestServer.TICK_TIME * 2);
+        
+        // open a new zk with same id and close it - should cause connection loss
+        ZooKeeper zoo2;
+        try {
+          zoo2 = new ZooKeeper(zkController.getZkServerAddress(), zkClient.getSolrZooKeeper().getSessionTimeout(),
+          null,
+          zkClient.getSolrZooKeeper().getSessionId(), null);
+          zoo2.close();
+        } catch (IOException e1) {
+          // best effort: the second ZooKeeper handle could not be created, so nothing was forced
+          e1.printStackTrace();
+        } catch (InterruptedException e) {
+          // do not swallow the interrupt: restore the flag so the calling thread can react to it
+          Thread.currentThread().interrupt();
+        }
+
       }
     }
   }
@@ -198,7 +221,7 @@ public class ChaosMonkey {
   }
   
   public JettySolrRunner stopRandomShard(String slice) throws Exception {
-    JettySolrRunner jetty = getRandomSacraficialJetty(slice, DONTKILLLEADER);
+    JettySolrRunner jetty = getRandomJetty(slice, DONTKILLLEADER);
     if (jetty != null) {
       stopJetty(jetty);
     }
@@ -223,14 +246,14 @@ public class ChaosMonkey {
   }
   
   public JettySolrRunner killRandomShard(String slice) throws Exception {
-    JettySolrRunner jetty = getRandomSacraficialJetty(slice, DONTKILLLEADER);
+    JettySolrRunner jetty = getRandomJetty(slice, DONTKILLLEADER);
     if (jetty != null) {
       killJetty(jetty);
     }
     return jetty;
   }
   
-  public JettySolrRunner getRandomSacraficialJetty(String slice, boolean dontkillleader) throws KeeperException, InterruptedException {
+  public JettySolrRunner getRandomJetty(String slice, boolean dontkillleader) throws KeeperException, InterruptedException {
     // get latest cloud state
     zkStateReader.updateCloudState(true);
     Slice theShards = zkStateReader.getCloudState().getSlices(collection)
@@ -288,13 +311,29 @@ public class ChaosMonkey {
     
     if (dontkillleader && leader.get(ZkStateReader.NODE_NAME_PROP).equals(jetties.get(index).nodeName)) {
       // we don't kill leaders...
-      System.out.println("dont kill the leader");
       return null;
     }
     
     return jetty;
   }
   
+  /** Refreshes the cloud state, then returns a randomly chosen client registered for {@code slice}. */
+  public SolrServer getRandomClient(String slice) throws KeeperException, InterruptedException {
+    // get latest cloud state
+    zkStateReader.updateCloudState(true);
+    // NOTE(review): theShards is never read below; lookup kept unchanged - confirm it can be dropped
+    Slice theShards = zkStateReader.getCloudState().getSlices(collection)
+        .get(slice);
+    
+    // bound is size(), not size() - 1: nextInt(0) throws for a one-client slice and the last client was unreachable
+    List<SolrServer> clients = shardToClient.get(slice);
+    int index = random.nextInt(clients.size());
+    SolrServer client = clients.get(index);
+    // NOTE(review): leader is looked up but never used; call kept unchanged - confirm intent
+    ZkNodeProps leader = zkStateReader.getLeaderProps(collection, slice);
+    return client;
+  }
+  
   // synchronously starts and stops shards randomly, unless there is only one
   // active shard up for a slice or if there is one active and others recovering
   public void startTheMonkey() {
@@ -310,7 +349,7 @@ public class ChaosMonkey {
             
             if (random.nextBoolean()) {
              if (!deadPool.isEmpty()) {
-               System.out.println("start jetty");
+               //System.out.println("start jetty");
                JettySolrRunner jetty = deadPool.remove(random.nextInt(deadPool.size()));
                if (jetty.isRunning()) {
                  
@@ -328,7 +367,7 @@ public class ChaosMonkey {
                    jetty.start();
                  }
                }
-               System.out.println("started on port:" + jetty.getLocalPort());
+               //System.out.println("started on port:" + jetty.getLocalPort());
                starts.incrementAndGet();
                continue;
              }
@@ -336,26 +375,28 @@ public class ChaosMonkey {
             
             int rnd = random.nextInt(10);
             // nocommit: we dont randomly expire yet
-            if (EXPIRE_SESSIONS && rnd < 2) {
+            if (EXPIRE_SESSIONS && rnd < 4) {
               expireRandomSession();
               expires.incrementAndGet();
-            } else if (CAUSE_CONNECTION_LOSS && rnd < 4) {
-              
+            } 
+            
+            if (CAUSE_CONNECTION_LOSS && rnd < 10) {
+              randomConnectionLoss();
               randomConnectionLoss();
-              connloss.incrementAndGet();
+            }
+            
+            JettySolrRunner jetty;
+            if (random.nextBoolean()) {
+              jetty = stopRandomShard();
             } else {
-              JettySolrRunner jetty;
-              if (random.nextBoolean()) {
-                jetty = stopRandomShard();
-              } else {
-                jetty = killRandomShard();
-              }
-              if (jetty == null) {
-                System.out.println("we cannot kill");
-              } else {
-                deadPool.add(jetty);
-              }
+              jetty = killRandomShard();
             }
+            if (jetty == null) {
+              // System.out.println("we cannot kill");
+            } else {
+              deadPool.add(jetty);
+            }
+            
           } catch (InterruptedException e) {
             //
           } catch (Exception e) {

Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySolrCloudTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySolrCloudTest.java?rev=1227497&r1=1227496&r2=1227497&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySolrCloudTest.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ChaosMonkeySolrCloudTest.java Thu Jan  5 06:03:02 2012
@@ -22,20 +22,46 @@ import java.util.List;
 
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.common.SolrInputDocument;
+import org.junit.After;
 import org.junit.AfterClass;
+import org.junit.Before;
 import org.junit.BeforeClass;
 
 public class ChaosMonkeySolrCloudTest extends FullSolrCloudTest {
   
   @BeforeClass
   public static void beforeSuperClass() throws Exception {
+    
+  }
+  
+  @AfterClass
+  public static void afterSuperClass() throws Exception {
+    
+  }
+  
+  @Before
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
     // we expect this time of exception as shards go up and down...
     ignoreException("shard update error ");
     ignoreException("Connection refused");
+    ignoreException("interrupted waiting for shard update response");
+    ignoreException("org\\.mortbay\\.jetty\\.EofException");
+    ignoreException("java\\.lang\\.InterruptedException");
+    ignoreException("java\\.nio\\.channels\\.ClosedByInterruptException");
+    
+    
+    // sometimes we cannot get the same port
+    ignoreException("java\\.net\\.BindException: Address already in use");
+    
+    System.setProperty("numShards", Integer.toString(sliceCount));
   }
   
-  @AfterClass
-  public static void afterSuperClass() throws Exception {
+  @Override
+  @After
+  public void tearDown() throws Exception {
+    super.tearDown();
     resetExceptionIgnores();
   }
   
@@ -82,23 +108,19 @@ public class ChaosMonkeySolrCloudTest ex
     // try and wait for any replications and what not to finish...
 
     // give a moment to make sure any recoveries have started
-    Thread.sleep(1000);
+    Thread.sleep(4000);
     
     // wait until there are no recoveries...
     waitForRecoveriesToFinish(VERBOSE);
     
+    
     commit();
 
-    checkShardConsistency(false);
+    checkShardConsistency(true);
     
     if (VERBOSE) System.out.println("control docs:" + controlClient.query(new SolrQuery("*:*")).getResults().getNumFound() + "\n\n");
   }
   
-  @Override
-  public void tearDown() throws Exception {
-    super.tearDown();
-  }
-  
   // skip the randoms - they can deadlock...
   protected void indexr(Object... fields) throws Exception {
     SolrInputDocument doc = new SolrInputDocument();

Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java?rev=1227497&r1=1227496&r2=1227497&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java Thu Jan  5 06:03:02 2012
@@ -50,6 +50,7 @@ import org.apache.solr.common.params.Com
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.servlet.SolrDispatchFilter;
 import org.apache.zookeeper.KeeperException;
+import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
@@ -458,13 +459,10 @@ public class FullSolrCloudTest extends A
     
     // index a bad doc...
     try {
-      ignoreException("Document is missing mandatory uniqueKey field: id");
       indexr(t1,"a doc with no id");
       fail("this should fail");
     } catch (SolrException e) {
       // expected
-    } finally {
-      resetExceptionIgnores();
     }
     
     // TODO: bring this to it's own method?
@@ -817,7 +815,7 @@ public class FullSolrCloudTest extends A
     checkShardConsistency(shard, false);
   }
   
-  protected void checkShardConsistency(String shard, boolean verbose) throws Exception {
+  protected String checkShardConsistency(String shard, boolean verbose) throws Exception {
     
     List<SolrServer> solrClients = shardToClient.get(shard);
     if (solrClients == null) {
@@ -851,10 +849,8 @@ public class FullSolrCloudTest extends A
         lastNum = num;
       }
     }
-    
-    if (failMessage != null) {
-      fail(failMessage);
-    }
+
+    return failMessage;
    
   }
   
@@ -869,8 +865,13 @@ public class FullSolrCloudTest extends A
     updateMappingsFromZk(jettys, clients);
     
     Set<String> theShards = shardToClient.keySet();
+    String failMessage = null;
     for (String shard : theShards) {
-      checkShardConsistency(shard, verbose);
+      String shardFailMessage = checkShardConsistency(shard, verbose);
+      if (failMessage == null) { failMessage = shardFailMessage; } // keep the first failure; a later consistent shard must not reset it
+    }
+    if (failMessage != null) {
+      fail(failMessage);
     }
     
     // now check that the right # are on each shard
@@ -1064,6 +1065,7 @@ public class FullSolrCloudTest extends A
   };
   
   @Override
+  @After
   public void tearDown() throws Exception {
     if (VERBOSE) {
       super.printLayout();

Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ZkTestServer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ZkTestServer.java?rev=1227497&r1=1227496&r2=1227497&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ZkTestServer.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/cloud/ZkTestServer.java Thu Jan  5 06:03:02 2012
@@ -245,13 +245,8 @@ public class ZkTestServer {
 
   @SuppressWarnings("deprecation")
   public void shutdown() throws IOException {
-    SolrTestCaseJ4.ignoreException("java.nio.channels.ClosedChannelException");
     // TODO: this can log an exception while trying to unregister a JMX MBean
-    try {
-      zkServer.shutdown();
-    } finally {
-      SolrTestCaseJ4.resetExceptionIgnores();
-    }
+    zkServer.shutdown();
   }
  
   

Modified: lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/update/SolrCmdDistributorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/update/SolrCmdDistributorTest.java?rev=1227497&r1=1227496&r2=1227497&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/update/SolrCmdDistributorTest.java (original)
+++ lucene/dev/branches/solrcloud/solr/core/src/test/org/apache/solr/update/SolrCmdDistributorTest.java Thu Jan  5 06:03:02 2012
@@ -100,7 +100,7 @@ public class SolrCmdDistributorTest exte
     
     CommitUpdateCommand ccmd = new CommitUpdateCommand(null, false);
     cmdDistrib.distribCommit(ccmd, nodes, params);
-    cmdDistrib.finish(nodes);
+    cmdDistrib.finish();
     Response response = cmdDistrib.getResponse();
     
     assertEquals(response.errors.toString(), 0, response.errors.size());
@@ -109,12 +109,12 @@ public class SolrCmdDistributorTest exte
         .getNumFound();
     assertEquals(1, numFound);
     
-    CommonsHttpSolrServer client2 = (CommonsHttpSolrServer) clients.get(0);
+    CommonsHttpSolrServer client = (CommonsHttpSolrServer) clients.get(0);
     nodeProps = new ZkNodeProps(ZkStateReader.BASE_URL_PROP,
-        client2.getBaseURL(), ZkStateReader.CORE_PROP, "");
+        client.getBaseURL(), ZkStateReader.CORE_PROP, "");
     nodes.add(new StdNode(new ZkCoreNodeProps(nodeProps)));
     
-    // add another 3 docs to both control and client1
+    // add another 2 docs to control and 3 to client
     
     cmd.solrDoc = getSolrDoc("id", 2);
     cmdDistrib.distribAdd(cmd, nodes, params);
@@ -127,19 +127,19 @@ public class SolrCmdDistributorTest exte
     AddUpdateCommand cmd3 = new AddUpdateCommand(null);
     cmd3.solrDoc = getSolrDoc("id", 4);
     
-    cmdDistrib.distribAdd(cmd3, Collections.singletonList(nodes.get(0)), params);
+    cmdDistrib.distribAdd(cmd3, Collections.singletonList(nodes.get(1)), params);
     
     cmdDistrib.distribCommit(ccmd, nodes, params);
-    cmdDistrib.finish(nodes);
+    cmdDistrib.finish();
     response = cmdDistrib.getResponse();
     
     assertEquals(response.errors.toString(), 0, response.errors.size());
     
     SolrDocumentList results = controlClient.query(new SolrQuery("*:*")).getResults();
     numFound = results.getNumFound();
-    assertEquals(results.toString(), 4, numFound);
+    assertEquals(results.toString(), 3, numFound);
     
-    numFound = client2.query(new SolrQuery("*:*")).getResults()
+    numFound = client.query(new SolrQuery("*:*")).getResults()
         .getNumFound();
     assertEquals(3, numFound);
     
@@ -151,16 +151,16 @@ public class SolrCmdDistributorTest exte
     cmdDistrib.distribDelete(dcmd, nodes, params);
     
     cmdDistrib.distribCommit(ccmd, nodes, params);
-    cmdDistrib.finish(nodes);
+    cmdDistrib.finish();
     response = cmdDistrib.getResponse();
     
     assertEquals(response.errors.toString(), 0, response.errors.size());
     
     results = controlClient.query(new SolrQuery("*:*")).getResults();
     numFound = results.getNumFound();
-    assertEquals(results.toString(), 3, numFound);
+    assertEquals(results.toString(), 2, numFound);
     
-    numFound = client2.query(new SolrQuery("*:*")).getResults()
+    numFound = client.query(new SolrQuery("*:*")).getResults()
         .getNumFound();
     assertEquals(results.toString(), 2, numFound);
   }

Modified: lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttpSolrServer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttpSolrServer.java?rev=1227497&r1=1227496&r2=1227497&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttpSolrServer.java (original)
+++ lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttpSolrServer.java Thu Jan  5 06:03:02 2012
@@ -27,6 +27,7 @@ import org.apache.solr.common.SolrExcept
 
 import java.io.IOException;
 import java.lang.ref.WeakReference;
+import java.net.ConnectException;
 import java.net.MalformedURLException;
 import java.net.SocketException;
 import java.net.URL;
@@ -251,6 +252,7 @@ public class LBHttpSolrServer extends So
         return rsp; // SUCCESS
       } catch (SolrException e) {
         // we retry on 404 or 403 or 503 - you can see this on solr shutdown
+        System.err.println("code:" + e.code());
         if (e.code() == 404 || e.code() == 403 || e.code() == 503) {
           ex = addZombie(server, e);
         } else {
@@ -265,7 +267,8 @@ public class LBHttpSolrServer extends So
       } catch (SocketException e) {
         ex = addZombie(server, e);
       } catch (SolrServerException e) {
-        if (e.getRootCause() instanceof IOException) {
+        Throwable rootCause = e.getRootCause();
+        if (rootCause instanceof IOException || rootCause instanceof ConnectException) {
           ex = addZombie(server, e);
         } else {
           throw e;

Modified: lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/SolrException.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/SolrException.java?rev=1227497&r1=1227496&r2=1227497&view=diff
==============================================================================
--- lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/SolrException.java (original)
+++ lucene/dev/branches/solrcloud/solr/solrj/src/java/org/apache/solr/common/SolrException.java Thu Jan  5 06:03:02 2012
@@ -17,6 +17,7 @@
 
 package org.apache.solr.common;
 
+import org.apache.zookeeper.KeeperException.ConnectionLossException;
 import org.slf4j.Logger;
 
 import java.io.CharArrayWriter;
@@ -102,6 +103,10 @@ public class SolrException extends Runti
 
   public void log(Logger log) { log(log,this); }
   public static void log(Logger log, Throwable e) {
+    // nocommit
+    if (e instanceof ConnectionLossException || e.getCause() != null && e.getCause() instanceof ConnectionLossException) {
+      e.printStackTrace();
+    }
     if (e instanceof SolrException) {
       ((SolrException)e).logged = true;
     }
@@ -116,6 +121,10 @@ public class SolrException extends Runti
   }
 
   public static void log(Logger log, String msg, Throwable e) {
+  // nocommit
+    if (e instanceof ConnectionLossException || getRootCause(e) != null && getRootCause(e) instanceof ConnectionLossException) {
+      e.printStackTrace();
+    }
     if (e instanceof SolrException) {
       ((SolrException)e).logged = true;
     }
@@ -169,11 +178,23 @@ public class SolrException extends Runti
     for (String regex : ignorePatterns) {
       Pattern pattern = Pattern.compile(regex);
       Matcher matcher = pattern.matcher(m);
+      
       if (matcher.find()) return "Ignoring exception matching " + regex;
     }
 
     return null;
   }
-
+  
+  /**
+   * Walks the {@link Throwable#getCause()} chain starting at {@code t}.
+   * @return the last throwable in the chain; {@code t} itself when it has no cause
+   */
+  public static Throwable getRootCause(Throwable t) {
+    Throwable root = t;
+    for (Throwable next = root.getCause(); next != null; next = root.getCause()) {
+      root = next;
+    }
+    return root;
+  }
 
 }