You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2014/08/22 16:15:28 UTC

svn commit: r1619817 - in /lucene/dev/branches/lucene_solr_4_10: ./ solr/ solr/CHANGES.txt solr/core/ solr/core/src/test/org/apache/solr/cloud/ZkSolrClientTest.java solr/solrj/ solr/solrj/src/java/org/apache/solr/common/cloud/ZkCmdExecutor.java

Author: markrmiller
Date: Fri Aug 22 14:15:27 2014
New Revision: 1619817

URL: http://svn.apache.org/r1619817
Log:
SOLR-6405: ZooKeeper calls can easily not be retried enough on ConnectionLoss.

Modified:
    lucene/dev/branches/lucene_solr_4_10/   (props changed)
    lucene/dev/branches/lucene_solr_4_10/solr/   (props changed)
    lucene/dev/branches/lucene_solr_4_10/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/lucene_solr_4_10/solr/core/   (props changed)
    lucene/dev/branches/lucene_solr_4_10/solr/core/src/test/org/apache/solr/cloud/ZkSolrClientTest.java
    lucene/dev/branches/lucene_solr_4_10/solr/solrj/   (props changed)
    lucene/dev/branches/lucene_solr_4_10/solr/solrj/src/java/org/apache/solr/common/cloud/ZkCmdExecutor.java

Modified: lucene/dev/branches/lucene_solr_4_10/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_10/solr/CHANGES.txt?rev=1619817&r1=1619816&r2=1619817&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_10/solr/CHANGES.txt (original)
+++ lucene/dev/branches/lucene_solr_4_10/solr/CHANGES.txt Fri Aug 22 14:15:27 2014
@@ -233,7 +233,10 @@ Bug Fixes
 
 * SOLR-6402: OverseerCollectionProcessor should not exit for ZooKeeper ConnectionLoss.
   (Jessica Cheng via Mark Miller)
-
+  
+* SOLR-6405: ZooKeeper calls can easily not be retried enough on ConnectionLoss.
+  (Jessica Cheng, Mark Miller)
+ 
 Optimizations
 ---------------------
 

Modified: lucene/dev/branches/lucene_solr_4_10/solr/core/src/test/org/apache/solr/cloud/ZkSolrClientTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_10/solr/core/src/test/org/apache/solr/cloud/ZkSolrClientTest.java?rev=1619817&r1=1619816&r2=1619817&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_10/solr/core/src/test/org/apache/solr/cloud/ZkSolrClientTest.java (original)
+++ lucene/dev/branches/lucene_solr_4_10/solr/core/src/test/org/apache/solr/cloud/ZkSolrClientTest.java Fri Aug 22 14:15:27 2014
@@ -24,6 +24,8 @@ import java.util.concurrent.atomic.Atomi
 import junit.framework.Assert;
 
 import org.apache.solr.common.cloud.SolrZkClient;
+import org.apache.solr.common.cloud.ZkCmdExecutor;
+import org.apache.solr.common.cloud.ZkOperation;
 import org.apache.solr.util.AbstractSolrTestCase;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.WatchedEvent;
@@ -198,6 +200,42 @@ public class ZkSolrClientTest extends Ab
       }
     }
   }
+  
+  public void testZkCmdExectutor() throws Exception {
+    String zkDir = createTempDir("zkData").getAbsolutePath();
+    ZkTestServer server = null;
+
+    try {
+      server = new ZkTestServer(zkDir);
+      server.run();
+      AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
+      AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
+
+      final int timeout = random().nextInt(10000) + 5000;
+      
+      ZkCmdExecutor zkCmdExecutor = new ZkCmdExecutor(timeout);
+      final long start = System.nanoTime();
+      try {
+      zkCmdExecutor.retryOperation(new ZkOperation() {
+        @Override
+        public String execute() throws KeeperException, InterruptedException {
+          if (System.nanoTime() - start > TimeUnit.NANOSECONDS.convert(timeout, TimeUnit.MILLISECONDS)) {
+            throw new KeeperException.SessionExpiredException();
+          } 
+          throw new KeeperException.ConnectionLossException();
+        }
+      });
+      } catch(KeeperException.SessionExpiredException e) {
+        
+      } catch (Exception e) {
+        fail("Expected " + KeeperException.SessionExpiredException.class.getSimpleName() + " but got " + e.getClass().getSimpleName());
+      }
+    } finally {
+      if (server != null) {
+        server.shutdown();
+      }
+    }
+  }
 
   public void testMultipleWatchesAsync() throws Exception {
     try (ZkConnection conn = new ZkConnection ()) {

Modified: lucene/dev/branches/lucene_solr_4_10/solr/solrj/src/java/org/apache/solr/common/cloud/ZkCmdExecutor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_10/solr/solrj/src/java/org/apache/solr/common/cloud/ZkCmdExecutor.java?rev=1619817&r1=1619816&r2=1619817&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_10/solr/solrj/src/java/org/apache/solr/common/cloud/ZkCmdExecutor.java (original)
+++ lucene/dev/branches/lucene_solr_4_10/solr/solrj/src/java/org/apache/solr/common/cloud/ZkCmdExecutor.java Fri Aug 22 14:15:27 2014
@@ -27,9 +27,10 @@ import org.apache.zookeeper.data.ACL;
 
 
 public class ZkCmdExecutor {
-  private long retryDelay = 1500L; // 500 ms over for padding
+  private long retryDelay = 1500L; // 1500 ms over for padding
   private int retryCount;
   private List<ACL> acl = ZooDefs.Ids.OPEN_ACL_UNSAFE;
+  private double timeouts;
   
   /**
    * TODO: At this point, this should probably take a SolrZkClient in
@@ -40,8 +41,8 @@ public class ZkCmdExecutor {
    *          with this class.
    */
   public ZkCmdExecutor(int timeoutms) {
-    double timeouts = timeoutms / 1000.0;
-    this.retryCount = Math.round(0.5f * ((float)Math.sqrt(8.0f * timeouts + 1.0f) - 1.0f));
+    timeouts = timeoutms / 1000.0;
+    this.retryCount = Math.round(0.5f * ((float)Math.sqrt(8.0f * timeouts + 1.0f) - 1.0f)) + 1;
   }
   
   public List<ACL> getAcl() {
@@ -84,7 +85,9 @@ public class ZkCmdExecutor {
             throw exception;
           }
         }
-        retryDelay(i);
+        if (i != retryCount -1) {
+          retryDelay(i);
+        }
       }
     }
     throw exception;
@@ -116,7 +119,7 @@ public class ZkCmdExecutor {
    */
   protected void retryDelay(int attemptCount) throws InterruptedException {
     if (attemptCount > 0) {
-      Thread.sleep(attemptCount * retryDelay);
+      Thread.sleep((attemptCount + 1) * retryDelay);
     }
   }