You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2015/03/09 11:33:01 UTC

svn commit: r1665175 - in /lucene/dev/branches/branch_5x: ./ solr/ solr/core/ solr/core/src/java/org/apache/solr/cloud/ solr/core/src/test/org/apache/solr/cloud/ solr/core/src/test/org/apache/solr/cloud/overseer/ solr/solrj/ solr/solrj/src/java/org/apa...

Author: romseygeek
Date: Mon Mar  9 10:33:01 2015
New Revision: 1665175

URL: http://svn.apache.org/r1665175
Log:
SOLR-4044: CloudSolrClient.connect() can take a timeout parameter to wait for the cluster

Added:
    lucene/dev/branches/branch_5x/solr/solrj/src/test/org/apache/solr/client/solrj/impl/TestCloudSolrClientConnections.java
      - copied unchanged from r1665174, lucene/dev/trunk/solr/solrj/src/test/org/apache/solr/client/solrj/impl/TestCloudSolrClientConnections.java
Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/solr/   (props changed)
    lucene/dev/branches/branch_5x/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/solr/core/   (props changed)
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/cloud/ZkController.java
    lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
    lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateWriterTest.java
    lucene/dev/branches/branch_5x/solr/solrj/   (props changed)
    lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java
    lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java
    lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
    lucene/dev/branches/branch_5x/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientTest.java

Modified: lucene/dev/branches/branch_5x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/CHANGES.txt?rev=1665175&r1=1665174&r2=1665175&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/solr/CHANGES.txt Mon Mar  9 10:33:01 2015
@@ -98,6 +98,10 @@ New Features
 
 * SOLR-5846: EnumField supports DocValues functionality. (Elran Dvir, shalin)
 
+* SOLR-4044: CloudSolrClient.connect() throws a more useful exception if the
+  cluster is not ready, and can now take an optional timeout argument to wait
+  for the cluster. (Alan Woodward, shalin, yonik, Mark Miller, Vitaliy Zhovtyuk)
+
 Bug Fixes
 ----------------------
 

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/cloud/ZkController.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/cloud/ZkController.java?rev=1665175&r1=1665174&r2=1665175&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/cloud/ZkController.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/cloud/ZkController.java Mon Mar  9 10:33:01 2015
@@ -601,6 +601,20 @@ public final class ZkController {
     return zkServerAddress;
   }
 
+  /**
+   * Create the zknodes necessary for a cluster to operate
+   * @param zkClient a SolrZkClient
+   * @throws KeeperException if there is a Zookeeper error
+   * @throws InterruptedException on interrupt
+   */
+  public static void createClusterZkNodes(SolrZkClient zkClient) throws KeeperException, InterruptedException {
+    ZkCmdExecutor cmdExecutor = new ZkCmdExecutor(zkClient.getZkClientTimeout());
+    cmdExecutor.ensureExists(ZkStateReader.LIVE_NODES_ZKNODE, zkClient);
+    cmdExecutor.ensureExists(ZkStateReader.COLLECTIONS_ZKNODE, zkClient);
+    cmdExecutor.ensureExists(ZkStateReader.ALIASES, zkClient);
+    cmdExecutor.ensureExists(ZkStateReader.CLUSTER_STATE, zkClient);
+  }
+
   private void init(CurrentCoreDescriptorProvider registerOnReconnect) {
 
     try {
@@ -612,11 +626,9 @@ public final class ZkController {
         publishAndWaitForDownStates();
       }
       
-      // makes nodes zkNode
-      cmdExecutor.ensureExists(ZkStateReader.LIVE_NODES_ZKNODE, zkClient);
-      
+      createClusterZkNodes(zkClient);
+
       createEphemeralLiveNode();
-      cmdExecutor.ensureExists(ZkStateReader.COLLECTIONS_ZKNODE, zkClient);
 
       ShardHandler shardHandler;
       UpdateShardHandler updateShardHandler;

Modified: lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java?rev=1665175&r1=1665174&r2=1665175&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/cloud/OverseerTest.java Mon Mar  9 10:33:01 2015
@@ -86,6 +86,9 @@ public class OverseerTest extends SolrTe
     public MockZKController(String zkAddress, String nodeName) throws InterruptedException, TimeoutException, IOException, KeeperException {
       this.nodeName = nodeName;
       zkClient = new SolrZkClient(zkAddress, TIMEOUT);
+
+      ZkController.createClusterZkNodes(zkClient);
+
       zkStateReader = new ZkStateReader(zkClient);
       zkStateReader.createClusterStateWatchersAndUpdate();
       
@@ -235,7 +238,7 @@ public class OverseerTest extends SolrTe
       AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
       
       zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
-      zkClient.makePath(ZkStateReader.LIVE_NODES_ZKNODE, true);
+      ZkController.createClusterZkNodes(zkClient);
 
       overseerClient = electNewOverseer(server.getZkAddress());
 
@@ -290,7 +293,7 @@ public class OverseerTest extends SolrTe
       AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
       
       zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
-      zkClient.makePath(ZkStateReader.LIVE_NODES_ZKNODE, true);
+      ZkController.createClusterZkNodes(zkClient);
 
       overseerClient = electNewOverseer(server.getZkAddress());
 
@@ -370,7 +373,7 @@ public class OverseerTest extends SolrTe
       AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
 
       zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
-      zkClient.makePath(ZkStateReader.LIVE_NODES_ZKNODE, true);
+      ZkController.createClusterZkNodes(zkClient);
       
       overseerClient = electNewOverseer(server.getZkAddress());
 
@@ -534,7 +537,7 @@ public class OverseerTest extends SolrTe
       
       AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
       AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
-      zkClient.makePath("/live_nodes", true);
+      ZkController.createClusterZkNodes(zkClient);
 
       reader = new ZkStateReader(zkClient);
       reader.createClusterStateWatchersAndUpdate();
@@ -632,8 +635,8 @@ public class OverseerTest extends SolrTe
       AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
       
       zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
-      
-      zkClient.makePath(ZkStateReader.LIVE_NODES_ZKNODE, true);
+
+      ZkController.createClusterZkNodes(zkClient);
       
       reader = new ZkStateReader(zkClient);
       reader.createClusterStateWatchersAndUpdate();
@@ -751,7 +754,7 @@ public class OverseerTest extends SolrTe
       controllerClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
       AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
       AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
-      controllerClient.makePath(ZkStateReader.LIVE_NODES_ZKNODE, true);
+      ZkController.createClusterZkNodes(controllerClient);
 
       killer = new OverseerRestarter(server.getZkAddress());
       killerThread = new Thread(killer);
@@ -808,7 +811,7 @@ public class OverseerTest extends SolrTe
       
       AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
       AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
-      controllerClient.makePath(ZkStateReader.LIVE_NODES_ZKNODE, true);
+      ZkController.createClusterZkNodes(controllerClient);
       
       reader = new ZkStateReader(controllerClient);
       reader.createClusterStateWatchersAndUpdate();
@@ -872,8 +875,8 @@ public class OverseerTest extends SolrTe
       
       AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
       AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
-      controllerClient.makePath(ZkStateReader.LIVE_NODES_ZKNODE, true);
-      
+      ZkController.createClusterZkNodes(controllerClient);
+
       reader = new ZkStateReader(controllerClient);
       reader.createClusterStateWatchersAndUpdate();
 
@@ -914,7 +917,7 @@ public class OverseerTest extends SolrTe
 
       AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
       AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
-      controllerClient.makePath(ZkStateReader.LIVE_NODES_ZKNODE, true);
+      ZkController.createClusterZkNodes(controllerClient);
 
       reader = new ZkStateReader(controllerClient);
       reader.createClusterStateWatchersAndUpdate();
@@ -1046,7 +1049,7 @@ public class OverseerTest extends SolrTe
       zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
       AbstractZkTestCase.tryCleanSolrZkNode(server.getZkHost());
       AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
-      zkClient.makePath(ZkStateReader.LIVE_NODES_ZKNODE, true);
+      ZkController.createClusterZkNodes(zkClient);
 
       reader = new ZkStateReader(zkClient);
       reader.createClusterStateWatchersAndUpdate();

Modified: lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateWriterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateWriterTest.java?rev=1665175&r1=1665174&r2=1665175&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateWriterTest.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/cloud/overseer/ZkStateWriterTest.java Mon Mar  9 10:33:01 2015
@@ -17,14 +17,12 @@ package org.apache.solr.cloud.overseer;
  * limitations under the License.
  */
 
-import java.util.HashMap;
-import java.util.Map;
-
 import org.apache.lucene.util.IOUtils;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.cloud.AbstractZkTestCase;
 import org.apache.solr.cloud.Overseer;
 import org.apache.solr.cloud.OverseerTest;
+import org.apache.solr.cloud.ZkController;
 import org.apache.solr.cloud.ZkTestServer;
 import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.DocCollection;
@@ -33,6 +31,9 @@ import org.apache.solr.common.cloud.Slic
 import org.apache.solr.common.cloud.SolrZkClient;
 import org.apache.solr.common.cloud.ZkStateReader;
 
+import java.util.HashMap;
+import java.util.Map;
+
 public class ZkStateWriterTest extends SolrTestCaseJ4 {
 
   public void testZkStateWriterBatching() throws Exception {
@@ -48,7 +49,7 @@ public class ZkStateWriterTest extends S
       AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
 
       zkClient = new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT);
-      zkClient.makePath(ZkStateReader.LIVE_NODES_ZKNODE, true);
+      ZkController.createClusterZkNodes(zkClient);
 
       ZkStateReader reader = new ZkStateReader(zkClient);
       reader.createClusterStateWatchersAndUpdate();
@@ -131,7 +132,7 @@ public class ZkStateWriterTest extends S
       AbstractZkTestCase.makeSolrZkNode(server.getZkHost());
 
       zkClient = new SolrZkClient(server.getZkAddress(), OverseerTest.DEFAULT_CONNECTION_TIMEOUT);
-      zkClient.makePath(ZkStateReader.LIVE_NODES_ZKNODE, true);
+      ZkController.createClusterZkNodes(zkClient);
 
       ZkStateReader reader = new ZkStateReader(zkClient);
       reader.createClusterStateWatchersAndUpdate();

Modified: lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java?rev=1665175&r1=1665174&r2=1665175&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java (original)
+++ lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java Mon Mar  9 10:33:01 2015
@@ -74,6 +74,8 @@ import java.util.concurrent.ExecutionExc
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
 
 /**
  * SolrJ client class to communicate with SolrCloud.
@@ -472,6 +474,30 @@ public class CloudSolrClient extends Sol
     }
   }
 
+  /**
+   * Connect to a cluster.  If the cluster is not ready, retry connection up to a given timeout.
+   * @param duration the timeout
+   * @param timeUnit the units of the timeout
+   * @throws TimeoutException if the cluster is not ready after the timeout
+   * @throws InterruptedException if the wait is interrupted
+   */
+  public void connect(long duration, TimeUnit timeUnit) throws TimeoutException, InterruptedException {
+    log.info("Waiting for {} {} for cluster at {} to be ready", duration, timeUnit, zkHost);
+    long timeout = System.nanoTime() + timeUnit.toNanos(duration);
+    while (System.nanoTime() < timeout) {
+      try {
+        connect();
+        log.info("Cluster at {} ready", zkHost);
+        return;
+      }
+      catch (RuntimeException e) {
+        // not ready yet, then...
+      }
+      TimeUnit.MILLISECONDS.sleep(250);
+    }
+    throw new TimeoutException("Timed out waiting for cluster");
+  }
+
   public void setParallelUpdates(boolean parallelUpdates) {
     this.parallelUpdates = parallelUpdates;
   }

Modified: lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java?rev=1665175&r1=1665174&r2=1665175&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java (original)
+++ lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java Mon Mar  9 10:33:01 2015
@@ -733,4 +733,11 @@ public class SolrZkClient implements Clo
     return e;
   }
 
+  /**
+   * @return the address of the zookeeper cluster
+   */
+  public String getZkServerAddress() {
+    return zkServerAddress;
+  }
+
 }

Modified: lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java?rev=1665175&r1=1665174&r2=1665175&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java (original)
+++ lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/cloud/ZkStateReader.java Mon Mar  9 10:33:01 2015
@@ -295,12 +295,10 @@ public class ZkStateReader implements Cl
     // We need to fetch the current cluster state and the set of live nodes
     
     synchronized (getUpdateLock()) {
-      cmdExecutor.ensureExists(CLUSTER_STATE, zkClient);
-      cmdExecutor.ensureExists(ALIASES, zkClient);
-      
+
       log.info("Updating cluster state from ZooKeeper... ");
       
-      zkClient.exists(CLUSTER_STATE, new Watcher() {
+      Stat stat = zkClient.exists(CLUSTER_STATE, new Watcher() {
         
         @Override
         public void process(WatchedEvent event) {
@@ -339,6 +337,10 @@ public class ZkStateReader implements Cl
         }
         
       }, true);
+
+      if (stat == null)
+        throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE,
+            "Cannot connect to cluster at " + zkClient.getZkServerAddress() + ": cluster not found/not ready");
     }
    
     

Modified: lucene/dev/branches/branch_5x/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientTest.java?rev=1665175&r1=1665174&r2=1665175&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientTest.java (original)
+++ lucene/dev/branches/branch_5x/solr/solrj/src/test/org/apache/solr/client/solrj/impl/CloudSolrClientTest.java Mon Mar  9 10:33:01 2015
@@ -46,10 +46,11 @@ import org.apache.solr.common.params.Mod
 import org.apache.solr.common.params.ShardParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
-import org.apache.zookeeper.KeeperException;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
+import org.junit.Rule;
 import org.junit.Test;
+import org.junit.rules.ExpectedException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -612,17 +613,20 @@ public class CloudSolrClientTest extends
     }
   }
 
+  @Rule
+  public ExpectedException exception = ExpectedException.none();
+
   public void testWrongZkChrootTest() throws IOException {
+
+    exception.expect(SolrException.class);
+    exception.expectMessage("cluster not found/not ready");
+
     try (CloudSolrClient client = new CloudSolrClient(zkServer.getZkAddress() + "/xyz/foo")) {
       client.setDefaultCollection(DEFAULT_COLLECTION);
       client.setZkClientTimeout(1000 * 60);
       client.connect();
       fail("Expected exception");
-    } catch(SolrException e) {
-      assertTrue(e.getCause() instanceof KeeperException);
     }
-    // see SOLR-6146 - this test will fail by virtue of the zkClient tracking performed
-    // in the afterClass method of the base class
   }
 
   public void customHttpClientTest() throws IOException {