You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by ma...@apache.org on 2010/02/11 02:48:36 UTC

svn commit: r908786 - in /lucene/solr/branches/cloud/src: java/org/apache/solr/cloud/ java/org/apache/solr/core/ test/org/apache/solr/cloud/

Author: markrmiller
Date: Thu Feb 11 01:48:35 2010
New Revision: 908786

URL: http://svn.apache.org/viewvc?rev=908786&view=rev
Log:
add basic code to deal with live nodes and quick server bounces and a test for the issue

Added:
    lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/SolrZooKeeper.java
Modified:
    lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ConnectionManager.java
    lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/DefaultConnectionStrategy.java
    lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/SolrZkClient.java
    lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ZkClientConnectionStrategy.java
    lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ZkController.java
    lucene/solr/branches/cloud/src/java/org/apache/solr/core/CoreContainer.java
    lucene/solr/branches/cloud/src/test/org/apache/solr/cloud/CloudStateUpdateTest.java

Modified: lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ConnectionManager.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ConnectionManager.java?rev=908786&r1=908785&r2=908786&view=diff
==============================================================================
--- lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ConnectionManager.java (original)
+++ lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ConnectionManager.java Thu Feb 11 01:48:35 2010
@@ -24,7 +24,6 @@
 import org.apache.solr.cloud.SolrZkClient.OnReconnect;
 import org.apache.zookeeper.WatchedEvent;
 import org.apache.zookeeper.Watcher;
-import org.apache.zookeeper.ZooKeeper;
 import org.apache.zookeeper.Watcher.Event.KeeperState;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -85,7 +84,7 @@
       try {
         connectionStrategy.reconnect(zkServerAddress, zkClientTimeout, this, new ZkClientConnectionStrategy.ZkUpdate() {
           @Override
-          public void update(ZooKeeper keeper) throws InterruptedException, TimeoutException, IOException {
+          public void update(SolrZooKeeper keeper) throws InterruptedException, TimeoutException, IOException {
            waitForConnected(SolrZkClient.DEFAULT_CLIENT_CONNECT_TIMEOUT);
            client.updateKeeper(keeper);
            if(onReconnect != null) {

Modified: lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/DefaultConnectionStrategy.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/DefaultConnectionStrategy.java?rev=908786&r1=908785&r2=908786&view=diff
==============================================================================
--- lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/DefaultConnectionStrategy.java (original)
+++ lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/DefaultConnectionStrategy.java Thu Feb 11 01:48:35 2010
@@ -24,7 +24,6 @@
 import java.util.concurrent.TimeoutException;
 
 import org.apache.zookeeper.Watcher;
-import org.apache.zookeeper.ZooKeeper;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -38,7 +37,7 @@
   
   @Override
   public void connect(String serverAddress, int timeout, Watcher watcher, ZkUpdate updater) throws IOException, InterruptedException, TimeoutException {
-    updater.update(new ZooKeeper(serverAddress, timeout, watcher));
+    updater.update(new SolrZooKeeper(serverAddress, timeout, watcher));
   }
 
   @Override
@@ -52,7 +51,7 @@
         log.info("Attempting the connect...");
         boolean connected = false;
         try {
-          updater.update(new ZooKeeper(serverAddress, zkClientTimeout, watcher));
+          updater.update(new SolrZooKeeper(serverAddress, zkClientTimeout, watcher));
           // nocommit
           log.info("Reconnected to ZooKeeper");
           connected = true;

Modified: lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/SolrZkClient.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/SolrZkClient.java?rev=908786&r1=908785&r2=908786&view=diff
==============================================================================
--- lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/SolrZkClient.java (original)
+++ lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/SolrZkClient.java Thu Feb 11 01:48:35 2010
@@ -60,7 +60,7 @@
 
   private ConnectionManager connManager;
 
-  volatile ZooKeeper keeper;
+  volatile SolrZooKeeper keeper;
   
   /**
    * @param zkServerAddress
@@ -111,7 +111,7 @@
     strat.connect(zkServerAddress, zkClientTimeout, connManager,
         new ZkUpdate() {
           @Override
-          public void update(ZooKeeper zooKeeper) {
+          public void update(SolrZooKeeper zooKeeper) {
             if (keeper != null) {
               try {
                 keeper.close();
@@ -484,7 +484,7 @@
    * 
    * @param keeper
    */
-  void updateKeeper(ZooKeeper keeper) {
+  void updateKeeper(SolrZooKeeper keeper) {
     // nocommit
    log.info("Updating ZooKeeper instance:" + keeper);
    this.keeper = keeper;

Added: lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/SolrZooKeeper.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/SolrZooKeeper.java?rev=908786&view=auto
==============================================================================
--- lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/SolrZooKeeper.java (added)
+++ lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/SolrZooKeeper.java Thu Feb 11 01:48:35 2010
@@ -0,0 +1,21 @@
+package org.apache.solr.cloud;
+
+import java.io.IOException;
+
+import org.apache.zookeeper.ClientCnxn;
+import org.apache.zookeeper.Watcher;
+import org.apache.zookeeper.ZooKeeper;
+
+public class SolrZooKeeper extends ZooKeeper {
+  // ZooKeeper subclass whose only addition is an accessor for the inherited cnxn field.
+  public SolrZooKeeper(String connectString, int sessionTimeout, Watcher watcher)
+      throws IOException {
+    super(connectString, sessionTimeout, watcher);
+    // No extra setup: all three arguments are passed straight to the ZooKeeper constructor.
+  }
+  // Returns the cnxn field inherited from ZooKeeper; CloudStateUpdateTest calls disconnect() on it.
+  protected ClientCnxn getConnection() {
+    return cnxn;
+  }
+
+}

Modified: lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ZkClientConnectionStrategy.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ZkClientConnectionStrategy.java?rev=908786&r1=908785&r2=908786&view=diff
==============================================================================
--- lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ZkClientConnectionStrategy.java (original)
+++ lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ZkClientConnectionStrategy.java Thu Feb 11 01:48:35 2010
@@ -21,7 +21,6 @@
 import java.util.concurrent.TimeoutException;
 
 import org.apache.zookeeper.Watcher;
-import org.apache.zookeeper.ZooKeeper;
 
 /**
  *
@@ -31,7 +30,7 @@
   public abstract void reconnect(String serverAddress, int zkClientTimeout, Watcher watcher, ZkUpdate updater) throws IOException, InterruptedException, TimeoutException;
   
   public static abstract class ZkUpdate {
-    public abstract void update(ZooKeeper zooKeeper) throws InterruptedException, TimeoutException, IOException;
+    public abstract void update(SolrZooKeeper zooKeeper) throws InterruptedException, TimeoutException, IOException;
   }
   
 }

Modified: lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ZkController.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ZkController.java?rev=908786&r1=908785&r2=908786&view=diff
==============================================================================
--- lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ZkController.java (original)
+++ lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ZkController.java Thu Feb 11 01:48:35 2010
@@ -347,13 +347,32 @@
       
     };
     try {
-      if (!readonly)
+      if (!readonly) {
+        boolean nodeDeleted = true;
+        try {
+          // we attempt a delete in the case of a quick server bounce -
+          // if there was not a graceful shutdown, the node may exist
+          // until expiration timeout - so a node won't be created here because
+          // it exists, but eventually the node will be removed. So delete
+          // in case it exists and create a new node.
+          zkClient.delete(nodePath, -1);
+        } catch (KeeperException.NoNodeException e) {
+          // fine if there is nothing to delete
+          nodeDeleted = false;
+        }
+        if (nodeDeleted) {
+          log
+              .info("Found a previous node that still exists while trying to register a new live node "
+                  + nodePath + " - removing existing node to create another.");
+        }
         zkClient.makePath(nodePath, CreateMode.EPHEMERAL);
+      }
     } catch (KeeperException e) {
       // its okay if the node already exists
       if (e.code() != KeeperException.Code.NODEEXISTS) {
         throw e;
       }
+      System.out.println("NODE ALREADY EXISTS");
     }
     zkClient.getChildren(NODES_ZKNODE, liveNodeWatcher);
   }

Modified: lucene/solr/branches/cloud/src/java/org/apache/solr/core/CoreContainer.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/java/org/apache/solr/core/CoreContainer.java?rev=908786&r1=908785&r2=908786&view=diff
==============================================================================
--- lucene/solr/branches/cloud/src/java/org/apache/solr/core/CoreContainer.java (original)
+++ lucene/solr/branches/cloud/src/java/org/apache/solr/core/CoreContainer.java Thu Feb 11 01:48:35 2010
@@ -61,7 +61,6 @@
   protected boolean persistent = false;
   protected String adminPath = null;
   protected String managementPath = null;
-  protected int zkClientTimeout;
   protected String hostPort;
   protected String hostContext;
   protected String host;
@@ -322,7 +321,7 @@
       zkHost = cfg.get("solr/@zkHost" , null);
       adminPath = cfg.get("solr/cores/@adminPath", null);
       shareSchema = cfg.getBool("solr/cores/@shareSchema", false);
-      zkClientTimeout = cfg.getInt("solr/cores/@zkClientTimeout", 10000);
+      int zkClientTimeout = cfg.getInt("solr/cores/@zkClientTimeout", 10000);
       if (zkPortOverride == null) {
         hostPort = System.getProperty("hostPort");
         if (hostPort == null) {
@@ -340,6 +339,7 @@
       adminHandler  = cfg.get("solr/cores/@adminHandler", null );
       managementPath  = cfg.get("solr/cores/@managementPath", null );
       
+      zkClientTimeout = Integer.parseInt(System.getProperty("zkClientTimeout", Integer.toString(zkClientTimeout)));
       initZooKeeper(zkHost, zkClientTimeout);
 
       if (libDir != null) {

Modified: lucene/solr/branches/cloud/src/test/org/apache/solr/cloud/CloudStateUpdateTest.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/test/org/apache/solr/cloud/CloudStateUpdateTest.java?rev=908786&r1=908785&r2=908786&view=diff
==============================================================================
--- lucene/solr/branches/cloud/src/test/org/apache/solr/cloud/CloudStateUpdateTest.java (original)
+++ lucene/solr/branches/cloud/src/test/org/apache/solr/cloud/CloudStateUpdateTest.java Thu Feb 11 01:48:35 2010
@@ -26,6 +26,7 @@
 import org.apache.solr.core.CoreContainer;
 import org.apache.solr.core.CoreDescriptor;
 import org.apache.solr.core.SolrCore;
+import org.apache.solr.core.CoreContainer.Initializer;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -39,7 +40,11 @@
   protected File tmpDir = new File(System.getProperty("java.io.tmpdir")
       + System.getProperty("file.separator") + getClass().getName() + "-"
       + System.currentTimeMillis());
-  
+
+  protected String getSolrConfigFilename() {
+    return "solr.lowZkTimeout.xml";
+  }
+
   private static final boolean VERBOSE = true;
 
   protected ZkTestServer zkServer;
@@ -47,15 +52,22 @@
   protected String zkDir;
 
   private CoreContainer container1;
+
   private CoreContainer container2;
+
   private CoreContainer container3;
 
   private File dataDir1;
+
   private File dataDir2;
+
   private File dataDir3;
 
+  private Initializer init2;
+
   public void setUp() throws Exception {
     try {
+      System.setProperty("zkClientTimeout", "3000");
       System.setProperty("zkHost", AbstractZkTestCase.ZOO_KEEPER_ADDRESS);
       zkDir = tmpDir.getAbsolutePath() + File.separator
           + "zookeeper/server1/data";
@@ -70,7 +82,7 @@
 
       dataDir2 = new File(tmpDir + File.separator + "data2");
       dataDir2.mkdirs();
-      
+
       dataDir3 = new File(tmpDir + File.separator + "data3");
       dataDir3.mkdirs();
 
@@ -87,7 +99,7 @@
 
       container1 = init1.initialize();
 
-      CoreContainer.Initializer init2 = new CoreContainer.Initializer() {
+      init2 = new CoreContainer.Initializer() {
         {
           this.dataDir = CloudStateUpdateTest.this.dataDir2.getAbsolutePath();
           this.zkPortOverride = "8984";
@@ -95,7 +107,7 @@
       };
 
       container2 = init2.initialize();
-      
+
       CoreContainer.Initializer init3 = new CoreContainer.Initializer() {
         {
           this.dataDir = CloudStateUpdateTest.this.dataDir3.getAbsolutePath();
@@ -111,58 +123,74 @@
     log.info("####SETUP_END " + getName());
 
   }
-  
+
   public void testCoreRegistration() throws Exception {
     System.setProperty("CLOUD_UPDATE_DELAY", "1");
-    CoreDescriptor dcore= new CoreDescriptor(container1, "testcore", "testcore");
+    CoreDescriptor dcore = new CoreDescriptor(container1, "testcore",
+        "testcore");
 
     SolrCore core = container1.create(dcore);
     container1.register(core, false);
-    
-    // slight pause - TODO: takes an oddly long amount of time to schedule tasks with almost no delay ...
+
+    // slight pause - TODO: takes an oddly long amount of time to schedule tasks
+    // with almost no delay ...
     Thread.sleep(5000);
-    
+
     ZkController zkController2 = container2.getZkController();
-    
+
     String host = zkController2.getHostName();
-    
+
     CloudState cloudState2 = zkController2.getCloudState();
     Map<String,Slice> slices = cloudState2.getSlices("testcore");
-    
+
     assertNotNull(slices);
     assertTrue(slices.containsKey(host + ":8983_solr_testcore"));
-    
+
     Slice slice = slices.get(host + ":8983_solr_testcore");
     assertEquals(host + ":8983_solr_testcore", slice.getName());
-    
+
     Map<String,ZkNodeProps> shards = slice.getShards();
-    
+
     assertEquals(1, shards.size());
-    
+
     ZkNodeProps zkProps = shards.get(host + ":8983_solr_testcore");
-    
+
     assertNotNull(zkProps);
-    
+
     assertEquals(host + ":8983_solr", zkProps.get("node_name"));
-    
+
     assertEquals("http://" + host + ":8983/solr/testcore", zkProps.get("url"));
-    
+
     Set<String> liveNodes = cloudState2.getLiveNodes();
     assertNotNull(liveNodes);
     assertEquals(3, liveNodes.size());
-    
+
     container3.shutdown();
-    
+
     liveNodes = zkController2.getCloudState().getLiveNodes();
-    
+
     // slight pause for watch to trigger
     Thread.sleep(500);
-    
+
     assertEquals(2, liveNodes.size());
+
+    // quickly kill / start client
+
+    container2.getZkController().getZkClient().keeper.getConnection()
+        .disconnect();
+    container2.shutdown();
+
+    container2 = init2.initialize();
+
+    Thread.sleep(8000);
+
+    assertTrue(container1.getZkController().getCloudState().liveNodesContain(
+        container2.getZkController().getNodeName()));
+
   }
 
   public void tearDown() throws Exception {
-    if(VERBOSE) {
+    if (VERBOSE) {
       printLayout();
     }
     super.tearDown();