You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by ma...@apache.org on 2010/02/11 02:48:36 UTC
svn commit: r908786 - in /lucene/solr/branches/cloud/src:
java/org/apache/solr/cloud/ java/org/apache/solr/core/
test/org/apache/solr/cloud/
Author: markrmiller
Date: Thu Feb 11 01:48:35 2010
New Revision: 908786
URL: http://svn.apache.org/viewvc?rev=908786&view=rev
Log:
add basic code to deal with live nodes and quick server bounces and a test for the issue
Added:
lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/SolrZooKeeper.java
Modified:
lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ConnectionManager.java
lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/DefaultConnectionStrategy.java
lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/SolrZkClient.java
lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ZkClientConnectionStrategy.java
lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ZkController.java
lucene/solr/branches/cloud/src/java/org/apache/solr/core/CoreContainer.java
lucene/solr/branches/cloud/src/test/org/apache/solr/cloud/CloudStateUpdateTest.java
Modified: lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ConnectionManager.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ConnectionManager.java?rev=908786&r1=908785&r2=908786&view=diff
==============================================================================
--- lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ConnectionManager.java (original)
+++ lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ConnectionManager.java Thu Feb 11 01:48:35 2010
@@ -24,7 +24,6 @@
import org.apache.solr.cloud.SolrZkClient.OnReconnect;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
-import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.Watcher.Event.KeeperState;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -85,7 +84,7 @@
try {
connectionStrategy.reconnect(zkServerAddress, zkClientTimeout, this, new ZkClientConnectionStrategy.ZkUpdate() {
@Override
- public void update(ZooKeeper keeper) throws InterruptedException, TimeoutException, IOException {
+ public void update(SolrZooKeeper keeper) throws InterruptedException, TimeoutException, IOException {
waitForConnected(SolrZkClient.DEFAULT_CLIENT_CONNECT_TIMEOUT);
client.updateKeeper(keeper);
if(onReconnect != null) {
Modified: lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/DefaultConnectionStrategy.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/DefaultConnectionStrategy.java?rev=908786&r1=908785&r2=908786&view=diff
==============================================================================
--- lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/DefaultConnectionStrategy.java (original)
+++ lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/DefaultConnectionStrategy.java Thu Feb 11 01:48:35 2010
@@ -24,7 +24,6 @@
import java.util.concurrent.TimeoutException;
import org.apache.zookeeper.Watcher;
-import org.apache.zookeeper.ZooKeeper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -38,7 +37,7 @@
@Override
public void connect(String serverAddress, int timeout, Watcher watcher, ZkUpdate updater) throws IOException, InterruptedException, TimeoutException {
- updater.update(new ZooKeeper(serverAddress, timeout, watcher));
+ updater.update(new SolrZooKeeper(serverAddress, timeout, watcher));
}
@Override
@@ -52,7 +51,7 @@
log.info("Attempting the connect...");
boolean connected = false;
try {
- updater.update(new ZooKeeper(serverAddress, zkClientTimeout, watcher));
+ updater.update(new SolrZooKeeper(serverAddress, zkClientTimeout, watcher));
// nocommit
log.info("Reconnected to ZooKeeper");
connected = true;
Modified: lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/SolrZkClient.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/SolrZkClient.java?rev=908786&r1=908785&r2=908786&view=diff
==============================================================================
--- lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/SolrZkClient.java (original)
+++ lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/SolrZkClient.java Thu Feb 11 01:48:35 2010
@@ -60,7 +60,7 @@
private ConnectionManager connManager;
- volatile ZooKeeper keeper;
+ volatile SolrZooKeeper keeper;
/**
* @param zkServerAddress
@@ -111,7 +111,7 @@
strat.connect(zkServerAddress, zkClientTimeout, connManager,
new ZkUpdate() {
@Override
- public void update(ZooKeeper zooKeeper) {
+ public void update(SolrZooKeeper zooKeeper) {
if (keeper != null) {
try {
keeper.close();
@@ -484,7 +484,7 @@
*
* @param keeper
*/
- void updateKeeper(ZooKeeper keeper) {
+ void updateKeeper(SolrZooKeeper keeper) {
// nocommit
log.info("Updating ZooKeeper instance:" + keeper);
this.keeper = keeper;
Added: lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/SolrZooKeeper.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/SolrZooKeeper.java?rev=908786&view=auto
==============================================================================
--- lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/SolrZooKeeper.java (added)
+++ lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/SolrZooKeeper.java Thu Feb 11 01:48:35 2010
@@ -0,0 +1,21 @@
+package org.apache.solr.cloud;
+
+import java.io.IOException;
+
+import org.apache.zookeeper.ClientCnxn;
+import org.apache.zookeeper.Watcher;
+import org.apache.zookeeper.ZooKeeper;
+
+public class SolrZooKeeper extends ZooKeeper {
+
+ public SolrZooKeeper(String connectString, int sessionTimeout, Watcher watcher)
+ throws IOException {
+ super(connectString, sessionTimeout, watcher);
+ // no extra setup needed - this subclass only exposes the client connection
+ }
+
+ protected ClientCnxn getConnection() {
+ return cnxn;
+ }
+
+}
Modified: lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ZkClientConnectionStrategy.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ZkClientConnectionStrategy.java?rev=908786&r1=908785&r2=908786&view=diff
==============================================================================
--- lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ZkClientConnectionStrategy.java (original)
+++ lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ZkClientConnectionStrategy.java Thu Feb 11 01:48:35 2010
@@ -21,7 +21,6 @@
import java.util.concurrent.TimeoutException;
import org.apache.zookeeper.Watcher;
-import org.apache.zookeeper.ZooKeeper;
/**
*
@@ -31,7 +30,7 @@
public abstract void reconnect(String serverAddress, int zkClientTimeout, Watcher watcher, ZkUpdate updater) throws IOException, InterruptedException, TimeoutException;
public static abstract class ZkUpdate {
- public abstract void update(ZooKeeper zooKeeper) throws InterruptedException, TimeoutException, IOException;
+ public abstract void update(SolrZooKeeper zooKeeper) throws InterruptedException, TimeoutException, IOException;
}
}
Modified: lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ZkController.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ZkController.java?rev=908786&r1=908785&r2=908786&view=diff
==============================================================================
--- lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ZkController.java (original)
+++ lucene/solr/branches/cloud/src/java/org/apache/solr/cloud/ZkController.java Thu Feb 11 01:48:35 2010
@@ -347,13 +347,32 @@
};
try {
- if (!readonly)
+ if (!readonly) {
+ boolean nodeDeleted = true;
+ try {
+ // Attempt a delete first to handle a quick server bounce: without a
+ // graceful shutdown, the old node may linger until its expiration
+ // timeout. A create here would then be skipped because the node exists,
+ // yet that stale node would eventually be removed anyway. So delete any
+ // existing node and create a new one.
+ zkClient.delete(nodePath, -1);
+ } catch (KeeperException.NoNodeException e) {
+ // fine if there is nothing to delete
+ nodeDeleted = false;
+ }
+ if (nodeDeleted) {
+ log
+ .info("Found a previous node that still exists while trying to register a new live node "
+ + nodePath + " - removing existing node to create another.");
+ }
zkClient.makePath(nodePath, CreateMode.EPHEMERAL);
+ }
} catch (KeeperException e) {
// its okay if the node already exists
if (e.code() != KeeperException.Code.NODEEXISTS) {
throw e;
}
+ System.out.println("NODE ALREADY EXISTS");
}
zkClient.getChildren(NODES_ZKNODE, liveNodeWatcher);
}
Modified: lucene/solr/branches/cloud/src/java/org/apache/solr/core/CoreContainer.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/java/org/apache/solr/core/CoreContainer.java?rev=908786&r1=908785&r2=908786&view=diff
==============================================================================
--- lucene/solr/branches/cloud/src/java/org/apache/solr/core/CoreContainer.java (original)
+++ lucene/solr/branches/cloud/src/java/org/apache/solr/core/CoreContainer.java Thu Feb 11 01:48:35 2010
@@ -61,7 +61,6 @@
protected boolean persistent = false;
protected String adminPath = null;
protected String managementPath = null;
- protected int zkClientTimeout;
protected String hostPort;
protected String hostContext;
protected String host;
@@ -322,7 +321,7 @@
zkHost = cfg.get("solr/@zkHost" , null);
adminPath = cfg.get("solr/cores/@adminPath", null);
shareSchema = cfg.getBool("solr/cores/@shareSchema", false);
- zkClientTimeout = cfg.getInt("solr/cores/@zkClientTimeout", 10000);
+ int zkClientTimeout = cfg.getInt("solr/cores/@zkClientTimeout", 10000);
if (zkPortOverride == null) {
hostPort = System.getProperty("hostPort");
if (hostPort == null) {
@@ -340,6 +339,7 @@
adminHandler = cfg.get("solr/cores/@adminHandler", null );
managementPath = cfg.get("solr/cores/@managementPath", null );
+ zkClientTimeout = Integer.parseInt(System.getProperty("zkClientTimeout", Integer.toString(zkClientTimeout)));
initZooKeeper(zkHost, zkClientTimeout);
if (libDir != null) {
Modified: lucene/solr/branches/cloud/src/test/org/apache/solr/cloud/CloudStateUpdateTest.java
URL: http://svn.apache.org/viewvc/lucene/solr/branches/cloud/src/test/org/apache/solr/cloud/CloudStateUpdateTest.java?rev=908786&r1=908785&r2=908786&view=diff
==============================================================================
--- lucene/solr/branches/cloud/src/test/org/apache/solr/cloud/CloudStateUpdateTest.java (original)
+++ lucene/solr/branches/cloud/src/test/org/apache/solr/cloud/CloudStateUpdateTest.java Thu Feb 11 01:48:35 2010
@@ -26,6 +26,7 @@
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.CoreDescriptor;
import org.apache.solr.core.SolrCore;
+import org.apache.solr.core.CoreContainer.Initializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -39,7 +40,11 @@
protected File tmpDir = new File(System.getProperty("java.io.tmpdir")
+ System.getProperty("file.separator") + getClass().getName() + "-"
+ System.currentTimeMillis());
-
+
+ protected String getSolrConfigFilename() {
+ return "solr.lowZkTimeout.xml";
+ }
+
private static final boolean VERBOSE = true;
protected ZkTestServer zkServer;
@@ -47,15 +52,22 @@
protected String zkDir;
private CoreContainer container1;
+
private CoreContainer container2;
+
private CoreContainer container3;
private File dataDir1;
+
private File dataDir2;
+
private File dataDir3;
+ private Initializer init2;
+
public void setUp() throws Exception {
try {
+ System.setProperty("zkClientTimeout", "3000");
System.setProperty("zkHost", AbstractZkTestCase.ZOO_KEEPER_ADDRESS);
zkDir = tmpDir.getAbsolutePath() + File.separator
+ "zookeeper/server1/data";
@@ -70,7 +82,7 @@
dataDir2 = new File(tmpDir + File.separator + "data2");
dataDir2.mkdirs();
-
+
dataDir3 = new File(tmpDir + File.separator + "data3");
dataDir3.mkdirs();
@@ -87,7 +99,7 @@
container1 = init1.initialize();
- CoreContainer.Initializer init2 = new CoreContainer.Initializer() {
+ init2 = new CoreContainer.Initializer() {
{
this.dataDir = CloudStateUpdateTest.this.dataDir2.getAbsolutePath();
this.zkPortOverride = "8984";
@@ -95,7 +107,7 @@
};
container2 = init2.initialize();
-
+
CoreContainer.Initializer init3 = new CoreContainer.Initializer() {
{
this.dataDir = CloudStateUpdateTest.this.dataDir3.getAbsolutePath();
@@ -111,58 +123,74 @@
log.info("####SETUP_END " + getName());
}
-
+
public void testCoreRegistration() throws Exception {
System.setProperty("CLOUD_UPDATE_DELAY", "1");
- CoreDescriptor dcore= new CoreDescriptor(container1, "testcore", "testcore");
+ CoreDescriptor dcore = new CoreDescriptor(container1, "testcore",
+ "testcore");
SolrCore core = container1.create(dcore);
container1.register(core, false);
-
- // slight pause - TODO: takes an oddly long amount of time to schedule tasks with almost no delay ...
+
+ // slight pause - TODO: takes an oddly long amount of time to schedule tasks
+ // with almost no delay ...
Thread.sleep(5000);
-
+
ZkController zkController2 = container2.getZkController();
-
+
String host = zkController2.getHostName();
-
+
CloudState cloudState2 = zkController2.getCloudState();
Map<String,Slice> slices = cloudState2.getSlices("testcore");
-
+
assertNotNull(slices);
assertTrue(slices.containsKey(host + ":8983_solr_testcore"));
-
+
Slice slice = slices.get(host + ":8983_solr_testcore");
assertEquals(host + ":8983_solr_testcore", slice.getName());
-
+
Map<String,ZkNodeProps> shards = slice.getShards();
-
+
assertEquals(1, shards.size());
-
+
ZkNodeProps zkProps = shards.get(host + ":8983_solr_testcore");
-
+
assertNotNull(zkProps);
-
+
assertEquals(host + ":8983_solr", zkProps.get("node_name"));
-
+
assertEquals("http://" + host + ":8983/solr/testcore", zkProps.get("url"));
-
+
Set<String> liveNodes = cloudState2.getLiveNodes();
assertNotNull(liveNodes);
assertEquals(3, liveNodes.size());
-
+
container3.shutdown();
-
+
liveNodes = zkController2.getCloudState().getLiveNodes();
-
+
// slight pause for watch to trigger
Thread.sleep(500);
-
+
assertEquals(2, liveNodes.size());
+
+ // quickly kill / start client
+
+ container2.getZkController().getZkClient().keeper.getConnection()
+ .disconnect();
+ container2.shutdown();
+
+ container2 = init2.initialize();
+
+ Thread.sleep(8000);
+
+ assertTrue(container1.getZkController().getCloudState().liveNodesContain(
+ container2.getZkController().getNodeName()));
+
}
public void tearDown() throws Exception {
- if(VERBOSE) {
+ if (VERBOSE) {
printLayout();
}
super.tearDown();