You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zookeeper.apache.org by ma...@apache.org on 2010/03/19 18:16:33 UTC
svn commit: r925339 - in /hadoop/zookeeper/branches/branch-3.2: ./
src/docs/src/documentation/content/xdocs/
src/java/main/org/apache/zookeeper/server/
src/java/test/org/apache/zookeeper/test/
Author: mahadev
Date: Fri Mar 19 17:16:33 2010
New Revision: 925339
URL: http://svn.apache.org/viewvc?rev=925339&view=rev
Log:
ZOOKEEPER-710. permanent ZSESSIONMOVED error after client app reconnects to zookeeper cluster (phunt via mahadev)
Modified:
hadoop/zookeeper/branches/branch-3.2/CHANGES.txt
hadoop/zookeeper/branches/branch-3.2/src/docs/src/documentation/content/xdocs/zookeeperProgrammers.xml
hadoop/zookeeper/branches/branch-3.2/src/java/main/org/apache/zookeeper/server/FinalRequestProcessor.java
hadoop/zookeeper/branches/branch-3.2/src/java/main/org/apache/zookeeper/server/NIOServerCnxn.java
hadoop/zookeeper/branches/branch-3.2/src/java/test/org/apache/zookeeper/test/QuorumTest.java
Modified: hadoop/zookeeper/branches/branch-3.2/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/branches/branch-3.2/CHANGES.txt?rev=925339&r1=925338&r2=925339&view=diff
==============================================================================
--- hadoop/zookeeper/branches/branch-3.2/CHANGES.txt (original)
+++ hadoop/zookeeper/branches/branch-3.2/CHANGES.txt Fri Mar 19 17:16:33 2010
@@ -1,3 +1,11 @@
+3.2 branch
+
+Backward compatible changes:
+
+BUGFIXES:
+ ZOOKEEPER-710. permanent ZSESSIONMOVED error after client app reconnects to
+ zookeeper cluster (phunt via mahadev)
+
Release 3.2.2 - 2009-12-11
Backward compatible changes:
Modified: hadoop/zookeeper/branches/branch-3.2/src/docs/src/documentation/content/xdocs/zookeeperProgrammers.xml
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/branches/branch-3.2/src/docs/src/documentation/content/xdocs/zookeeperProgrammers.xml?rev=925339&r1=925338&r2=925339&view=diff
==============================================================================
--- hadoop/zookeeper/branches/branch-3.2/src/docs/src/documentation/content/xdocs/zookeeperProgrammers.xml (original)
+++ hadoop/zookeeper/branches/branch-3.2/src/docs/src/documentation/content/xdocs/zookeeperProgrammers.xml Fri Mar 19 17:16:33 2010
@@ -479,12 +479,13 @@
which has be reestablished on a different server. The normal cause of this error is
a client that sends a request to a server, but the network packet gets delayed, so
the client times out and connects to a new server. When the delayed packet arrives at
- the first server, the old server detects that the session has moved and sends back
- the SESSION_MOVED error. Clients normally do not see this error since they do not read
+ the first server, the old server detects that the session has moved, and closes the
+ client connection. Clients normally do not see this error since they do not read
from those old connections. (Old connections are usually closed.) One situation in which this
- error can be seen is when two clients try to reestablish the same connection using
+ condition can be seen is when two clients try to reestablish the same connection using
a saved session id and password. One of the clients will reestablish the connection
- and the second client will get the SessionMovedException.</para>
+ and the second client will be disconnected (causing the pair to attempt to re-establish
+ it's connection/session indefinitely).</para>
</section>
Modified: hadoop/zookeeper/branches/branch-3.2/src/java/main/org/apache/zookeeper/server/FinalRequestProcessor.java
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/branches/branch-3.2/src/java/main/org/apache/zookeeper/server/FinalRequestProcessor.java?rev=925339&r1=925338&r2=925339&view=diff
==============================================================================
--- hadoop/zookeeper/branches/branch-3.2/src/java/main/org/apache/zookeeper/server/FinalRequestProcessor.java (original)
+++ hadoop/zookeeper/branches/branch-3.2/src/java/main/org/apache/zookeeper/server/FinalRequestProcessor.java Fri Mar 19 17:16:33 2010
@@ -27,6 +27,7 @@ import org.apache.log4j.Logger;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.KeeperException.Code;
+import org.apache.zookeeper.KeeperException.SessionMovedException;
import org.apache.zookeeper.ZooDefs.OpCode;
import org.apache.zookeeper.data.ACL;
import org.apache.zookeeper.data.Stat;
@@ -46,6 +47,7 @@ import org.apache.zookeeper.proto.SetWat
import org.apache.zookeeper.proto.SyncRequest;
import org.apache.zookeeper.proto.SyncResponse;
import org.apache.zookeeper.server.DataTree.ProcessTxnResult;
+import org.apache.zookeeper.server.NIOServerCnxn;
import org.apache.zookeeper.server.NIOServerCnxn.Factory;
import org.apache.zookeeper.server.ZooKeeperServer.ChangeRecord;
import org.apache.zookeeper.txn.CreateSessionTxn;
@@ -253,6 +255,17 @@ public class FinalRequestProcessor imple
rsp = new GetChildrenResponse(children);
break;
}
+ } catch (SessionMovedException e) {
+ // session moved is a connection level error, we need to tear
+ // down the connection otw ZOOKEEPER-710 might happen
+ // ie client on slow follower starts to renew session, fails
+ // before this completes, then tries the fast follower (leader)
+ // and is successful, however the initial renew is then
+ // successfully fwd/processed by the leader and as a result
+ // the client and leader disagree on where the client is most
+ // recently attached (and therefore invalid SESSION MOVED generated)
+ ((NIOServerCnxn)request.cnxn).sendBuffer(NIOServerCnxn.closeConn);
+ return;
} catch (KeeperException e) {
err = e.code();
} catch (Exception e) {
Modified: hadoop/zookeeper/branches/branch-3.2/src/java/main/org/apache/zookeeper/server/NIOServerCnxn.java
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/branches/branch-3.2/src/java/main/org/apache/zookeeper/server/NIOServerCnxn.java?rev=925339&r1=925338&r2=925339&view=diff
==============================================================================
--- hadoop/zookeeper/branches/branch-3.2/src/java/main/org/apache/zookeeper/server/NIOServerCnxn.java (original)
+++ hadoop/zookeeper/branches/branch-3.2/src/java/main/org/apache/zookeeper/server/NIOServerCnxn.java Fri Mar 19 17:16:33 2010
@@ -351,20 +351,22 @@ public class NIOServerCnxn implements Wa
void sendBuffer(ByteBuffer bb) {
try {
- // We check if write interest here because if it is NOT set, nothing
- // is queued, so
- // we can try to send the buffer right away without waking up the
- // selector
- if ((sk.interestOps() & SelectionKey.OP_WRITE) == 0) {
- try {
- sock.write(bb);
- } catch (IOException e) {
- // we are just doing best effort right now
+ if (bb != closeConn) {
+ // We check if write interest here because if it is NOT set, nothing
+ // is queued, so
+ // we can try to send the buffer right away without waking up the
+ // selector
+ if ((sk.interestOps() & SelectionKey.OP_WRITE) == 0) {
+ try {
+ sock.write(bb);
+ } catch (IOException e) {
+ // we are just doing best effort right now
+ }
+ }
+ // if there is nothing left to send, we are done
+ if (bb.remaining() == 0) {
+ return;
}
- }
- // if there is nothing left to send, we are done
- if (bb.remaining() == 0) {
- return;
}
synchronized (factory) {
sk.selector().wakeup();
Modified: hadoop/zookeeper/branches/branch-3.2/src/java/test/org/apache/zookeeper/test/QuorumTest.java
URL: http://svn.apache.org/viewvc/hadoop/zookeeper/branches/branch-3.2/src/java/test/org/apache/zookeeper/test/QuorumTest.java?rev=925339&r1=925338&r2=925339&view=diff
==============================================================================
--- hadoop/zookeeper/branches/branch-3.2/src/java/test/org/apache/zookeeper/test/QuorumTest.java (original)
+++ hadoop/zookeeper/branches/branch-3.2/src/java/test/org/apache/zookeeper/test/QuorumTest.java Fri Mar 19 17:16:33 2010
@@ -32,6 +32,7 @@ import org.apache.zookeeper.WatchedEvent
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.ZooKeeper;
+import org.apache.zookeeper.Watcher.Event.KeeperState;
import org.apache.zookeeper.data.Stat;
import org.apache.zookeeper.server.quorum.FollowerHandler;
import org.apache.zookeeper.server.quorum.Leader;
@@ -165,14 +166,14 @@ public class QuorumTest extends TestCase
@Test
public void testSessionMoved() throws IOException, InterruptedException, KeeperException {
String hostPorts[] = qb.hostPort.split(",");
- ZooKeeper zk = new DisconnectableZooKeeper(hostPorts[0], ClientBase.CONNECTION_TIMEOUT, new Watcher() {
+ DisconnectableZooKeeper zk = new DisconnectableZooKeeper(hostPorts[0], ClientBase.CONNECTION_TIMEOUT, new Watcher() {
public void process(WatchedEvent event) {
}});
zk.create("/sessionMoveTest", new byte[0], Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
// we want to loop through the list twice
for(int i = 0; i < hostPorts.length*2; i++) {
// This should stomp the zk handle
- ZooKeeper zknew = new DisconnectableZooKeeper(hostPorts[(i+1)%hostPorts.length], ClientBase.CONNECTION_TIMEOUT,
+ DisconnectableZooKeeper zknew = new DisconnectableZooKeeper(hostPorts[(i+1)%hostPorts.length], ClientBase.CONNECTION_TIMEOUT,
new Watcher() {public void process(WatchedEvent event) {
}},
zk.getSessionId(),
@@ -181,14 +182,23 @@ public class QuorumTest extends TestCase
try {
zk.setData("/", new byte[1], -1);
fail("Should have lost the connection");
- } catch(KeeperException.SessionMovedException e) {
+ } catch(KeeperException.ConnectionLossException e) {
}
- //zk.close();
+ zk.disconnect(); // close w/o closing session
zk = zknew;
}
zk.close();
}
+ private static class DiscoWatcher implements Watcher {
+ volatile boolean zkDisco = false;
+ public void process(WatchedEvent event) {
+ if (event.getState() == KeeperState.Disconnected) {
+ zkDisco = true;
+ }
+ }
+ }
+
@Test
/**
* Connect to two different servers with two different handles using the same session and
@@ -196,28 +206,37 @@ public class QuorumTest extends TestCase
*/
public void testSessionMove() throws IOException, InterruptedException, KeeperException {
String hps[] = qb.hostPort.split(",");
- ZooKeeper zk = new DisconnectableZooKeeper(hps[0], ClientBase.CONNECTION_TIMEOUT, new Watcher() {
- public void process(WatchedEvent event) {
- }});
+ DiscoWatcher oldWatcher = new DiscoWatcher();
+ ZooKeeper zk = new DisconnectableZooKeeper(hps[0],
+ ClientBase.CONNECTION_TIMEOUT, oldWatcher);
zk.create("/t1", new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
// This should stomp the zk handle
- ZooKeeper zknew = new DisconnectableZooKeeper(hps[1], ClientBase.CONNECTION_TIMEOUT, new Watcher() {
- public void process(WatchedEvent event) {
- }}, zk.getSessionId(), zk.getSessionPasswd());
+ DiscoWatcher watcher = new DiscoWatcher();
+ ZooKeeper zknew = new DisconnectableZooKeeper(hps[1],
+ ClientBase.CONNECTION_TIMEOUT, watcher, zk.getSessionId(),
+ zk.getSessionPasswd());
zknew.create("/t2", new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
try {
zk.create("/t3", new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
fail("Should have lost the connection");
- } catch(KeeperException.SessionMovedException e) {
+ } catch(KeeperException.ConnectionLossException e) {
+ // wait up to 30 seconds for the disco to be delivered
+ for (int i = 0; i < 30; i++) {
+ if (oldWatcher.zkDisco) {
+ break;
+ }
+ Thread.sleep(1000);
+ }
+ assertTrue(oldWatcher.zkDisco);
}
ArrayList<ZooKeeper> toClose = new ArrayList<ZooKeeper>();
toClose.add(zknew);
// Let's just make sure it can still move
for(int i = 0; i < 10; i++) {
- zknew = new DisconnectableZooKeeper(hps[1], ClientBase.CONNECTION_TIMEOUT, new Watcher() {
- public void process(WatchedEvent event) {
- }}, zk.getSessionId(), zk.getSessionPasswd());
+ zknew = new DisconnectableZooKeeper(hps[1],
+ ClientBase.CONNECTION_TIMEOUT, new DiscoWatcher(),
+ zk.getSessionId(), zk.getSessionPasswd());
toClose.add(zknew);
zknew.create("/t-"+i, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL);
}