You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ns...@apache.org on 2011/10/11 04:17:15 UTC

svn commit: r1181532 - in /hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase: util/Bytes.java zookeeper/RecoverableZooKeeper.java

Author: nspiegelberg
Date: Tue Oct 11 02:17:14 2011
New Revision: 1181532

URL: http://svn.apache.org/viewvc?rev=1181532&view=rev
Log:
Better handling of "node exists" and "no node" exceptions in the recoverable ZooKeeper

Summary:
In Task#536980 we noticed a race condition where a master got restarted, the
backup master came up, and the original master came up as well, resulting in two
active masters. We traced this back to the RecoverableZooKeeper wrapper, which
assumes that if a node creation operation results in a "node exists" exception,
the operation is successful, while in fact the other client grabbed the
ephemeral node. This fix adds a verification of the node contents and re-throws
the NODEEXISTS KeeperException if the data does not match.

Test Plan:
Start a test cluster. Start two masters. Kill the active master and restart
immediately. Verify that both masters compete for the ZK node, but only one
master becomes active.

Reviewed By: liyintang
Reviewers: liyintang, kannan, kranganathan
CC: hbase@lists, , liyintang
Revert Plan:
OK

Differential Revision: 242017

Modified:
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
    hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/zookeeper/RecoverableZooKeeper.java

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/Bytes.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/Bytes.java?rev=1181532&r1=1181531&r2=1181532&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/Bytes.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/util/Bytes.java Tue Oct 11 02:17:14 2011
@@ -302,6 +302,16 @@ public class Bytes {
   }
 
   /**
+   * The same as {@link #toStringBinary(byte[])}, but returns a string "null"
+   * if given a null argument.
+   */
+  public static String toStringBinarySafe(final byte [] b) {
+    if (b == null)
+      return "null";
+    return toStringBinary(b, 0, b.length);
+  }
+
+  /**
    * Write a printable representation of a byte array. Non-printable
    * characters are hex escaped in the format \\x%02X, eg:
    * \x00 \x05 etc

Modified: hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/zookeeper/RecoverableZooKeeper.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/zookeeper/RecoverableZooKeeper.java?rev=1181532&r1=1181531&r2=1181532&view=diff
==============================================================================
--- hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/zookeeper/RecoverableZooKeeper.java (original)
+++ hbase/branches/0.89/src/main/java/org/apache/hadoop/hbase/zookeeper/RecoverableZooKeeper.java Tue Oct 11 02:17:14 2011
@@ -86,6 +86,7 @@ public class RecoverableZooKeeper {
   public void delete(String path, int version)
     throws InterruptedException, KeeperException {
     RetryCounter retryCounter = retryCounterFactory.create();
+    boolean isRetry = false; // False for first attempt, true for all retries.
     while (true) {
       try {
         zk.delete(path, version);
@@ -93,7 +94,14 @@ public class RecoverableZooKeeper {
       } catch (KeeperException e) {
         switch (e.code()) {
           case NONODE:
-            return; // Delete was successful
+            if (isRetry) {
+              LOG.info("Node " + path + " already deleted. Assuming that a " +
+                  "previous attempt succeeded.");
+              return;
+            }
+            LOG.warn("Node " + path + " already deleted, and this is not a " +
+                     "retry");
+            throw e;
 
           case CONNECTIONLOSS:
           case OPERATIONTIMEOUT:
@@ -113,6 +121,7 @@ public class RecoverableZooKeeper {
 		"ZooKeeper after sleeping "+retryIntervalMillis+" ms");
       retryCounter.sleepUntilNextRetry();
       retryCounter.useRetry();
+      isRetry = true;
     }
   }
 
@@ -445,14 +454,31 @@ public class RecoverableZooKeeper {
   private String createNonSequential(String path, byte[] data, List<ACL> acl,
       CreateMode createMode) throws KeeperException, InterruptedException {
     RetryCounter retryCounter = retryCounterFactory.create();
+    boolean isRetry = false; // False for first attempt, true for all retries.
     while (true) {
       try {
         return zk.create(path, data, acl, createMode);
       } catch (KeeperException e) {
         switch (e.code()) {
           case NODEEXISTS:
-            // Non-sequential node was successfully created
-            return path;
+            if (isRetry) {
+              // If the connection was lost, there is still a possibility that
+              // we have successfully created the node at our previous attempt,
+              // so we read the node and compare.
+              byte[] currentData = zk.getData(path, false, null);
+              if (currentData != null &&
+                  Bytes.compareTo(currentData, id) == 0) {
+                // We successfully created a non-sequential node
+                return path;
+              }
+              LOG.error("Node " + path + " already exists with " +
+                  Bytes.toStringBinarySafe(currentData) + ", could not write " +
+                  Bytes.toStringBinarySafe(data));
+              throw e;
+            }
+            LOG.error("Node " + path + " already exists and this is not a " +
+			"retry");
+            throw e;
 
           case CONNECTIONLOSS:
           case OPERATIONTIMEOUT:
@@ -472,6 +498,7 @@ public class RecoverableZooKeeper {
           "ZooKeeper after sleeping "+retryIntervalMillis+" ms");
       retryCounter.sleepUntilNextRetry();
       retryCounter.useRetry();
+      isRetry = true;
     }
   }