You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by mb...@apache.org on 2012/04/07 00:12:15 UTC

svn commit: r1310610 - /hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java

Author: mbautin
Date: Fri Apr  6 22:12:15 2012
New Revision: 1310610

URL: http://svn.apache.org/viewvc?rev=1310610&view=rev
Log:
[HBASE-5606][jira] SplitLogManager async delete node hangs log splitting when ZK connection is lost

Summary: the timeout monitor thread shouldn't delete task nodes

Test Plan: SplitLogManager unit tests pass

Reviewers: kannan, kranganathan, mbautin, liyintang

Reviewed By: kannan

CC: hbase-eng@

Differential Revision: https://phabricator.fb.com/D441755

Modified:
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java?rev=1310610&r1=1310609&r2=1310610&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java Fri Apr  6 22:12:15 2012
@@ -399,6 +399,13 @@ public class SplitLogManager implements 
     tot_mgr_get_data_queued.incrementAndGet();
   }
 
+  private void tryGetDataSetWatch(String path) {
+    // Negative retry count: the callback only logs errors; no handling, no retry.
+    this.watcher.getZooKeeper().getData(path, this.watcher,
+        new GetDataAsyncCallback(), Long.valueOf(-1L) /* retry count */);
+    tot_mgr_get_data_queued.incrementAndGet();
+  }
+
   private void getDataSetWatchSuccess(String path, byte[] data, int version) {
     if (data == null) {
       if (version == Integer.MIN_VALUE) {
@@ -916,11 +923,13 @@ public class SplitLogManager implements 
         for (Map.Entry<String, Task> e : tasks.entrySet()) {
           String path = e.getKey();
           Task task = e.getValue();
-          // we have to do this check again because tasks might have
-          // been asynchronously assigned.
-          if (task.isUnassigned()) {
+          // we have to do task.isUnassigned() check again because tasks might
+          // have been asynchronously assigned. There is no locking required
+          // for these checks ... it is OK even if tryGetDataSetWatch() is
+          // called unnecessarily for a task
+          if (task.isUnassigned() && (task.status != FAILURE)) {
             // We just touch the znode to make sure its still there
-            getDataSetWatch(path, zkretries);
+            tryGetDataSetWatch(path);
           }
         }
         createRescanNode(Long.MAX_VALUE);
@@ -991,6 +1000,12 @@ public class SplitLogManager implements 
           return;
         }
         Long retry_count = (Long) ctx;
+
+        if (retry_count < 0) {
+          LOG.warn("getdata rc = " + KeeperException.Code.get(rc) + " " +
+              path + ". Ignoring error. No error handling. No retrying.");
+          return;
+        }
         LOG.warn("getdata rc = " + KeeperException.Code.get(rc) + " " +
             path + " retry=" + retry_count);
         if (retry_count == 0) {