You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2015/05/01 00:33:11 UTC

[3/3] hbase git commit: HBASE-13200 Improper configuration can leads to endless lease recovery during failover (He Liangliang)

HBASE-13200 Improper configuration can leads to endless lease recovery during failover (He Liangliang)

Conflicts:
	hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/8cb97631
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/8cb97631
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/8cb97631

Branch: refs/heads/0.98
Commit: 8cb976312869f3f0a794bf53be71ef5e07d79adb
Parents: 0f3de8a
Author: Liu Shaohui <li...@xiaomi.com>
Authored: Thu Mar 19 10:07:25 2015 +0800
Committer: Andrew Purtell <ap...@apache.org>
Committed: Thu Apr 30 15:30:39 2015 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hbase/util/FSHDFSUtils.java     | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/8cb97631/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
index 6acf8e5..ac9c8e3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
@@ -180,9 +180,11 @@ public class FSHDFSUtils extends FSUtils {
     long firstPause = conf.getInt("hbase.lease.recovery.first.pause", 4000);
     // This should be set to how long it'll take for us to timeout against primary datanode if it
     // is dead.  We set it to 61 seconds, 1 second than the default READ_TIMEOUT in HDFS, the
-    // default value for DFS_CLIENT_SOCKET_TIMEOUT_KEY.
-    long subsequentPause = conf.getInt("hbase.lease.recovery.dfs.timeout", 61 * 1000);
-    
+    // default value for DFS_CLIENT_SOCKET_TIMEOUT_KEY. If recovery is still failing after this
+    // timeout, then further recovery will take linear backoff with this base, to avoid endless
+    // preemptions when this value is not properly configured.
+    long subsequentPauseBase = conf.getLong("hbase.lease.recovery.dfs.timeout", 61 * 1000);
+
     Method isFileClosedMeth = null;
     // whether we need to look for isFileClosed method
     boolean findIsFileClosedMeth = true;
@@ -198,11 +200,11 @@ public class FSHDFSUtils extends FSUtils {
         if (nbAttempt == 0) {
           Thread.sleep(firstPause);
         } else {
-          // Cycle here until subsequentPause elapses.  While spinning, check isFileClosed if
-          // available (should be in hadoop 2.0.5... not in hadoop 1 though.
+          // Cycle here until (subsequentPauseBase * nbAttempt) elapses.  While spinning, check
+          // isFileClosed if available (should be in hadoop 2.0.5... not in hadoop 1 though).
           long localStartWaiting = EnvironmentEdgeManager.currentTimeMillis();
           while ((EnvironmentEdgeManager.currentTimeMillis() - localStartWaiting) <
-              subsequentPause) {
+              subsequentPauseBase * nbAttempt) {
             Thread.sleep(conf.getInt("hbase.lease.recovery.pause", 1000));
             if (findIsFileClosedMeth) {
               try {