You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2015/05/01 00:33:10 UTC

[2/3] hbase git commit: HBASE-13200 Improper configuration can lead to endless lease recovery during failover (He Liangliang)

HBASE-13200 Improper configuration can lead to endless lease recovery during failover (He Liangliang)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/82c2f282
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/82c2f282
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/82c2f282

Branch: refs/heads/branch-1.1
Commit: 82c2f2825202d98cf2f6c5524d97a56a0fa03a1d
Parents: 786a413
Author: Liu Shaohui <li...@xiaomi.com>
Authored: Thu Mar 19 10:07:25 2015 +0800
Committer: Andrew Purtell <ap...@apache.org>
Committed: Thu Apr 30 15:15:36 2015 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hbase/util/FSHDFSUtils.java     | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/82c2f282/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
index 8bdac15..0fffcc6 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
@@ -180,9 +180,11 @@ public class FSHDFSUtils extends FSUtils {
     long firstPause = conf.getInt("hbase.lease.recovery.first.pause", 4000);
     // This should be set to how long it'll take for us to timeout against primary datanode if it
    // is dead.  We set it to 61 seconds, 1 second more than the default READ_TIMEOUT in HDFS, the
-    // default value for DFS_CLIENT_SOCKET_TIMEOUT_KEY.
-    long subsequentPause = conf.getInt("hbase.lease.recovery.dfs.timeout", 61 * 1000);
-    
+    // default value for DFS_CLIENT_SOCKET_TIMEOUT_KEY. If recovery is still failing after this
+    // timeout, then further recovery will use linear backoff with this base, to avoid endless
+    // preemptions when this value is not properly configured.
+    long subsequentPauseBase = conf.getLong("hbase.lease.recovery.dfs.timeout", 61 * 1000);
+
     Method isFileClosedMeth = null;
     // whether we need to look for isFileClosed method
     boolean findIsFileClosedMeth = true;
@@ -198,11 +200,11 @@ public class FSHDFSUtils extends FSUtils {
         if (nbAttempt == 0) {
           Thread.sleep(firstPause);
         } else {
-          // Cycle here until subsequentPause elapses.  While spinning, check isFileClosed if
-          // available (should be in hadoop 2.0.5... not in hadoop 1 though.
+          // Cycle here until (subsequentPause * nbAttempt) elapses.  While spinning, check
+          // isFileClosed if available (should be in hadoop 2.0.5... not in hadoop 1 though).
           long localStartWaiting = EnvironmentEdgeManager.currentTime();
           while ((EnvironmentEdgeManager.currentTime() - localStartWaiting) <
-              subsequentPause) {
+              subsequentPauseBase * nbAttempt) {
             Thread.sleep(conf.getInt("hbase.lease.recovery.pause", 1000));
             if (findIsFileClosedMeth) {
               try {