You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2015/05/01 00:33:10 UTC
[2/3] hbase git commit: HBASE-13200 Improper configuration can lead
to endless lease recovery during failover (He Liangliang)
HBASE-13200 Improper configuration can lead to endless lease recovery during failover (He Liangliang)
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/82c2f282
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/82c2f282
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/82c2f282
Branch: refs/heads/branch-1.1
Commit: 82c2f2825202d98cf2f6c5524d97a56a0fa03a1d
Parents: 786a413
Author: Liu Shaohui <li...@xiaomi.com>
Authored: Thu Mar 19 10:07:25 2015 +0800
Committer: Andrew Purtell <ap...@apache.org>
Committed: Thu Apr 30 15:15:36 2015 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hbase/util/FSHDFSUtils.java | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/82c2f282/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
index 8bdac15..0fffcc6 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
@@ -180,9 +180,11 @@ public class FSHDFSUtils extends FSUtils {
long firstPause = conf.getInt("hbase.lease.recovery.first.pause", 4000);
// This should be set to how long it'll take for us to timeout against primary datanode if it
// is dead. We set it to 61 seconds, 1 second than the default READ_TIMEOUT in HDFS, the
- // default value for DFS_CLIENT_SOCKET_TIMEOUT_KEY.
- long subsequentPause = conf.getInt("hbase.lease.recovery.dfs.timeout", 61 * 1000);
-
+ // default value for DFS_CLIENT_SOCKET_TIMEOUT_KEY. If recovery is still failing after this
+ // timeout, then further recovery will take linear backoff with this base, to avoid endless
+ // preemptions when this value is not properly configured.
+ long subsequentPauseBase = conf.getLong("hbase.lease.recovery.dfs.timeout", 61 * 1000);
+
Method isFileClosedMeth = null;
// whether we need to look for isFileClosed method
boolean findIsFileClosedMeth = true;
@@ -198,11 +200,11 @@ public class FSHDFSUtils extends FSUtils {
if (nbAttempt == 0) {
Thread.sleep(firstPause);
} else {
- // Cycle here until subsequentPause elapses. While spinning, check isFileClosed if
- // available (should be in hadoop 2.0.5... not in hadoop 1 though.
+ // Cycle here until (subsequentPauseBase * nbAttempt) elapses. While spinning, check
+ // isFileClosed if available (should be in hadoop 2.0.5... not in hadoop 1 though).
long localStartWaiting = EnvironmentEdgeManager.currentTime();
while ((EnvironmentEdgeManager.currentTime() - localStartWaiting) <
- subsequentPause) {
+ subsequentPauseBase * nbAttempt) {
Thread.sleep(conf.getInt("hbase.lease.recovery.pause", 1000));
if (findIsFileClosedMeth) {
try {