You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2015/05/01 00:33:09 UTC
[1/3] hbase git commit: HBASE-13200 Improper configuration can lead
to endless lease recovery during failover (He Liangliang)
Repository: hbase
Updated Branches:
refs/heads/0.98 0f3de8a17 -> 8cb976312
refs/heads/branch-1 6d40b547a -> d4f6928b0
refs/heads/branch-1.1 786a413ec -> 82c2f2825
HBASE-13200 Improper configuration can lead to endless lease recovery during failover (He Liangliang)
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/d4f6928b
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/d4f6928b
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/d4f6928b
Branch: refs/heads/branch-1
Commit: d4f6928b052ce6606005650a86c37536a3966cc7
Parents: 6d40b54
Author: Liu Shaohui <li...@xiaomi.com>
Authored: Thu Mar 19 10:07:25 2015 +0800
Committer: Andrew Purtell <ap...@apache.org>
Committed: Thu Apr 30 15:12:22 2015 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hbase/util/FSHDFSUtils.java | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/d4f6928b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
index 8bdac15..0fffcc6 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
@@ -180,9 +180,11 @@ public class FSHDFSUtils extends FSUtils {
long firstPause = conf.getInt("hbase.lease.recovery.first.pause", 4000);
// This should be set to how long it'll take for us to timeout against primary datanode if it
// is dead. We set it to 61 seconds, 1 second than the default READ_TIMEOUT in HDFS, the
- // default value for DFS_CLIENT_SOCKET_TIMEOUT_KEY.
- long subsequentPause = conf.getInt("hbase.lease.recovery.dfs.timeout", 61 * 1000);
-
+ // default value for DFS_CLIENT_SOCKET_TIMEOUT_KEY. If recovery is still failing after this
+ // timeout, then further recovery will take liner backoff with this base, to avoid endless
+ // preemptions when this value is not properly configured.
+ long subsequentPauseBase = conf.getLong("hbase.lease.recovery.dfs.timeout", 61 * 1000);
+
Method isFileClosedMeth = null;
// whether we need to look for isFileClosed method
boolean findIsFileClosedMeth = true;
@@ -198,11 +200,11 @@ public class FSHDFSUtils extends FSUtils {
if (nbAttempt == 0) {
Thread.sleep(firstPause);
} else {
- // Cycle here until subsequentPause elapses. While spinning, check isFileClosed if
- // available (should be in hadoop 2.0.5... not in hadoop 1 though.
+ // Cycle here until (subsequentPause * nbAttempt) elapses. While spinning, check
+ // isFileClosed if available (should be in hadoop 2.0.5... not in hadoop 1 though.
long localStartWaiting = EnvironmentEdgeManager.currentTime();
while ((EnvironmentEdgeManager.currentTime() - localStartWaiting) <
- subsequentPause) {
+ subsequentPauseBase * nbAttempt) {
Thread.sleep(conf.getInt("hbase.lease.recovery.pause", 1000));
if (findIsFileClosedMeth) {
try {
[3/3] hbase git commit: HBASE-13200 Improper configuration can lead
to endless lease recovery during failover (He Liangliang)
Posted by ap...@apache.org.
HBASE-13200 Improper configuration can lead to endless lease recovery during failover (He Liangliang)
Conflicts:
hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/8cb97631
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/8cb97631
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/8cb97631
Branch: refs/heads/0.98
Commit: 8cb976312869f3f0a794bf53be71ef5e07d79adb
Parents: 0f3de8a
Author: Liu Shaohui <li...@xiaomi.com>
Authored: Thu Mar 19 10:07:25 2015 +0800
Committer: Andrew Purtell <ap...@apache.org>
Committed: Thu Apr 30 15:30:39 2015 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hbase/util/FSHDFSUtils.java | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/8cb97631/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
index 6acf8e5..ac9c8e3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
@@ -180,9 +180,11 @@ public class FSHDFSUtils extends FSUtils {
long firstPause = conf.getInt("hbase.lease.recovery.first.pause", 4000);
// This should be set to how long it'll take for us to timeout against primary datanode if it
// is dead. We set it to 61 seconds, 1 second than the default READ_TIMEOUT in HDFS, the
- // default value for DFS_CLIENT_SOCKET_TIMEOUT_KEY.
- long subsequentPause = conf.getInt("hbase.lease.recovery.dfs.timeout", 61 * 1000);
-
+ // default value for DFS_CLIENT_SOCKET_TIMEOUT_KEY. If recovery is still failing after this
+ // timeout, then further recovery will take liner backoff with this base, to avoid endless
+ // preemptions when this value is not properly configured.
+ long subsequentPauseBase = conf.getLong("hbase.lease.recovery.dfs.timeout", 61 * 1000);
+
Method isFileClosedMeth = null;
// whether we need to look for isFileClosed method
boolean findIsFileClosedMeth = true;
@@ -198,11 +200,11 @@ public class FSHDFSUtils extends FSUtils {
if (nbAttempt == 0) {
Thread.sleep(firstPause);
} else {
- // Cycle here until subsequentPause elapses. While spinning, check isFileClosed if
- // available (should be in hadoop 2.0.5... not in hadoop 1 though.
+ // Cycle here until (subsequentPause * nbAttempt) elapses. While spinning, check
+ // isFileClosed if available (should be in hadoop 2.0.5... not in hadoop 1 though.
long localStartWaiting = EnvironmentEdgeManager.currentTimeMillis();
while ((EnvironmentEdgeManager.currentTimeMillis() - localStartWaiting) <
- subsequentPause) {
+ subsequentPauseBase * nbAttempt) {
Thread.sleep(conf.getInt("hbase.lease.recovery.pause", 1000));
if (findIsFileClosedMeth) {
try {
[2/3] hbase git commit: HBASE-13200 Improper configuration can lead
to endless lease recovery during failover (He Liangliang)
Posted by ap...@apache.org.
HBASE-13200 Improper configuration can lead to endless lease recovery during failover (He Liangliang)
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/82c2f282
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/82c2f282
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/82c2f282
Branch: refs/heads/branch-1.1
Commit: 82c2f2825202d98cf2f6c5524d97a56a0fa03a1d
Parents: 786a413
Author: Liu Shaohui <li...@xiaomi.com>
Authored: Thu Mar 19 10:07:25 2015 +0800
Committer: Andrew Purtell <ap...@apache.org>
Committed: Thu Apr 30 15:15:36 2015 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hbase/util/FSHDFSUtils.java | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hbase/blob/82c2f282/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
index 8bdac15..0fffcc6 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java
@@ -180,9 +180,11 @@ public class FSHDFSUtils extends FSUtils {
long firstPause = conf.getInt("hbase.lease.recovery.first.pause", 4000);
// This should be set to how long it'll take for us to timeout against primary datanode if it
// is dead. We set it to 61 seconds, 1 second than the default READ_TIMEOUT in HDFS, the
- // default value for DFS_CLIENT_SOCKET_TIMEOUT_KEY.
- long subsequentPause = conf.getInt("hbase.lease.recovery.dfs.timeout", 61 * 1000);
-
+ // default value for DFS_CLIENT_SOCKET_TIMEOUT_KEY. If recovery is still failing after this
+ // timeout, then further recovery will take liner backoff with this base, to avoid endless
+ // preemptions when this value is not properly configured.
+ long subsequentPauseBase = conf.getLong("hbase.lease.recovery.dfs.timeout", 61 * 1000);
+
Method isFileClosedMeth = null;
// whether we need to look for isFileClosed method
boolean findIsFileClosedMeth = true;
@@ -198,11 +200,11 @@ public class FSHDFSUtils extends FSUtils {
if (nbAttempt == 0) {
Thread.sleep(firstPause);
} else {
- // Cycle here until subsequentPause elapses. While spinning, check isFileClosed if
- // available (should be in hadoop 2.0.5... not in hadoop 1 though.
+ // Cycle here until (subsequentPause * nbAttempt) elapses. While spinning, check
+ // isFileClosed if available (should be in hadoop 2.0.5... not in hadoop 1 though.
long localStartWaiting = EnvironmentEdgeManager.currentTime();
while ((EnvironmentEdgeManager.currentTime() - localStartWaiting) <
- subsequentPause) {
+ subsequentPauseBase * nbAttempt) {
Thread.sleep(conf.getInt("hbase.lease.recovery.pause", 1000));
if (findIsFileClosedMeth) {
try {