You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by ae...@apache.org on 2017/09/18 22:12:17 UTC
[34/50] [abbrv] hadoop git commit: HDFS-12323. NameNode terminates
after full GC thinking QJM unresponsive if full GC is much longer than
timeout. Contributed by Erik Krogen.
HDFS-12323. NameNode terminates after full GC thinking QJM unresponsive if full GC is much longer than timeout. Contributed by Erik Krogen.
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/90894c72
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/90894c72
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/90894c72
Branch: refs/heads/HDFS-7240
Commit: 90894c7262df0243e795b675f3ac9f7b322ccd11
Parents: b9b607d
Author: Erik Krogen <ek...@linkedin.com>
Authored: Thu Sep 14 15:53:33 2017 -0700
Committer: Konstantin V Shvachko <sh...@apache.org>
Committed: Fri Sep 15 13:56:27 2017 -0700
----------------------------------------------------------------------
.../hadoop/hdfs/qjournal/client/QuorumCall.java | 65 ++++++++++++++++----
.../hdfs/qjournal/client/TestQuorumCall.java | 31 +++++++++-
2 files changed, 82 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/90894c72/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java
index dc32318..dee74e6 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumCall.java
@@ -24,7 +24,7 @@ import java.util.concurrent.TimeUnit;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.util.StopWatch;
-import org.apache.hadoop.util.Time;
+import org.apache.hadoop.util.Timer;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
@@ -35,6 +35,7 @@ import com.google.common.util.concurrent.ListenableFuture;
import com.google.protobuf.Message;
import com.google.protobuf.TextFormat;
+
/**
* Represents a set of calls for which a quorum of results is needed.
* @param <KEY> a key used to identify each of the outgoing calls
@@ -60,11 +61,12 @@ class QuorumCall<KEY, RESULT> {
* fraction of the configured timeout for any call.
*/
private static final float WAIT_PROGRESS_WARN_THRESHOLD = 0.7f;
- private final StopWatch quorumStopWatch = new StopWatch();
+ private final StopWatch quorumStopWatch;
+ private final Timer timer;
static <KEY, RESULT> QuorumCall<KEY, RESULT> create(
- Map<KEY, ? extends ListenableFuture<RESULT>> calls) {
- final QuorumCall<KEY, RESULT> qr = new QuorumCall<KEY, RESULT>();
+ Map<KEY, ? extends ListenableFuture<RESULT>> calls, Timer timer) {
+ final QuorumCall<KEY, RESULT> qr = new QuorumCall<KEY, RESULT>(timer);
for (final Entry<KEY, ? extends ListenableFuture<RESULT>> e : calls.entrySet()) {
Preconditions.checkArgument(e.getValue() != null,
"null future for key: " + e.getKey());
@@ -82,18 +84,53 @@ class QuorumCall<KEY, RESULT> {
}
return qr;
}
-
+
+ static <KEY, RESULT> QuorumCall<KEY, RESULT> create(
+ Map<KEY, ? extends ListenableFuture<RESULT>> calls) {
+ return create(calls, new Timer());
+ }
+
+ /**
+ * Not intended for outside use.
+ */
private QuorumCall() {
+ this(new Timer());
+ }
+
+ private QuorumCall(Timer timer) {
// Only instantiated from factory method above
+ this.timer = timer;
+ this.quorumStopWatch = new StopWatch(timer);
}
+ /**
+ * Used in conjunction with {@link #getQuorumTimeoutIncreaseMillis(long, int)}
+ * to check for pauses.
+ */
private void restartQuorumStopWatch() {
quorumStopWatch.reset().start();
}
- private boolean shouldIncreaseQuorumTimeout(long offset, int millis) {
+ /**
+ * Check for a pause (e.g. GC) since the last time
+ * {@link #restartQuorumStopWatch()} was called. If detected, return the
+ * length of the pause; else, -1.
+ * @param offset Offset the elapsed time by this amount; use if some amount
+ * of pause was expected
+ * @param millis Total length of timeout in milliseconds
+ * @return Length of pause, if detected, else -1
+ */
+ private long getQuorumTimeoutIncreaseMillis(long offset, int millis) {
long elapsed = quorumStopWatch.now(TimeUnit.MILLISECONDS);
- return elapsed + offset > (millis * WAIT_PROGRESS_INFO_THRESHOLD);
+ long pauseTime = elapsed + offset;
+ if (pauseTime > (millis * WAIT_PROGRESS_INFO_THRESHOLD)) {
+ QuorumJournalManager.LOG.info("Pause detected while waiting for " +
+ "QuorumCall response; increasing timeout threshold by pause time " +
+ "of " + pauseTime + " ms.");
+ return pauseTime;
+ } else {
+ return -1;
+ }
}
@@ -119,7 +156,7 @@ class QuorumCall<KEY, RESULT> {
int minResponses, int minSuccesses, int maxExceptions,
int millis, String operationName)
throws InterruptedException, TimeoutException {
- long st = Time.monotonicNow();
+ long st = timer.monotonicNow();
long nextLogTime = st + (long)(millis * WAIT_PROGRESS_INFO_THRESHOLD);
long et = st + millis;
while (true) {
@@ -128,7 +165,7 @@ class QuorumCall<KEY, RESULT> {
if (minResponses > 0 && countResponses() >= minResponses) return;
if (minSuccesses > 0 && countSuccesses() >= minSuccesses) return;
if (maxExceptions >= 0 && countExceptions() > maxExceptions) return;
- long now = Time.monotonicNow();
+ long now = timer.monotonicNow();
if (now > nextLogTime) {
long waited = now - st;
@@ -154,8 +191,9 @@ class QuorumCall<KEY, RESULT> {
long rem = et - now;
if (rem <= 0) {
// Increase timeout if a full GC occurred after restarting stopWatch
- if (shouldIncreaseQuorumTimeout(0, millis)) {
- et = et + millis;
+ long timeoutIncrease = getQuorumTimeoutIncreaseMillis(0, millis);
+ if (timeoutIncrease > 0) {
+ et += timeoutIncrease;
} else {
throw new TimeoutException();
}
@@ -165,8 +203,9 @@ class QuorumCall<KEY, RESULT> {
rem = Math.max(rem, 1);
wait(rem);
// Increase timeout if a full GC occurred after restarting stopWatch
- if (shouldIncreaseQuorumTimeout(-rem, millis)) {
- et = et + millis;
+ long timeoutIncrease = getQuorumTimeoutIncreaseMillis(-rem, millis);
+ if (timeoutIncrease > 0) {
+ et += timeoutIncrease;
}
}
}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/90894c72/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumCall.java
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumCall.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumCall.java
index 506497e..97cf2f3 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumCall.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumCall.java
@@ -23,7 +23,7 @@ import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.TimeoutException;
-import org.apache.hadoop.hdfs.qjournal.client.QuorumCall;
+import org.apache.hadoop.util.FakeTimer;
import org.junit.Test;
import com.google.common.base.Joiner;
@@ -83,4 +83,33 @@ public class TestQuorumCall {
}
}
+ @Test(timeout=10000)
+ public void testQuorumSucceedsWithLongPause() throws Exception {
+ final Map<String, SettableFuture<String>> futures = ImmutableMap.of(
+ "f1", SettableFuture.<String>create());
+
+ FakeTimer timer = new FakeTimer() {
+ private int callCount = 0;
+ @Override
+ public long monotonicNowNanos() {
+ callCount++;
+ if (callCount == 1) {
+ long old = super.monotonicNowNanos();
+ advance(1000000);
+ return old;
+ } else if (callCount == 10) {
+ futures.get("f1").set("first future");
+ return super.monotonicNowNanos();
+ } else {
+ return super.monotonicNowNanos();
+ }
+ }
+ };
+
+ QuorumCall<String, String> q = QuorumCall.create(futures, timer);
+ assertEquals(0, q.countResponses());
+
+ q.waitFor(1, 0, 0, 3000, "test"); // wait for 1 response
+ }
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org