You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tez.apache.org by je...@apache.org on 2014/11/17 20:22:52 UTC
[43/50] [abbrv] tez git commit: TEZ-1770. Handle ConnectExceptions
correctly when establishing connections to an NM which may be down. (sseth)
TEZ-1770. Handle ConnectExceptions correctly when establishing
connections to an NM which may be down. (sseth)
Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/0cceb1f2
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/0cceb1f2
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/0cceb1f2
Branch: refs/heads/TEZ-8
Commit: 0cceb1f220632e7722930315c03ca8c44c381e68
Parents: 0ebfc1b
Author: Siddharth Seth <ss...@apache.org>
Authored: Wed Nov 12 11:08:12 2014 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Wed Nov 12 11:08:12 2014 -0800
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../library/common/shuffle/HttpConnection.java | 23 ++++++++++++++++++--
2 files changed, 22 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tez/blob/0cceb1f2/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 7ad6903..80263bf 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -23,6 +23,7 @@ ALL CHANGES:
TEZ-1761. TestRecoveryParser::testGetLastInProgressDAG fails in similar manner to TEZ-1686.
TEZ-1687. Use logIdentifier of Vertex for logging.
TEZ-1737. Should add taskNum in VertexFinishedEvent.
+ TEZ-1770. Handle ConnectExceptions correctly when establishing connections to an NM which may be down.
Release 0.5.2: 2014-11-07
http://git-wip-us.apache.org/repos/asf/tez/blob/0cceb1f2/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/HttpConnection.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/HttpConnection.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/HttpConnection.java
index 6e33993..4732a5a 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/HttpConnection.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/HttpConnection.java
@@ -156,12 +156,14 @@ public class HttpConnection {
connection.setConnectTimeout(unit);
int connectionFailures = 0;
while (true) {
+ long connectStartTime = System.currentTimeMillis();
try {
connection.connect();
connectionSucceeed = true;
break;
} catch (IOException ioe) {
// Don't attempt another connect if already cleaned up.
+ connectionFailures++;
if (cleanup) {
LOG.info("Cleanup is set to true. Not attempting to"
+ " connect again. Last exception was: ["
@@ -173,15 +175,32 @@ public class HttpConnection {
// throw an exception if we have waited for timeout amount of time
// note that the updated value of timeout is used here
if (connectionTimeout <= 0) {
- throw ioe;
+ throw new IOException(
+ "Failed to connect to " + url + ", #connectionFailures=" + connectionFailures, ioe);
+ }
+ long elapsed = System.currentTimeMillis() - connectStartTime;
+ if (elapsed < unit) {
+ try {
+ long sleepTime = unit - elapsed;
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Sleeping for " + sleepTime + " while establishing connection to " + url +
+ ", since connectAttempt returned in " + elapsed + " ms");
+ }
+ Thread.sleep(sleepTime);
+ } catch (InterruptedException e) {
+ throw new IOException(
+ "Connection establishment sleep interrupted, #connectionFailures=" +
+ connectionFailures, e);
+ }
}
+
// reset the connect timeout for the last try
if (connectionTimeout < unit) {
unit = connectionTimeout;
// reset the connect time out for the final connect
connection.setConnectTimeout(unit);
}
- connectionFailures++;
+
}
}
if (LOG.isDebugEnabled()) {