You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tez.apache.org by je...@apache.org on 2014/11/17 20:22:52 UTC

[43/50] [abbrv] tez git commit: TEZ-1770. Handle ConnectExceptions correctly when establishing connections to an NM which may be down. (sseth)

TEZ-1770. Handle ConnectExceptions correctly when establishing
connections to an NM which may be down. (sseth)


Project: http://git-wip-us.apache.org/repos/asf/tez/repo
Commit: http://git-wip-us.apache.org/repos/asf/tez/commit/0cceb1f2
Tree: http://git-wip-us.apache.org/repos/asf/tez/tree/0cceb1f2
Diff: http://git-wip-us.apache.org/repos/asf/tez/diff/0cceb1f2

Branch: refs/heads/TEZ-8
Commit: 0cceb1f220632e7722930315c03ca8c44c381e68
Parents: 0ebfc1b
Author: Siddharth Seth <ss...@apache.org>
Authored: Wed Nov 12 11:08:12 2014 -0800
Committer: Siddharth Seth <ss...@apache.org>
Committed: Wed Nov 12 11:08:12 2014 -0800

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../library/common/shuffle/HttpConnection.java  | 23 ++++++++++++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tez/blob/0cceb1f2/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 7ad6903..80263bf 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -23,6 +23,7 @@ ALL CHANGES:
   TEZ-1761. TestRecoveryParser::testGetLastInProgressDAG fails in similar manner to TEZ-1686.
   TEZ-1687. Use logIdentifier of Vertex for logging.
   TEZ-1737. Should add taskNum in VertexFinishedEvent.
+  TEZ-1770. Handle ConnectExceptions correctly when establishing connections to an NM which may be down.
 
 Release 0.5.2: 2014-11-07
 

http://git-wip-us.apache.org/repos/asf/tez/blob/0cceb1f2/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/HttpConnection.java
----------------------------------------------------------------------
diff --git a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/HttpConnection.java b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/HttpConnection.java
index 6e33993..4732a5a 100644
--- a/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/HttpConnection.java
+++ b/tez-runtime-library/src/main/java/org/apache/tez/runtime/library/common/shuffle/HttpConnection.java
@@ -156,12 +156,14 @@ public class HttpConnection {
     connection.setConnectTimeout(unit);
     int connectionFailures = 0;
     while (true) {
+      long connectStartTime = System.currentTimeMillis();
       try {
         connection.connect();
         connectionSucceeed = true;
         break;
       } catch (IOException ioe) {
         // Don't attempt another connect if already cleanedup.
+        connectionFailures++;
         if (cleanup) {
           LOG.info("Cleanup is set to true. Not attempting to"
               + " connect again. Last exception was: ["
@@ -173,15 +175,32 @@ public class HttpConnection {
         // throw an exception if we have waited for timeout amount of time
         // note that the updated value if timeout is used here
         if (connectionTimeout <= 0) {
-          throw ioe;
+          throw new IOException(
+              "Failed to connect to " + url + ", #connectionFailures=" + connectionFailures, ioe);
+        }
+        long elapsed = System.currentTimeMillis() - connectStartTime;
+        if (elapsed < unit) {
+          try {
+            long sleepTime = unit - elapsed;
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Sleeping for " + sleepTime + " while establishing connection to " + url +
+                  ", since connectAttempt returned in " + elapsed + " ms");
+            }
+            Thread.sleep(sleepTime);
+          } catch (InterruptedException e) {
+            throw new IOException(
+                "Connection establishment sleep interrupted, #connectionFailures=" +
+                    connectionFailures, e);
+          }
         }
+
         // reset the connect timeout for the last try
         if (connectionTimeout < unit) {
           unit = connectionTimeout;
           // reset the connect time out for the final connect
           connection.setConnectTimeout(unit);
         }
-        connectionFailures++;
+
       }
     }
     if (LOG.isDebugEnabled()) {