You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by xg...@apache.org on 2015/06/02 19:29:03 UTC

hadoop git commit: YARN-3753. RM failed to come up with "java.io.IOException: Wait for ZKClient creation timed out". Contributed by Jian He

Repository: hadoop
Updated Branches:
  refs/heads/branch-2.7 1ff3f16ed -> b34825b0c


YARN-3753. RM failed to come up with "java.io.IOException: Wait for ZKClient creation timed out". Contributed by Jian He


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/b34825b0
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/b34825b0
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/b34825b0

Branch: refs/heads/branch-2.7
Commit: b34825b0cb1b48a1ab368ba6498d63649feb7458
Parents: 1ff3f16
Author: Xuan <xg...@apache.org>
Authored: Tue Jun 2 10:28:14 2015 -0700
Committer: Xuan <xg...@apache.org>
Committed: Tue Jun 2 10:28:14 2015 -0700

----------------------------------------------------------------------
 hadoop-yarn-project/CHANGES.txt                 |  3 +++
 .../recovery/ZKRMStateStore.java                |  7 ++++--
 .../TestZKRMStateStoreZKClientConnections.java  | 26 +++++++++++++-------
 3 files changed, 25 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/b34825b0/hadoop-yarn-project/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 67e0628..12f200d 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -142,6 +142,9 @@ Release 2.7.1 - UNRELEASED
     YARN-3725. App submission via REST API is broken in secure mode due to 
     Timeline DT service address is empty. (Zhijie Shen via wangda)
 
+    YARN-3753. RM failed to come up with "java.io.IOException: Wait for
+    ZKClient creation timed out". (Jian He via xgong)
+
 Release 2.7.0 - 2015-04-20
 
   INCOMPATIBLE CHANGES

http://git-wip-us.apache.org/repos/asf/hadoop/blob/b34825b0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java
index a116e06..82ac2c1 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java
@@ -104,6 +104,8 @@ public class ZKRMStateStore extends RMStateStore {
 
   private String zkHostPort = null;
   private int zkSessionTimeout;
+  // wait time for zkClient to re-establish connection with zk-server.
+  private long zkResyncWaitTime;
 
   @VisibleForTesting
   long zkRetryInterval;
@@ -234,6 +236,7 @@ public class ZKRMStateStore extends RMStateStore {
           conf.getLong(YarnConfiguration.RM_ZK_RETRY_INTERVAL_MS,
               YarnConfiguration.DEFAULT_RM_ZK_RETRY_INTERVAL_MS);
     }
+    zkResyncWaitTime = zkRetryInterval * numRetries;
 
     zkAcl = RMZKUtils.getZKAcls(conf);
     zkAuths = RMZKUtils.getZKAuths(conf);
@@ -1081,11 +1084,11 @@ public class ZKRMStateStore extends RMStateStore {
       long startTime = System.currentTimeMillis();
       synchronized (ZKRMStateStore.this) {
         while (zkClient == null) {
-          ZKRMStateStore.this.wait(zkSessionTimeout);
+          ZKRMStateStore.this.wait(zkResyncWaitTime);
           if (zkClient != null) {
             break;
           }
-          if (System.currentTimeMillis() - startTime > zkSessionTimeout) {
+          if (System.currentTimeMillis() - startTime > zkResyncWaitTime) {
             throw new IOException("Wait for ZKClient creation timed out");
           }
         }

http://git-wip-us.apache.org/repos/asf/hadoop/blob/b34825b0/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStoreZKClientConnections.java
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStoreZKClientConnections.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStoreZKClientConnections.java
index 62dc5ef..7f07364 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStoreZKClientConnections.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStoreZKClientConnections.java
@@ -170,10 +170,10 @@ public class TestZKRMStateStoreZKClientConnections extends
       throws Exception {
 
     TestZKClient zkClientTester = new TestZKClient();
-    String path = "/test";
+    final String path = "/test";
     YarnConfiguration conf = new YarnConfiguration();
     conf.setInt(YarnConfiguration.RM_ZK_TIMEOUT_MS, ZK_TIMEOUT_MS);
-    ZKRMStateStore store =
+    final ZKRMStateStore store =
         (ZKRMStateStore) zkClientTester.getRMStateStore(conf);
     TestDispatcher dispatcher = new TestDispatcher();
     store.setRMDispatcher(dispatcher);
@@ -185,14 +185,20 @@ public class TestZKRMStateStoreZKClientConnections extends
     store.setDataWithRetries(path, "newBytes".getBytes(), 0);
 
     stopServer();
+    final AtomicBoolean isSucceeded = new AtomicBoolean(false);
     zkClientTester.watcher.waitForDisconnected(ZK_OP_WAIT_TIME);
-    try {
-      store.getDataWithRetries(path, true);
-      fail("Expected ZKClient time out exception");
-    } catch (Exception e) {
-      assertTrue(e.getMessage().contains(
-          "Wait for ZKClient creation timed out"));
-    }
+    Thread thread = new Thread() {
+      @Override
+      public void run() {
+        try {
+          store.getDataWithRetries(path, true);
+          isSucceeded.set(true);
+        } catch (Exception e) {
+          isSucceeded.set(false);
+        }
+      }
+    };
+    thread.start();
 
     // ZKRMStateStore Session restored
     startServer();
@@ -206,6 +212,8 @@ public class TestZKRMStateStoreZKClientConnections extends
       fail(error);
     }
     assertEquals("newBytes", new String(ret));
+    thread.join();
+    assertTrue(isSucceeded.get());
   }
 
   @Test(timeout = 20000)