You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@oozie.apache.org by pu...@apache.org on 2016/05/26 23:01:46 UTC

oozie git commit: OOZIE-2467 Oozie can shutdown itself on long GC pause

Repository: oozie
Updated Branches:
  refs/heads/master fe2e9d2ef -> c0b5497b2


OOZIE-2467 Oozie can shutdown itself on long GC pause


Project: http://git-wip-us.apache.org/repos/asf/oozie/repo
Commit: http://git-wip-us.apache.org/repos/asf/oozie/commit/c0b5497b
Tree: http://git-wip-us.apache.org/repos/asf/oozie/tree/c0b5497b
Diff: http://git-wip-us.apache.org/repos/asf/oozie/diff/c0b5497b

Branch: refs/heads/master
Commit: c0b5497b209be0f53f0b05336d883fc329b2a1d2
Parents: fe2e9d2
Author: Purshotam Shah <pu...@yahoo-inc.com>
Authored: Thu May 26 16:01:22 2016 -0700
Committer: Purshotam Shah <pu...@yahoo-inc.com>
Committed: Thu May 26 16:01:22 2016 -0700

----------------------------------------------------------------------
 .../event/listener/ZKConnectionListener.java    |  6 ++---
 .../oozie/service/ConfigurationService.java     |  5 ++++
 .../java/org/apache/oozie/util/ZKUtils.java     | 27 ++++++++++++--------
 core/src/main/resources/oozie-default.xml       | 18 +++++++++++--
 release-log.txt                                 |  1 +
 5 files changed, 40 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/oozie/blob/c0b5497b/core/src/main/java/org/apache/oozie/event/listener/ZKConnectionListener.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/oozie/event/listener/ZKConnectionListener.java b/core/src/main/java/org/apache/oozie/event/listener/ZKConnectionListener.java
index c6415b1..c27310e 100644
--- a/core/src/main/java/org/apache/oozie/event/listener/ZKConnectionListener.java
+++ b/core/src/main/java/org/apache/oozie/event/listener/ZKConnectionListener.java
@@ -24,7 +24,6 @@ import org.apache.curator.framework.state.ConnectionStateListener;
 import org.apache.oozie.service.ConfigurationService;
 import org.apache.oozie.service.Services;
 import org.apache.oozie.util.XLog;
-import org.apache.oozie.util.ZKUtils;
 
 /**
  * ZKConnectionListener listens on ZK connection status.
@@ -47,8 +46,7 @@ public class ZKConnectionListener implements ConnectionStateListener {
         // ZK connected
         // }
         if (newState == ConnectionState.SUSPENDED) {
-            LOG.warn("ZK connection is suspended, waiting for reconnect. If connection doesn't reconnect before "
-                    + ZKUtils.getZKConnectionTimeout() + " (sec) Oozie server will shutdown itself");
+            LOG.warn("ZK connection is suspended, waiting to reconnect.");
         }
 
         if (newState == ConnectionState.RECONNECTED) {
@@ -57,7 +55,7 @@ public class ZKConnectionListener implements ConnectionStateListener {
         }
 
         if (newState == ConnectionState.LOST) {
-            LOG.fatal("ZK is not reconnected in " + ZKUtils.getZKConnectionTimeout());
+            LOG.fatal("ZK is not reconnected");
             if (ConfigurationService.getBoolean(CONF_SHUTDOWN_ON_TIMEOUT)) {
                 LOG.fatal("Shutting down Oozie server");
                 Services.get().destroy();

http://git-wip-us.apache.org/repos/asf/oozie/blob/c0b5497b/core/src/main/java/org/apache/oozie/service/ConfigurationService.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/oozie/service/ConfigurationService.java b/core/src/main/java/org/apache/oozie/service/ConfigurationService.java
index 9d4dcd9..7f140d9 100644
--- a/core/src/main/java/org/apache/oozie/service/ConfigurationService.java
+++ b/core/src/main/java/org/apache/oozie/service/ConfigurationService.java
@@ -534,6 +534,11 @@ public class ConfigurationService implements Service, Instrumentable {
         return getInt(conf, name);
     }
 
+    public static int getInt(String name, int defaultValue) {
+        Configuration conf = Services.get().getConf();
+        return conf.getInt(name, defaultValue);
+    }
+
     public static int getInt(Configuration conf, String name) {
         return conf.getInt(name, ConfigUtils.INT_DEFAULT);
     }

http://git-wip-us.apache.org/repos/asf/oozie/blob/c0b5497b/core/src/main/java/org/apache/oozie/util/ZKUtils.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/oozie/util/ZKUtils.java b/core/src/main/java/org/apache/oozie/util/ZKUtils.java
index 91f8f70..8069641 100644
--- a/core/src/main/java/org/apache/oozie/util/ZKUtils.java
+++ b/core/src/main/java/org/apache/oozie/util/ZKUtils.java
@@ -94,11 +94,21 @@ public class ZKUtils {
     public static final String ZK_NAMESPACE = "oozie.zookeeper.namespace";
 
     /**
-     *Default ZK connection timeout ( in sec). If connection is lost for more than timeout, then Oozie server will shutdown itself.
+     * Default ZK connection timeout (in sec).
      */
     public static final String ZK_CONNECTION_TIMEOUT = "oozie.zookeeper.connection.timeout";
 
     /**
+     * Default ZK session timeout (in sec). If the connection is lost even after retries, the Oozie server shuts itself down.
+     */
+    public static final String ZK_SESSION_TIMEOUT = "oozie.zookeeper.session.timeout";
+
+    /**
+     * Maximum number of retries used by the ZK client retry policy.
+     */
+    public static final String ZK_MAX_RETRIES = "oozie.zookeeper.max.retries";
+
+    /**
      * oozie-env environment variable for specifying the Oozie instance ID
      */
     public static final String OOZIE_INSTANCE_ID = "oozie.instance.id";
@@ -124,7 +134,7 @@ public class ZKUtils {
     private XLog log;
 
     private static ZKUtils zk = null;
-    private static int zkConnectionTimeout;
+
 
     /**
      * Private Constructor for the singleton; it connects to ZooKeeper and advertises this Oozie Server.
@@ -179,7 +189,8 @@ public class ZKUtils {
         RetryPolicy retryPolicy = ZKUtils.getRetryPolicy();
         String zkConnectionString = ConfigurationService.get(ZK_CONNECTION_STRING);
         String zkNamespace = getZKNameSpace();
-        zkConnectionTimeout = ConfigurationService.getInt(ZK_CONNECTION_TIMEOUT);
+        int zkConnectionTimeout = ConfigurationService.getInt(ZK_CONNECTION_TIMEOUT);
+        int zkSessionTimeout = ConfigurationService.getInt(ZK_SESSION_TIMEOUT, 300);
 
         ACLProvider aclProvider;
         if (Services.get().getConf().getBoolean(ZK_SECURE, false)) {
@@ -199,6 +210,7 @@ public class ZKUtils {
                                             .retryPolicy(retryPolicy)
                                             .aclProvider(aclProvider)
                                             .connectionTimeoutMs(zkConnectionTimeout * 1000) // in ms
+                                            .sessionTimeoutMs(zkSessionTimeout * 1000) //in ms
                                             .build();
         client.start();
         client.getConnectionStateListenable().addListener(new ZKConnectionListener());
@@ -409,7 +421,7 @@ public class ZKUtils {
      * @return RetryPolicy
      */
     public static RetryPolicy getRetryPolicy() {
-        return new ExponentialBackoffRetry(1000, 3);
+        return new ExponentialBackoffRetry(1000, ConfigurationService.getInt(ZK_MAX_RETRIES, 10));
     }
 
     /**
@@ -419,11 +431,4 @@ public class ZKUtils {
     public static String getZKNameSpace() {
         return ConfigurationService.get(ZK_NAMESPACE);
     }
-    /**
-     * Return ZK connection timeout
-     * @return
-     */
-    public static int getZKConnectionTimeout(){
-        return zkConnectionTimeout;
-    }
 }

http://git-wip-us.apache.org/repos/asf/oozie/blob/c0b5497b/core/src/main/resources/oozie-default.xml
----------------------------------------------------------------------
diff --git a/core/src/main/resources/oozie-default.xml b/core/src/main/resources/oozie-default.xml
index 942c847..2a2822b 100644
--- a/core/src/main/resources/oozie-default.xml
+++ b/core/src/main/resources/oozie-default.xml
@@ -2381,8 +2381,22 @@ will be the requeue interval for the actions which are waiting for a long time w
         <name>oozie.zookeeper.connection.timeout</name>
         <value>180</value>
         <description>
-        Default ZK connection timeout (in sec). If connection is lost for more than timeout, then Oozie server will shutdown
-        itself if oozie.zookeeper.server.shutdown.ontimeout is true.
+        Default ZK connection timeout (in sec).
+        </description>
+    </property>
+    <property>
+        <name>oozie.zookeeper.session.timeout</name>
+        <value>300</value>
+        <description>
+            Default ZK session timeout (in sec). If the connection is lost even after retries, the Oozie server shuts
+            itself down, provided oozie.zookeeper.server.shutdown.ontimeout is true.
+        </description>
+    </property>
+    <property>
+        <name>oozie.zookeeper.max.retries</name>
+        <value>10</value>
+        <description>
+            Maximum number of retries used by the ZK client retry policy.
         </description>
     </property>
 

http://git-wip-us.apache.org/repos/asf/oozie/blob/c0b5497b/release-log.txt
----------------------------------------------------------------------
diff --git a/release-log.txt b/release-log.txt
index 0d4282f..7f40a99 100644
--- a/release-log.txt
+++ b/release-log.txt
@@ -1,5 +1,6 @@
 -- Oozie 4.3.0 release (trunk - unreleased)
 
+OOZIE-2467 Oozie can shutdown itself on long GC pause (puru)
 OOZIE-2537 SqoopMain does not set up log4j properly (pbacsko via rkanter)
 OOZIE-2532 patch apply does not handle binary files (gezapeti via rkanter)
 OOZIE-2330 Spark action should take the global jobTracker and nameNode configs by default and allow file and archive elements (satishsaley via rkanter)