You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zeppelin.apache.org by zj...@apache.org on 2019/04/28 07:37:29 UTC

[zeppelin] branch branch-0.8 updated: [ZEPPELIN-4133]. Idle sessions are no longer being closed even though TimeoutLifecycleManagement is configured properly

This is an automated email from the ASF dual-hosted git repository.

zjffdu pushed a commit to branch branch-0.8
in repository https://gitbox.apache.org/repos/asf/zeppelin.git


The following commit(s) were added to refs/heads/branch-0.8 by this push:
     new 4bfb6b8  [ZEPPELIN-4133]. Idle sessions are no longer being closed even though TimeoutLifecycleManagement is configured properly
4bfb6b8 is described below

commit 4bfb6b83518463d2576774e6323ede055c53168f
Author: Jeff Zhang <zj...@apache.org>
AuthorDate: Thu Apr 25 15:44:07 2019 +0800

    [ZEPPELIN-4133]. Idle sessions are no longer being closed even though TimeoutLifecycleManagement is configured properly
    
    ### What is this PR for?
    The root cause is that we use a Timer to schedule the periodic interpreter checking task. But Java's Timer has one critical issue: once the Timer thread crashes, all subsequent tasks are suppressed. This PR uses ScheduledExecutorService instead to fix this issue.
    
    ### What type of PR is it?
    [Bug Fix]
    
    ### Todos
    * [ ] - Task
    
    ### What is the Jira issue?
    * https://issues.apache.org/jira/browse/ZEPPELIN-4133
    
    ### How should this be tested?
    * CI pass
    
    ### Screenshots (if appropriate)
    
    ### Questions:
    * Do the license files need an update? No
    * Are there breaking changes for older versions? No
    * Does this need documentation? No
    
    Author: Jeff Zhang <zj...@apache.org>
    
    Closes #3355 from zjffdu/ZEPPELIN-4133 and squashes the following commits:
    
    0223640fa [Jeff Zhang] put try catch arround close
    c4057694c [Jeff Zhang] [ZEPPELIN-4133]. Idle sessions are no longer being closed even though TimeoutLifecycleManagement is configured properly
    
    (cherry picked from commit 723e0615d0ea7c99118bad8b88ef9b4ef8d3d17f)
    Signed-off-by: Jeff Zhang <zj...@apache.org>
---
 .../lifecycle/TimeoutLifecycleManager.java         | 33 ++++++++++++----------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/lifecycle/TimeoutLifecycleManager.java b/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/lifecycle/TimeoutLifecycleManager.java
index 90f3f55..390159b 100644
--- a/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/lifecycle/TimeoutLifecycleManager.java
+++ b/zeppelin-zengine/src/main/java/org/apache/zeppelin/interpreter/lifecycle/TimeoutLifecycleManager.java
@@ -7,9 +7,11 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.util.Map;
-import java.util.Timer;
-import java.util.TimerTask;
 import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+
+import static java.util.concurrent.TimeUnit.MILLISECONDS;
 
 
 /**
@@ -30,29 +32,30 @@ public class TimeoutLifecycleManager implements LifecycleManager {
   private long checkInterval;
   private long timeoutThreshold;
 
-  private Timer checkTimer;
+  private ScheduledExecutorService checkScheduler;
 
   public TimeoutLifecycleManager(ZeppelinConfiguration zConf) {
     this.checkInterval = zConf.getLong(ZeppelinConfiguration.ConfVars
             .ZEPPELIN_INTERPRETER_LIFECYCLE_MANAGER_TIMEOUT_CHECK_INTERVAL);
     this.timeoutThreshold = zConf.getLong(
         ZeppelinConfiguration.ConfVars.ZEPPELIN_INTERPRETER_LIFECYCLE_MANAGER_TIMEOUT_THRESHOLD);
-    this.checkTimer = new Timer(true);
-    this.checkTimer.scheduleAtFixedRate(new TimerTask() {
-      @Override
-      public void run() {
-        long now = System.currentTimeMillis();
-        for (Map.Entry<ManagedInterpreterGroup, Long> entry : interpreterGroups.entrySet()) {
-          ManagedInterpreterGroup interpreterGroup = entry.getKey();
-          Long lastTimeUsing = entry.getValue();
-          if ((now - lastTimeUsing) > timeoutThreshold )  {
-            LOGGER.info("InterpreterGroup {} is timeout.", interpreterGroup.getId());
+    this.checkScheduler = Executors.newScheduledThreadPool(1);
+    this.checkScheduler.scheduleAtFixedRate(() -> {
+      long now = System.currentTimeMillis();
+      for (Map.Entry<ManagedInterpreterGroup, Long> entry : interpreterGroups.entrySet()) {
+        ManagedInterpreterGroup interpreterGroup = entry.getKey();
+        Long lastTimeUsing = entry.getValue();
+        if ((now - lastTimeUsing) > timeoutThreshold) {
+          LOGGER.info("InterpreterGroup {} is timeout.", interpreterGroup.getId());
+          try {
             interpreterGroup.close();
-            interpreterGroups.remove(entry.getKey());
+          } catch (Exception e) {
+            LOGGER.warn("Fail to close interpreterGroup: " + interpreterGroup.getId(), e);
           }
+          interpreterGroups.remove(entry.getKey());
         }
       }
-    }, checkInterval, checkInterval);
+    }, checkInterval, checkInterval, MILLISECONDS);
     LOGGER.info("TimeoutLifecycleManager is started with checkinterval: " + checkInterval
         + ", timeoutThreshold: " + timeoutThreshold);
   }