You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zeppelin.apache.org by li...@apache.org on 2019/09/19 05:55:16 UTC

[zeppelin] branch master updated: [ZEPPELIN-4263] Fixed cluster docker mode cannot exit the container of the remote interpreter

This is an automated email from the ASF dual-hosted git repository.

liuxun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/zeppelin.git


The following commit(s) were added to refs/heads/master by this push:
     new a530496  [ZEPPELIN-4263] Fixed cluster docker mode cannot exit the container of the remote interpreter
a530496 is described below

commit a5304964a8dbef3a3d0e0eb04cee94e045443bd2
Author: Xun Liu <li...@apache.org>
AuthorDate: Tue Sep 17 15:57:55 2019 +0800

    [ZEPPELIN-4263] Fixed cluster docker mode cannot exit the container of the remote interpreter
    
    ### What is this PR for?
    The docker launcher first starts the container with the `while :; do sleep 1; done` script.
    
    It then executes `bin/interpreter.sh` inside the container to start the interpreter.
    
    When zeppelin closes or restarts the interpreter in docker, the interpreter process in the container can exit normally, but the container's startup shell does not exit, causing the container to continue running.
    
    So the container's startup script needs to detect when the interpreter process has exited, and then exit itself.
    
    ### What type of PR is it?
    Bug Fix
    
    ### What is the Jira issue?
    https://issues.apache.org/jira/browse/ZEPPELIN-4263
    
    ### How should this be tested?
    [CI Pass](https://travis-ci.org/liuxunorg/zeppelin/builds/585091106)
    
    ### Screenshots (if appropriate)
    
    ### Questions:
    * Does the licenses files need update? No
    * Is there breaking changes for older versions? No
    * Does this needs documentation? No
    
    Author: Xun Liu <li...@apache.org>
    
    Closes #3445 from liuxunorg/ZEPPELIN-4263 and squashes the following commits:
    
    017e98b01 [Xun Liu] 1. add comment. 2. add determine if container exists.
    93e41e5fb [Xun Liu] [ZEPPELIN-4263] Fixed cluster docker mode cannot exit the container of the remote interpreter
---
 .../launcher/DockerInterpreterProcess.java         | 57 ++++++++++++++++++++--
 1 file changed, 52 insertions(+), 5 deletions(-)

diff --git a/zeppelin-plugins/launcher/docker/src/main/java/org/apache/zeppelin/interpreter/launcher/DockerInterpreterProcess.java b/zeppelin-plugins/launcher/docker/src/main/java/org/apache/zeppelin/interpreter/launcher/DockerInterpreterProcess.java
index 17bb093..9802f81 100644
--- a/zeppelin-plugins/launcher/docker/src/main/java/org/apache/zeppelin/interpreter/launcher/DockerInterpreterProcess.java
+++ b/zeppelin-plugins/launcher/docker/src/main/java/org/apache/zeppelin/interpreter/launcher/DockerInterpreterProcess.java
@@ -45,6 +45,7 @@ import com.spotify.docker.client.DockerClient;
 import com.spotify.docker.client.LogStream;
 import com.spotify.docker.client.ProgressHandler;
 import com.spotify.docker.client.exceptions.DockerException;
+import com.spotify.docker.client.messages.Container;
 import com.spotify.docker.client.messages.ContainerConfig;
 import com.spotify.docker.client.messages.ContainerCreation;
 import com.spotify.docker.client.messages.ExecCreation;
@@ -162,6 +163,8 @@ public class DockerInterpreterProcess extends RemoteInterpreterProcess {
   public void start(String userName) throws IOException {
     docker = DefaultDockerClient.builder().uri(URI.create(DOCKER_HOST)).build();
 
+    removeExistContainer(containerName);
+
     final Map<String, List<PortBinding>> portBindings = new HashMap<>();
 
     // Bind container ports to host ports
@@ -191,13 +194,24 @@ public class DockerInterpreterProcess extends RemoteInterpreterProcess {
     List<String> listEnv = getListEnvs();
     LOGGER.info("docker listEnv = {}", listEnv);
 
+    // Startup script that keeps checking whether the interpreter process is still running.
+    // Once the interpreter process has exited, the script ends so that the container exits too.
+    StringBuilder sbStartCmd = new StringBuilder();
+    sbStartCmd.append("sleep 10; ");
+    sbStartCmd.append("process=RemoteInterpreterServer; ");
+    sbStartCmd.append("RUNNING_PIDS=$(ps x | grep $process | grep -v grep | awk '{print $1}'); ");
+    sbStartCmd.append("while [ ! -z \"$RUNNING_PIDS\" ]; ");
+    sbStartCmd.append("do sleep 1; ");
+    sbStartCmd.append("RUNNING_PIDS=$(ps x | grep $process | grep -v grep | awk '{print $1}'); ");
+    sbStartCmd.append("done");
+
     // Create container with exposed ports
     final ContainerConfig containerConfig = ContainerConfig.builder()
         .hostConfig(hostConfig)
         .image(containerImage)
         .workingDir("/")
         .env(listEnv)
-        .cmd("sh", "-c", "while :; do sleep 1; done")
+        .cmd("sh", "-c", sbStartCmd.toString())
         .build();
 
     try {
@@ -340,16 +354,49 @@ public class DockerInterpreterProcess extends RemoteInterpreterProcess {
 
       // Remove container
       docker.removeContainer(containerName);
-    } catch (DockerException e) {
-      e.printStackTrace();
-    } catch (InterruptedException e) {
-      e.printStackTrace();
+    } catch (DockerException | InterruptedException e) {
+      LOGGER.error(e.getMessage(), e);
     }
 
     // Close the docker client
     docker.close();
   }
 
+  // Because docker can't create a container with the same name, it will cause the creation to fail.
+  // If the zeppelin service is abnormal and the container that was created is not closed properly,
+  // the container will not be created again.
+  private void removeExistContainer(String containerName) {
+    boolean isExist = false;
+    try {
+      final List<Container> containers
+          = docker.listContainers(DockerClient.ListContainersParam.allContainers());
+      for (Container container : containers) {
+        for (String name : container.names()) {
+          // because container name like '/md-shared', so need add '/'
+          if (StringUtils.equals(name, "/" + containerName)) {
+            isExist = true;
+            break;
+          }
+        }
+      }
+
+      if (isExist == true) {
+        LOGGER.info("kill exist container {}", containerName);
+        docker.killContainer(containerName);
+      }
+    } catch (DockerException | InterruptedException e) {
+      LOGGER.error(e.getMessage(), e);
+    } finally {
+      try {
+        if (isExist == true) {
+          docker.removeContainer(containerName);
+        }
+      } catch (DockerException | InterruptedException e) {
+        LOGGER.error(e.getMessage(), e);
+      }
+    }
+  }
+
   @Override
   public String getHost() {
     return containerHost;