You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by vb...@apache.org on 2020/08/05 14:46:46 UTC

[hudi] branch master updated: [MINOR] Adding timeout for each command execution in docker and capture output. This will help get stdout/stderr of stuck commands (#1918)

This is an automated email from the ASF dual-hosted git repository.

vbalaji pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 61e027f  [MINOR] Adding timeout for each command execution in docker and capture output. This will help get stdout/stderr of stuck commands (#1918)
61e027f is described below

commit 61e027fadda4cefc56a18bc284267d34b336f6cc
Author: Balaji Varadarajan <ba...@robinhood.com>
AuthorDate: Wed Aug 5 07:46:34 2020 -0700

    [MINOR] Adding timeout for each command execution in docker and capture output. This will help get stdout/stderr of stuck commands (#1918)
---
 .../test/java/org/apache/hudi/integ/ITTestBase.java   | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java
index 7b70857..a5e6ed9 100644
--- a/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java
+++ b/hudi-integ-test/src/test/java/org/apache/hudi/integ/ITTestBase.java
@@ -18,6 +18,8 @@
 
 package org.apache.hudi.integ;
 
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
 import org.apache.hudi.common.util.FileIOUtils;
 import org.apache.hudi.common.util.collection.Pair;
 
@@ -158,8 +160,21 @@ public abstract class ITTestBase {
     ExecCreateCmdResponse createCmdResponse = cmd.exec();
     TestExecStartResultCallback callback =
         new TestExecStartResultCallback(new ByteArrayOutputStream(), new ByteArrayOutputStream());
-    dockerClient.execStartCmd(createCmdResponse.getId()).withDetach(false).withTty(false).exec(callback)
-        .awaitCompletion();
+    // Each command execution in docker should take no more than 15 minutes; otherwise it is deemed stuck, and we
+    // capture the stdout and stderr of the stuck process before failing.
+
+    boolean completed =
+      dockerClient.execStartCmd(createCmdResponse.getId()).withDetach(false).withTty(false).exec(callback)
+        .awaitCompletion(900, SECONDS);
+    if (!completed) {
+      callback.getStderr().flush();
+      callback.getStdout().flush();
+      LOG.error("\n\n ###### Timed Out Command : " +  Arrays.asList(command));
+      LOG.error("\n\n ###### Stderr of timed-out command #######\n" + callback.getStderr().toString());
+      LOG.error("\n\n ###### stdout of timed-out command #######\n" + callback.getStderr().toString());
+      throw new TimeoutException("Command " + command +  " has been running for more than 15 minutes. "
+        + "Killing and failing !!");
+    }
     int exitCode = dockerClient.inspectExecCmd(createCmdResponse.getId()).exec().getExitCode();
     LOG.info("Exit code for command : " + exitCode);
     if (exitCode != 0) {