You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@aurora.apache.org by zm...@apache.org on 2016/11/05 01:11:55 UTC

aurora git commit: Fix regression in 5410c22.

Repository: aurora
Updated Branches:
  refs/heads/master 5410c229f -> 84bde070f


Fix regression in 5410c22.

The hard dependency on `prctl` broke thermos unit tests both on Apache Jenkins
and OS X. This adopts serb's suggestion https://reviews.apache.org/r/53418/ and
wraps the `prctl(2)` call in a try/except block.

This also exposed some flakiness in
`TestRunnerKillProcessGroup.test_pg_is_killed`. Marked the test as flaky and
filed AURORA-1809.

Testing Done:
./pants test.pytest --junit-xml-dir="$PWD/dist/test-results" src/{main,test}/python:: -- -v
src/test/sh/org/apache/aurora/e2e/test_end_to_end.sh

Reviewed at https://reviews.apache.org/r/53508/


Project: http://git-wip-us.apache.org/repos/asf/aurora/repo
Commit: http://git-wip-us.apache.org/repos/asf/aurora/commit/84bde070
Tree: http://git-wip-us.apache.org/repos/asf/aurora/tree/84bde070
Diff: http://git-wip-us.apache.org/repos/asf/aurora/diff/84bde070

Branch: refs/heads/master
Commit: 84bde070f593f149c9e35b24e5781960b82dec80
Parents: 5410c22
Author: Zameer Manji <zm...@apache.org>
Authored: Fri Nov 4 18:11:46 2016 -0700
Committer: Zameer Manji <zm...@apache.org>
Committed: Fri Nov 4 18:11:46 2016 -0700

----------------------------------------------------------------------
 RELEASE-NOTES.md                                |  1 -
 .../apache/thermos/common/process_util.py       | 27 ++++++++++++--------
 src/main/python/apache/thermos/core/process.py  | 12 +--------
 .../apache/thermos/core/test_staged_kill.py     |  1 +
 4 files changed, 18 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/aurora/blob/84bde070/RELEASE-NOTES.md
----------------------------------------------------------------------
diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 94224be..374bee6 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -11,7 +11,6 @@
   Containerizer if the scheduler is running with the `-allow_container_volumes`
   flag.
 * The executor will send SIGTERM to processes that self daemonize via double forking.
-* The executor now requires Linux kernel 3.4 or later.
 
 ### Deprecations and removals:
 

http://git-wip-us.apache.org/repos/asf/aurora/blob/84bde070/src/main/python/apache/thermos/common/process_util.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/thermos/common/process_util.py b/src/main/python/apache/thermos/common/process_util.py
index c63b9af..637b025 100644
--- a/src/main/python/apache/thermos/common/process_util.py
+++ b/src/main/python/apache/thermos/common/process_util.py
@@ -57,18 +57,23 @@ def setup_child_subreaping():
   and here: https://lwn.net/Articles/474787/
 
   Callers should reap terminal children to prevent zombies.
-
-  raises OSError if the underlying prctl call fails.
-  raises RuntimeError if libc cannot be found.
   """
   log.debug("Calling prctl(2) with PR_SET_CHILD_SUBREAPER")
   # This constant is taken from prctl.h
   PR_SET_CHILD_SUBREAPER = 36
-  library_name = ctypes.util.find_library('c')
-  if library_name is None:
-    raise RuntimeError("libc not found")
-  libc = ctypes.CDLL(library_name, use_errno=True)
-  ret = libc.prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0)
-  if ret != 0:
-    errno = ctypes.get_errno()
-    raise OSError(errno, os.strerror(errno))
+  try:
+    library_name = ctypes.util.find_library('c')
+    if library_name is None:
+      log.warning("libc is not found. Unable to call prctl!")
+      log.warning("Children subreaping is disabled!")
+      return
+    libc = ctypes.CDLL(library_name, use_errno=True)
+    # If we are on a system where prctl doesn't exist, this will throw an
+    # attribute error.
+    ret = libc.prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0)
+    if ret != 0:
+      errno = ctypes.get_errno()
+      raise OSError(errno, os.strerror(errno))
+  except Exception as e:
+    log.error("Unable to call prctl %s" % e)
+    log.error("Children subreaping is disabled!")

http://git-wip-us.apache.org/repos/asf/aurora/blob/84bde070/src/main/python/apache/thermos/core/process.py
----------------------------------------------------------------------
diff --git a/src/main/python/apache/thermos/core/process.py b/src/main/python/apache/thermos/core/process.py
index 13f9ad5..496b540 100644
--- a/src/main/python/apache/thermos/core/process.py
+++ b/src/main/python/apache/thermos/core/process.py
@@ -94,7 +94,6 @@ class ProcessBase(object):
   class CheckpointError(Error): pass
   class UnspecifiedSandbox(Error): pass
   class PermissionError(Error): pass
-  class ForkError(Error): pass
 
   CONTROL_WAIT_CHECK_INTERVAL = Amount(100, Time.MILLISECONDS)
   MAXIMUM_CONTROL_WAIT = Amount(1, Time.MINUTES)
@@ -285,16 +284,7 @@ class ProcessBase(object):
                           # calls _getpwuid which can raise:
                           #    UnknownUserError
                           #    PermissionError
-    try:
-      self._pid = self._platform.fork()  # calls setup_child_subreaping which can
-                                         # raise OSError or RuntimeError
-    except (OSError, RuntimeError) as e:
-      # Reraise the exceptions possible from the fork as Process.Error
-      # Note only Python 3 has nice exception chaining, so we do our best here
-      # by logging the original exception and raising ForkError
-      msg = 'Error trying to fork process %s'.format(self._name)
-      self._log(msg, exc_info=True)
-      raise self.ForkError(msg)
+    self._pid = self._platform.fork()
     if self._pid == 0:
       self._pid = self._platform.getpid()
       self._wait_for_control()  # can raise CheckpointError

http://git-wip-us.apache.org/repos/asf/aurora/blob/84bde070/src/test/python/apache/thermos/core/test_staged_kill.py
----------------------------------------------------------------------
diff --git a/src/test/python/apache/thermos/core/test_staged_kill.py b/src/test/python/apache/thermos/core/test_staged_kill.py
index 9167e60..4de735f 100644
--- a/src/test/python/apache/thermos/core/test_staged_kill.py
+++ b/src/test/python/apache/thermos/core/test_staged_kill.py
@@ -248,6 +248,7 @@ class TestRunnerKillProcessGroup(RunnerBase):
     task = Task(name="task", processes=[Process(name="process", cmdline=SIMPLEFORK_SCRIPT)])
     return task.interpolate()[0]
 
+  @pytest.mark.skipif('True', reason='Flaky test (AURORA-1809)')
   def test_pg_is_killed(self):
     runner = self.start_runner()
     tm = TaskMonitor(runner.tempdir, runner.task_id)