You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by ma...@apache.org on 2018/11/07 09:14:13 UTC
[incubator-mxnet] branch master updated: Fix docker cleanup race condition (#13092)
This is an automated email from the ASF dual-hosted git repository.
marcoabreu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new f8052e4 Fix docker cleanup race condition (#13092)
f8052e4 is described below
commit f8052e4261238ff6c93465b3f0d0f22457f127ce
Author: Chance Bair <ch...@gmail.com>
AuthorDate: Wed Nov 7 10:13:50 2018 +0100
Fix docker cleanup race condition (#13092)
---
ci/build.py | 92 +++++++++++++++++++++++++++++++------------------------------
1 file changed, 47 insertions(+), 45 deletions(-)
diff --git a/ci/build.py b/ci/build.py
index e2554d9..8f3fe2d 100755
--- a/ci/build.py
+++ b/ci/build.py
@@ -281,7 +281,6 @@ def container_run(platform: str,
# noinspection PyShadowingNames
# runc is default (docker info | grep -i runtime)
runtime = 'nvidia'
-
container = docker_client.containers.run(
tag,
runtime=runtime,
@@ -299,52 +298,55 @@ def container_run(platform: str,
{'bind': '/work/ccache', 'mode': 'rw'},
},
environment=environment)
- logging.info("Started container: %s", trim_container_id(container.id))
- # Race condition:
- # If the previous call is interrupted then it's possible that the container is not cleaned up
- # We avoid by masking the signals temporarily
- cleanup.add_container(container)
- signal.pthread_sigmask(signal.SIG_UNBLOCK, {signal.SIGINT, signal.SIGTERM})
- #
- #############################
-
- stream = container.logs(stream=True, stdout=True, stderr=True)
- sys.stdout.flush()
- for chunk in stream:
- sys.stdout.buffer.write(chunk)
- sys.stdout.buffer.flush()
- sys.stdout.flush()
- stream.close()
- try:
- logging.info("Waiting for status of container %s for %d s.",
- trim_container_id(container.id),
- container_wait_s)
- wait_result = container.wait(timeout=container_wait_s)
- logging.info("Container exit status: %s", wait_result)
- ret = wait_result.get('StatusCode', 200)
- except Exception as e:
- logging.exception(e)
- ret = 150
-
- # Stop
try:
- logging.info("Stopping container: %s", trim_container_id(container.id))
- container.stop()
- except Exception as e:
- logging.exception(e)
- ret = 151
+ logging.info("Started container: %s", trim_container_id(container.id))
+ # Race condition:
+ # If the previous call is interrupted then it's possible that the container is not cleaned up
+ # We avoid by masking the signals temporarily
+ cleanup.add_container(container)
+ signal.pthread_sigmask(signal.SIG_UNBLOCK, {signal.SIGINT, signal.SIGTERM})
+ #
+ #############################
+
+ stream = container.logs(stream=True, stdout=True, stderr=True)
+ sys.stdout.flush()
+ for chunk in stream:
+ sys.stdout.buffer.write(chunk)
+ sys.stdout.buffer.flush()
+ sys.stdout.flush()
+ stream.close()
+ try:
+ logging.info("Waiting for status of container %s for %d s.",
+ trim_container_id(container.id),
+ container_wait_s)
+ wait_result = container.wait(timeout=container_wait_s)
+ logging.info("Container exit status: %s", wait_result)
+ ret = wait_result.get('StatusCode', 200)
+ except Exception as e:
+ logging.exception(e)
+ ret = 150
- # Remove
- try:
- logging.info("Removing container: %s", trim_container_id(container.id))
- container.remove()
- except Exception as e:
- logging.exception(e)
- ret = 152
- cleanup.remove_container(container)
- containers = docker_client.containers.list()
- if containers:
- logging.info("Other running containers: %s", [trim_container_id(x.id) for x in containers])
+ # Stop
+ try:
+ logging.info("Stopping container: %s", trim_container_id(container.id))
+ container.stop()
+ except Exception as e:
+ logging.exception(e)
+ ret = 151
+
+ # Remove
+ try:
+ logging.info("Removing container: %s", trim_container_id(container.id))
+ container.remove()
+ except Exception as e:
+ logging.exception(e)
+ ret = 152
+ cleanup.remove_container(container)
+ containers = docker_client.containers.list()
+ if containers:
+ logging.info("Other running containers: %s", [trim_container_id(x.id) for x in containers])
+ except docker.errors.NotFound as e:
+ logging.info("Container was stopped before cleanup started: %s", e)
return ret