You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by sh...@apache.org on 2018/05/02 19:21:52 UTC
[17/50] [abbrv] hadoop git commit: YARN-7189. Container-executor doesn't remove Docker containers that error out early. Contributed by Eric Badger
YARN-7189. Container-executor doesn't remove Docker containers that error out early. Contributed by Eric Badger
(cherry picked from commit 391ac5cdd2f31db2341bb731daee094b9ca309ec)
Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/5ec195ed
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/5ec195ed
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/5ec195ed
Branch: refs/heads/YARN-8200
Commit: 5ec195edbcd982a3e7c2a4ea760e3ce860c87143
Parents: 88cb461
Author: Jason Lowe <jl...@apache.org>
Authored: Tue Apr 17 09:45:55 2018 -0500
Committer: Jason Lowe <jl...@apache.org>
Committed: Tue Apr 17 09:53:19 2018 -0500
----------------------------------------------------------------------
.../impl/container-executor.c | 59 +++++++++++++++-----
1 file changed, 44 insertions(+), 15 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hadoop/blob/5ec195ed/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c
----------------------------------------------------------------------
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c
index c1a42ca..109ff73 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c
@@ -1444,7 +1444,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
if (exit_code != 0) {
fprintf(ERRORFILE, "Could not create script path\n");
fflush(ERRORFILE);
- goto cleanup;
+ goto pre_launch_cleanup;
}
fprintf(LOGFILE, "Creating local dirs...\n");
@@ -1455,7 +1455,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
if (exit_code != 0) {
fprintf(ERRORFILE, "Could not create local files and directories %d %d\n", container_file_source, cred_file_source);
fflush(ERRORFILE);
- goto cleanup;
+ goto pre_launch_cleanup;
}
docker_command = construct_docker_command(command_file);
@@ -1467,14 +1467,14 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
exit_code = OUT_OF_MEMORY;
fprintf(ERRORFILE, "Container out of memory");
fflush(ERRORFILE);
- goto cleanup;
+ goto pre_launch_cleanup;
}
fprintf(LOGFILE, "Changing effective user to root...\n");
if (change_effective_user(0, user_gid) != 0) {
fprintf(ERRORFILE, "Could not change to effective users %d, %d\n", 0, user_gid);
fflush(ERRORFILE);
- goto cleanup;
+ goto pre_launch_cleanup;
}
snprintf(docker_command_with_binary, command_size, "%s %s", docker_binary, docker_command);
@@ -1487,7 +1487,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
"Could not invoke docker %s.\n", docker_command_with_binary);
fflush(ERRORFILE);
exit_code = UNABLE_TO_EXECUTE_CONTAINER_SCRIPT;
- goto cleanup;
+ goto post_launch_cleanup;
}
snprintf(docker_inspect_command, command_size,
@@ -1504,7 +1504,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
"Could not inspect docker to get pid %s.\n", docker_inspect_command);
fflush(ERRORFILE);
exit_code = UNABLE_TO_EXECUTE_CONTAINER_SCRIPT;
- goto cleanup;
+ goto post_launch_cleanup;
}
if (pid != 0) {
@@ -1519,7 +1519,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
if (strcmp(*cgroup_ptr, "none") != 0 &&
write_pid_to_cgroup_as_root(*cgroup_ptr, pid) != 0) {
exit_code = WRITE_CGROUP_FAILED;
- goto cleanup;
+ goto post_launch_cleanup;
}
}
}
@@ -1532,7 +1532,7 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
exit_code = WRITE_PIDFILE_FAILED;
fprintf(ERRORFILE, "Could not write pid to %s", pid_file);
fflush(ERRORFILE);
- goto cleanup;
+ goto post_launch_cleanup;
}
snprintf(docker_wait_command, command_size,
@@ -1578,20 +1578,49 @@ int launch_docker_container_as_user(const char * user, const char *app_id,
}
}
+post_launch_cleanup:
+
fprintf(LOGFILE, "Removing docker container post-exit...\n");
snprintf(docker_rm_command, command_size,
"%s rm %s", docker_binary, container_id);
- FILE* rm_docker = popen(docker_rm_command, "w");
- if (pclose (rm_docker) != 0)
- {
- fprintf (ERRORFILE,
- "Could not remove container %s.\n", docker_rm_command);
+ int rc, i, sleep_time = 1, max_iterations = 5;
+ for (i = 0; i < max_iterations; i++) {
+ if (i > 0) {
+ sleep(sleep_time);
+ sleep_time *= 2;
+ }
+ FILE* rm_docker = popen(docker_rm_command, "w");
+ if (rm_docker == 0) {
+ fprintf(ERRORFILE,
+ "popen() failed: %s\n", strerror(errno));
+ fflush(ERRORFILE);
+ continue;
+ }
+ rc = pclose(rm_docker);
+ if (rc == -1) {
+ fprintf(ERRORFILE,
+ "pclose() failed: %s\n", strerror(errno));
+ fflush(ERRORFILE);
+ } else if (WIFEXITED(rc)) {
+ if (WEXITSTATUS(rc) == 0) {
+ break;
+ } else {
+ fprintf(ERRORFILE,
+ "docker rm command failed with exit status: %d\n", WEXITSTATUS(rc));
+ fflush(ERRORFILE);
+ }
+ }
+ }
+
+ if (i == max_iterations) {
+ // Tried 5 times and failed.
+ fprintf(ERRORFILE,
+ "Could not remove container after %d tries: %s\n", max_iterations, docker_rm_command);
fflush(ERRORFILE);
exit_code = UNABLE_TO_EXECUTE_CONTAINER_SCRIPT;
- goto cleanup;
}
-cleanup:
+pre_launch_cleanup:
if (exit_code_file != NULL && write_exit_code_file_as_nm(exit_code_file, exit_code) < 0) {
fprintf (ERRORFILE,
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org