You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by jh...@apache.org on 2019/03/26 18:27:27 UTC
[hadoop] 05/20: YARN-9187. Backport YARN-6852 for GPU-specific
native changes to branch-2
This is an automated email from the ASF dual-hosted git repository.
jhung pushed a commit to branch YARN-8200
in repository https://gitbox.apache.org/repos/asf/hadoop.git
commit f0dcb31f3cb1b012cff14f0475f3ecffa6930c6c
Author: Jonathan Hung <jh...@linkedin.com>
AuthorDate: Wed Jan 9 16:21:43 2019 -0500
YARN-9187. Backport YARN-6852 for GPU-specific native changes to branch-2
---
.../src/CMakeLists.txt | 8 +-
.../container-executor/impl/container-executor.h | 2 +
.../src/main/native/container-executor/impl/main.c | 11 +
.../impl/modules/cgroups/cgroups-operations.c | 161 +++++++++++++++
.../impl/modules/cgroups/cgroups-operations.h | 55 +++++
.../impl/modules/gpu/gpu-module.c | 229 +++++++++++++++++++++
.../impl/modules/gpu/gpu-module.h | 45 ++++
.../test/modules/cgroups/test-cgroups-module.cc | 121 +++++++++++
.../test/modules/gpu/test-gpu-module.cc | 203 ++++++++++++++++++
.../test/test-container-executor.c | 1 -
10 files changed, 833 insertions(+), 3 deletions(-)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/CMakeLists.txt b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/CMakeLists.txt
index 0b1c3e9..e9f8aff 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/CMakeLists.txt
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/CMakeLists.txt
@@ -101,9 +101,11 @@ add_library(container
main/native/container-executor/impl/container-executor.c
main/native/container-executor/impl/get_executable.c
main/native/container-executor/impl/utils/string-utils.c
+ main/native/container-executor/impl/utils/docker-util.c
main/native/container-executor/impl/utils/path-utils.c
+ main/native/container-executor/impl/modules/cgroups/cgroups-operations.c
main/native/container-executor/impl/modules/common/module-configs.c
- main/native/container-executor/impl/utils/docker-util.c
+ main/native/container-executor/impl/modules/gpu/gpu-module.c
)
add_executable(container-executor
@@ -135,6 +137,8 @@ add_executable(cetest
main/native/container-executor/test/utils/test-string-utils.cc
main/native/container-executor/test/utils/test-path-utils.cc
main/native/container-executor/test/test_util.cc
- main/native/container-executor/test/utils/test_docker_util.cc)
+ main/native/container-executor/test/utils/test_docker_util.cc
+ main/native/container-executor/test/modules/cgroups/test-cgroups-module.cc
+ main/native/container-executor/test/modules/gpu/test-gpu-module.cc)
target_link_libraries(cetest gtest container)
output_directory(cetest test)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h
index 956b38c..a78b077 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h
@@ -285,3 +285,5 @@ int execute_regex_match(const char *regex_str, const char *input);
* Return 0 on success.
*/
int validate_docker_image_name(const char *image_name);
+
+struct configuration* get_cfg();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c
index 930dabe..9cf34a0 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c
@@ -22,6 +22,8 @@
#include "util.h"
#include "get_executable.h"
#include "utils/string-utils.h"
+#include "modules/gpu/gpu-module.h"
+#include "modules/cgroups/cgroups-operations.h"
#include <errno.h>
#include <grp.h>
@@ -241,6 +243,14 @@ static int validate_arguments(int argc, char **argv , int *operation) {
return INVALID_ARGUMENT_NUMBER;
}
+ /*
+ * Check if it is a known module, if yes, redirect to module
+ */
+ if (strcmp("--module-gpu", argv[1]) == 0) {
+ return handle_gpu_request(&update_cgroups_parameters, "gpu", argc - 1,
+ &argv[1]);
+ }
+
if (strcmp("--checksetup", argv[1]) == 0) {
*operation = CHECK_SETUP;
return 0;
@@ -325,6 +335,7 @@ static int validate_arguments(int argc, char **argv , int *operation) {
return FEATURE_DISABLED;
}
}
+
/* Now we have to validate 'run as user' operations that don't use
a 'long option' - we should fix this at some point. The validation/argument
parsing here is extensive enough that it done in a separate function */
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/cgroups/cgroups-operations.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/cgroups/cgroups-operations.c
new file mode 100644
index 0000000..b234109
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/cgroups/cgroups-operations.c
@@ -0,0 +1,161 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "configuration.h"
+#include "container-executor.h"
+#include "utils/string-utils.h"
+#include "utils/path-utils.h"
+#include "modules/common/module-configs.h"
+#include "modules/common/constants.h"
+#include "modules/cgroups/cgroups-operations.h"
+#include "util.h"
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+
+#define MAX_PATH_LEN 4096
+
+static const struct section* cgroup_cfg_section = NULL;
+
+void reload_cgroups_configuration() { // re-fetch the "cgroups" section from get_cfg()
+ cgroup_cfg_section = get_configuration_section(CGROUPS_SECTION_NAME, get_cfg());
+}
+
+/**
+ * Build "<root>/<hierarchy>/<yarn-hierarchy>/<group_id>/<hierarchy>.<param>"
+ * from the cgroups config section. Returns a malloc'ed string owned by the
+ * caller, or NULL if a key is unset, malloc fails or the path is too long.
+ */
+char* get_cgroups_path_to_write(
+    const char* hierarchy_name,
+    const char* param_name,
+    const char* group_id) {
+  int failed = 1;
+  int written = 0;
+  char* buffer = NULL;
+  // Owned values (gpu-module.c frees them as well); released in cleanup.
+  char* cgroups_root = get_section_value(CGROUPS_ROOT_KEY, cgroup_cfg_section);
+  char* yarn_hierarchy_name = get_section_value(
+      CGROUPS_YARN_HIERARCHY_KEY, cgroup_cfg_section);
+
+  if (!cgroups_root || cgroups_root[0] == 0) {
+    fprintf(ERRORFILE, "%s is not defined in container-executor.cfg\n",
+        CGROUPS_ROOT_KEY);
+    goto cleanup;
+  }
+  if (!yarn_hierarchy_name || yarn_hierarchy_name[0] == 0) {
+    fprintf(ERRORFILE, "%s is not defined in container-executor.cfg\n",
+        CGROUPS_YARN_HIERARCHY_KEY);
+    goto cleanup;
+  }
+
+  buffer = malloc(MAX_PATH_LEN + 1);
+  if (!buffer) {
+    fprintf(ERRORFILE, "Failed to allocate memory for output path.\n");
+    goto cleanup;
+  }
+
+  // snprintf reports the length it needed; anything >= the size passed in
+  // means the path was cut short, and a truncated path must not be used.
+  written = snprintf(buffer, MAX_PATH_LEN, "%s/%s/%s/%s/%s.%s",
+      cgroups_root, hierarchy_name, yarn_hierarchy_name,
+      group_id, hierarchy_name, param_name);
+  if (written < 0 || written >= MAX_PATH_LEN) {
+    fprintf(ERRORFILE, "Failed to print output path.\n");
+    goto cleanup;
+  }
+  failed = 0;
+cleanup:
+  free(cgroups_root);
+  free(yarn_hierarchy_name);
+  if (failed) {
+    free(buffer);
+    buffer = NULL;
+  }
+  return buffer;
+}
+
+/**
+ * Append a value to the cgroups parameter file of one group.
+ * - hierarchy_name / param_name / group_id select the file, see
+ *   get_cgroups_path_to_write()
+ * - value is the text written to that file
+ * Progress and errors are logged to ERRORFILE, one message per line.
+ * Returns 0 on success, -1 on failure (and always -1 when not on Linux).
+ */
+int update_cgroups_parameters(
+    const char* hierarchy_name,
+    const char* param_name,
+    const char* group_id,
+    const char* value) {
+#ifndef __linux
+  fprintf(ERRORFILE, "Failed to update cgroups parameters, not supported\n");
+  return -1;
+#endif
+  int failure = 1;
+  FILE *f = NULL;
+  struct stat sb;
+
+  if (!cgroup_cfg_section) {
+    reload_cgroups_configuration();
+  }
+
+  char* full_path = get_cgroups_path_to_write(hierarchy_name, param_name,
+      group_id);
+  if (!full_path) {
+    fprintf(ERRORFILE,
+        "Failed to get cgroups path to write, it should be a configuration issue\n");
+    goto cleanup;
+  }
+
+  if (!verify_path_safety(full_path)) {
+    goto cleanup;
+  }
+
+  // Make sure file exists; opening with "a" below would otherwise create it.
+  if (stat(full_path, &sb) != 0) {
+    fprintf(ERRORFILE, "CGroups: Could not find file to write, %s\n", full_path);
+    goto cleanup;
+  }
+
+  fprintf(ERRORFILE, "CGroups: Updating cgroups, path=%s, value=%s\n",
+      full_path, value);
+
+  // Write values to file
+  f = fopen(full_path, "a");
+  if (!f) {
+    fprintf(ERRORFILE, "CGroups: Failed to open cgroups file, %s\n", full_path);
+    goto cleanup;
+  }
+  if (fprintf(f, "%s", value) < 0) {
+    fprintf(ERRORFILE, "CGroups: Failed to write cgroups file, %s\n", full_path);
+    fclose(f);
+    goto cleanup;
+  }
+  if (fclose(f) != 0) {
+    fprintf(ERRORFILE, "CGroups: Failed to close cgroups file, %s\n", full_path);
+    goto cleanup;
+  }
+  failure = 0;
+
+cleanup:
+  free(full_path);
+  return -failure;
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/cgroups/cgroups-operations.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/cgroups/cgroups-operations.h
new file mode 100644
index 0000000..cf80bcf
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/cgroups/cgroups-operations.h
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _CGROUPS_OPERATIONS_H_
+#define _CGROUPS_OPERATIONS_H_
+
+#define CGROUPS_SECTION_NAME "cgroups"
+#define CGROUPS_ROOT_KEY "root"
+#define CGROUPS_YARN_HIERARCHY_KEY "yarn-hierarchy"
+
+/**
+ * Handle CGroups parameter update requests:
+ * - hierarchy_name: e.g. devices / cpu,cpuacct
+ * - param_name: e.g. deny
+ * - group_id: e.g. container_x_y
+ * - value: e.g. "a *:* rwm"
+ *
+ * Returns 0 if succeeded, -1 otherwise.
+ */
+int update_cgroups_parameters(
+ const char* hierarchy_name,
+ const char* param_name,
+ const char* group_id,
+ const char* value);
+
+ /**
+ * Get CGroups path to update. Visible for testing.
+ * Returns a malloc'ed path the caller must free, or NULL on failure.
+ */
+ char* get_cgroups_path_to_write(
+ const char* hierarchy_name,
+ const char* param_name,
+ const char* group_id);
+
+ /**
+ * Refresh the cached "cgroups" section from get_cfg(). Visible for testing.
+ */
+ void reload_cgroups_configuration();
+
+#endif
\ No newline at end of file
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/gpu/gpu-module.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/gpu/gpu-module.c
new file mode 100644
index 0000000..f96645d
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/gpu/gpu-module.c
@@ -0,0 +1,229 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "configuration.h"
+#include "container-executor.h"
+#include "utils/string-utils.h"
+#include "modules/gpu/gpu-module.h"
+#include "modules/cgroups/cgroups-operations.h"
+#include "modules/common/module-configs.h"
+#include "modules/common/constants.h"
+#include "util.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <unistd.h>
+
+#define EXCLUDED_GPUS_OPTION "excluded_gpus"
+#define CONTAINER_ID_OPTION "container_id"
+#define DEFAULT_NVIDIA_MAJOR_NUMBER 195
+#define MAX_CONTAINER_ID_LEN 128
+
+static const struct section* cfg_section;
+
+/**
+ * Deny the given GPU minor devices to a container through the devices cgroup.
+ *
+ * - update_cgroups_parameters_func_p: callback that applies one cgroup value
+ * - n_minor_devices_to_block / minor_devices: minor numbers to deny
+ * - container_id: cgroup group id the deny rules are written for
+ *
+ * Returns 0 on success (including nothing to block), -1 on failure.
+ */
+static int internal_handle_gpu_request(
+    update_cgroups_parameters_func update_cgroups_parameters_func_p,
+    size_t n_minor_devices_to_block, int minor_devices[],
+    const char* container_id) {
+  char* major_number_str = NULL;
+  char* allowed_minor_numbers_str = NULL;
+  int* allowed_minor_numbers = NULL;
+  size_t n_allowed_minor_numbers = 0;
+  int major_device_number = DEFAULT_NVIDIA_MAJOR_NUMBER;
+  int return_code = 0;
+
+  if (n_minor_devices_to_block == 0) {
+    // no device to block, just return;
+    return 0;
+  }
+
+  // Get major device number from cfg, if not set, major number of (Nvidia)
+  // will be the default value.
+  major_number_str = get_section_value(GPU_MAJOR_NUMBER_CONFIG_KEY,
+      cfg_section);
+  if (major_number_str && 0 != major_number_str[0]) {
+    major_device_number = strtol(major_number_str, NULL, 0);
+  }
+
+  // Get allowed minor device numbers from cfg, if not set, means all minor
+  // devices can be used by YARN
+  allowed_minor_numbers_str = get_section_value(
+      GPU_ALLOWED_DEVICES_MINOR_NUMBERS,
+      cfg_section);
+  if (allowed_minor_numbers_str && 0 != allowed_minor_numbers_str[0]) {
+    int rc = get_numbers_split_by_comma(allowed_minor_numbers_str,
+        &allowed_minor_numbers,
+        &n_allowed_minor_numbers);
+    if (0 != rc) {
+      fprintf(ERRORFILE,
+          "Failed to get allowed minor device numbers from cfg, value=%s\n",
+          allowed_minor_numbers_str);
+      return_code = -1;
+      goto cleanup;
+    }
+
+    // Make sure we're only trying to block devices allowed in config
+    for (size_t i = 0; i < n_minor_devices_to_block; i++) {
+      int found = 0;
+      for (size_t j = 0; j < n_allowed_minor_numbers; j++) {
+        if (minor_devices[i] == allowed_minor_numbers[j]) {
+          found = 1;
+          break;
+        }
+      }
+
+      if (!found) {
+        fprintf(ERRORFILE,
+            "Trying to blacklist device with minor-number=%d which is not on allowed list\n",
+            minor_devices[i]);
+        return_code = -1;
+        goto cleanup;
+      }
+    }
+  }
+
+  // Use cgroup helpers to blacklist devices
+  for (size_t i = 0; i < n_minor_devices_to_block; i++) {
+    char param_value[128];
+    memset(param_value, 0, sizeof(param_value));
+    // Deny the requested minor number itself, not its position in the list:
+    // excluding "1,3" must block 1 and 3 rather than 0 and 1.
+    snprintf(param_value, sizeof(param_value), "c %d:%d rwm",
+        major_device_number, minor_devices[i]);
+
+    int rc = update_cgroups_parameters_func_p("devices", "deny",
+        container_id, param_value);
+    if (0 != rc) {
+      fprintf(ERRORFILE, "CGroups: Failed to update cgroups\n");
+      return_code = -1;
+      goto cleanup;
+    }
+  }
+
+cleanup:
+  free(major_number_str);
+  free(allowed_minor_numbers);
+  free(allowed_minor_numbers_str);
+
+  return return_code;
+}
+
+void reload_gpu_configuration() { // re-fetch the "gpu" section from get_cfg()
+ cfg_section = get_configuration_section(GPU_MODULE_SECTION_NAME, get_cfg());
+}
+
+/*
+ * Format of GPU request commandline:
+ *
+ * c-e --module-gpu --excluded_gpus 0,1,3 --container_id container_x_y
+ * (module_argv[0] is the module switch itself; options are parsed after it)
+ * Returns 0 on success (also when no GPU is excluded), non-zero on failure.
+ */
+int handle_gpu_request(update_cgroups_parameters_func func,
+    const char* module_name, int module_argc, char** module_argv) {
+  if (!cfg_section) {
+    reload_gpu_configuration();
+  }
+
+  if (!module_enabled(cfg_section, GPU_MODULE_SECTION_NAME)) {
+    fprintf(ERRORFILE,
+        "Please make sure gpu module is enabled before using it.\n");
+    return -1;
+  }
+
+  static struct option long_options[] = {
+    {EXCLUDED_GPUS_OPTION, required_argument, 0, 'e' },
+    {CONTAINER_ID_OPTION, required_argument, 0, 'c' },
+    {0, 0, 0, 0}
+  };
+
+  int rc = 0;
+  int c = 0;
+  int option_index = 0;
+  int* minor_devices = NULL;
+  char container_id[MAX_CONTAINER_ID_LEN];
+  memset(container_id, 0, sizeof(container_id));
+  size_t n_minor_devices_to_block = 0;
+  int failed = 0;
+
+  // Restart getopt scanning: the tests call this more than once per process.
+  optind = 1;
+  while((c = getopt_long(module_argc, module_argv, "e:c:",
+                         long_options, &option_index)) != -1) {
+    switch(c) {
+      case 'e':
+        // A repeated option replaces the earlier list; release it first.
+        free(minor_devices);
+        minor_devices = NULL;
+        rc = get_numbers_split_by_comma(optarg, &minor_devices,
+            &n_minor_devices_to_block);
+        if (0 != rc) {
+          fprintf(ERRORFILE,
+              "Failed to get minor devices number from command line, value=%s\n",
+              optarg);
+          failed = 1;
+          goto cleanup;
+        }
+        break;
+      case 'c':
+        // Also reject ids too long for the buffer so the copy stays terminated.
+        if (!validate_container_id(optarg) ||
+            strlen(optarg) >= sizeof(container_id)) {
+          fprintf(ERRORFILE, "Specified container_id=%s is invalid\n", optarg);
+          failed = 1;
+          goto cleanup;
+        }
+        snprintf(container_id, sizeof(container_id), "%s", optarg);
+        break;
+      default:
+        fprintf(ERRORFILE,
+            "Unknown option in gpu command character %d %c, optionindex = %d\n",
+            c, c, optind);
+        failed = 1;
+        goto cleanup;
+    }
+  }
+
+  if (0 == container_id[0]) {
+    fprintf(ERRORFILE, "[%s] --container_id must be specified.\n", __func__);
+    failed = 1;
+    goto cleanup;
+  }
+
+  if (!minor_devices) {
+    fprintf(ERRORFILE, "--excluded_gpus is not specified, skip cgroups call.\n");
+    goto cleanup;
+  }
+  failed = internal_handle_gpu_request(func, n_minor_devices_to_block,
+      minor_devices, container_id);
+
+cleanup:
+  free(minor_devices);
+  return failed;
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/gpu/gpu-module.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/gpu/gpu-module.h
new file mode 100644
index 0000000..59d4c7e
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/modules/gpu/gpu-module.h
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef __FreeBSD__
+#define _WITH_GETLINE
+#endif
+
+#ifndef _MODULES_GPU_GPU_MUDULE_H_
+#define _MODULES_GPU_GPU_MUDULE_H_
+
+#define GPU_MAJOR_NUMBER_CONFIG_KEY "gpu.major-device-number"
+#define GPU_ALLOWED_DEVICES_MINOR_NUMBERS "gpu.allowed-device-minor-numbers"
+#define GPU_MODULE_SECTION_NAME "gpu"
+
+// Same signature as update_cgroups_parameters(); lets unit tests pass a stub.
+typedef int (*update_cgroups_parameters_func)(const char*, const char*,
+ const char*, const char*);
+
+/**
+ * Handle a "--module-gpu" request; returns 0 on success, non-zero on failure.
+ */
+int handle_gpu_request(update_cgroups_parameters_func func,
+ const char* module_name, int module_argc, char** module_argv);
+
+/**
+ * Refresh the cached "gpu" section from get_cfg(), visible for testing.
+ */
+void reload_gpu_configuration();
+
+#endif
\ No newline at end of file
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/modules/cgroups/test-cgroups-module.cc b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/modules/cgroups/test-cgroups-module.cc
new file mode 100644
index 0000000..8ffbe88
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/modules/cgroups/test-cgroups-module.cc
@@ -0,0 +1,121 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <gtest/gtest.h>
+#include <sstream>
+
+extern "C" {
+#include "configuration.h"
+#include "container-executor.h"
+#include "modules/cgroups/cgroups-operations.h"
+#include "test/test-container-executor-common.h"
+#include "util.h"
+}
+
+namespace ContainerExecutor {
+
+class TestCGroupsModule : public ::testing::Test {
+protected:
+  virtual void SetUp() {
+    // Log to stdout/stderr; set first because the mkdirs failure path below
+    // already writes to ERRORFILE.
+    LOGFILE = stdout;
+    ERRORFILE = stderr;
+    if (mkdirs(TEST_ROOT, 0755) != 0) {
+      fprintf(ERRORFILE, "Failed to mkdir TEST_ROOT: %s\n", TEST_ROOT);
+      exit(1);
+    }
+  }
+};
+
+// A config without "root" must not yield a cgroups path.
+TEST_F(TestCGroupsModule, test_cgroups_get_path_without_define_root) {
+  // Write config file.
+  const char *filename = TEST_ROOT "/test_cgroups_get_path_without_root.cfg";
+  FILE *file = fopen(filename, "w");
+
+  // Fail this test only, instead of exiting the whole test binary.
+  ASSERT_TRUE(file) << "FAIL: Could not open configuration file: " << filename
+                    << "\n";
+  fprintf(file, "[cgroups]\n");
+  fprintf(file, "yarn-hierarchy=yarn\n");
+  fclose(file);
+
+  // Read config file
+  read_executor_config(filename);
+  reload_cgroups_configuration();
+
+  char* path = get_cgroups_path_to_write("devices", "deny", "container_1");
+  ASSERT_TRUE(NULL == path) << "Should fail.\n";
+}
+
+// A config without "yarn-hierarchy" must not yield a cgroups path.
+TEST_F(TestCGroupsModule, test_cgroups_get_path_without_define_yarn_hierarchy) {
+  // Write config file (the name is specific to this test).
+  const char *filename =
+      TEST_ROOT "/test_cgroups_get_path_without_yarn_hierarchy.cfg";
+  FILE *file = fopen(filename, "w");
+  ASSERT_TRUE(file) << "FAIL: Could not open configuration file: " << filename
+                    << "\n";
+  fprintf(file, "[cgroups]\n");
+  fprintf(file, "root=/sys/fs/cgroups\n");
+  fclose(file);
+
+  // Read config file
+  read_executor_config(filename);
+  reload_cgroups_configuration();
+  char* path = get_cgroups_path_to_write("devices", "deny", "container_1");
+  ASSERT_TRUE(NULL == path) << "Should fail.\n";
+}
+
+// With both keys set, the full path to the parameter file is returned.
+TEST_F(TestCGroupsModule, test_cgroups_get_path_succeeded) {
+  // Write config file.
+  const char *filename = TEST_ROOT "/test_cgroups_get_path.cfg";
+  FILE *file = fopen(filename, "w");
+  ASSERT_TRUE(file) << "FAIL: Could not open configuration file\n";
+  fprintf(file, "[cgroups]\n");
+  fprintf(file, "root=/sys/fs/cgroups \n");
+  fprintf(file, "yarn-hierarchy=yarn \n");
+  fclose(file);
+
+  // Read config file
+  read_executor_config(filename);
+  reload_cgroups_configuration();
+
+  char* path = get_cgroups_path_to_write("devices", "deny", "container_1");
+  ASSERT_TRUE(NULL != path) << "Should success.\n";
+  const char *EXPECTED =
+      "/sys/fs/cgroups/devices/yarn/container_1/devices.deny";
+  // EXPECT (not ASSERT) so the malloc'ed path is freed even on a mismatch.
+  EXPECT_STREQ(EXPECTED, path)
+      << "Return cgroup-path-to-write is not expected\n";
+  free(path);
+}
+} // namespace ContainerExecutor
\ No newline at end of file
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/modules/gpu/test-gpu-module.cc b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/modules/gpu/test-gpu-module.cc
new file mode 100644
index 0000000..7e41fb4
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/modules/gpu/test-gpu-module.cc
@@ -0,0 +1,203 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vector>
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <gtest/gtest.h>
+#include <sstream>
+
+extern "C" {
+#include "configuration.h"
+#include "container-executor.h"
+#include "modules/cgroups/cgroups-operations.h"
+#include "modules/gpu/gpu-module.h"
+#include "test/test-container-executor-common.h"
+#include "util.h"
+}
+
+namespace ContainerExecutor {
+
+// Fixture: routes executor logging to stdout/stderr and creates TEST_ROOT.
+class TestGpuModule : public ::testing::Test {
+protected:
+  virtual void SetUp() {
+    // Set the log streams first: the failure path below writes to ERRORFILE.
+    LOGFILE = stdout;
+    ERRORFILE = stderr;
+    if (mkdirs(TEST_ROOT, 0755) != 0) {
+      fprintf(ERRORFILE, "Failed to mkdir TEST_ROOT: %s\n", TEST_ROOT);
+      exit(1);
+    }
+  }
+
+  virtual void TearDown() {}
+};
+
+static std::vector<const char*> cgroups_parameters_invoked;
+
+// Test double for update_cgroups_parameters(): appends a copy of each
+// argument, in call order, to cgroups_parameters_invoked. Returns 0, or -1
+// if a copy cannot be allocated. The copies are not freed in this test file.
+static int mock_update_cgroups_parameters(
+    const char* controller_name,
+    const char* param_name,
+    const char* group_id,
+    const char* value) {
+  const char* args[] = { controller_name, param_name, group_id, value };
+  const size_t n_args = sizeof(args) / sizeof(args[0]);
+
+  for (size_t i = 0; i < n_args; i++) {
+    // strdup sizes each copy to its argument, so a long value cannot
+    // overflow a fixed-size buffer.
+    char* copy = strdup(args[i]);
+    if (copy == NULL) {
+      return -1;
+    }
+    cgroups_parameters_invoked.push_back(copy);
+  }
+  return 0;
+}
+
+// Assert that the arguments recorded by the mock are exactly argv[0..argc),
+// in order.
+static void verify_param_updated_to_cgroups(
+    int argc, const char** argv) {
+  ASSERT_EQ(argc, cgroups_parameters_invoked.size());
+
+  for (int idx = 0; idx < argc; ++idx) {
+    ASSERT_STREQ(argv[idx], cgroups_parameters_invoked[idx]);
+  }
+}
+
+// Write a "[gpu]" section with module.enabled set from `enabled` to
+// cfg_filepath, then load that file and refresh the gpu module's section.
+static void write_and_load_gpu_module_to_cfg(const char* cfg_filepath, int enabled) {
+  FILE *cfg = fopen(cfg_filepath, "w");
+  if (cfg == NULL) {
+    printf("FAIL: Could not open configuration file: %s\n", cfg_filepath);
+    exit(1);
+  }
+  const char* enabled_line =
+      enabled ? "module.enabled=true\n" : "module.enabled=false\n";
+  fprintf(cfg, "[gpu]\n");
+  fputs(enabled_line, cfg);
+  fclose(cfg);
+
+  // Read config file
+  read_executor_config(cfg_filepath);
+  reload_gpu_configuration();
+}
+
+// Send a request excluding GPUs 0 and 1 against a config whose gpu module is
+// enabled or disabled; expect rc 0 when enabled and -1 when disabled.
+static void test_gpu_module_enabled_disabled(int enabled) {
+  // Write config file.
+  const char *filename = TEST_ROOT "/test_cgroups_module_enabled_disabled.cfg";
+  write_and_load_gpu_module_to_cfg(filename, enabled);
+
+  // The module switch comes first, followed by its options.
+  char* request[] = { (char*) "--module-gpu", (char*) "--excluded_gpus", (char*) "0,1",
+                      (char*) "--container_id",
+                      (char*) "container_1498064906505_0001_01_000001" };
+  // Five entries; derived from the array so the count cannot drift.
+  const int n_request = sizeof(request) / sizeof(request[0]);
+
+  const int expected_rc = enabled ? 0 : -1;
+  const int actual_rc = handle_gpu_request(&mock_update_cgroups_parameters,
+      "gpu", n_request, request);
+
+  ASSERT_EQ(expected_rc, actual_rc);
+}
+
+TEST_F(TestGpuModule, test_verify_gpu_module_calls_cgroup_parameter) {
+ // Write and load a config with the gpu module enabled.
+ const char *filename = TEST_ROOT "/test_verify_gpu_module_calls_cgroup_parameter.cfg";
+ write_and_load_gpu_module_to_cfg(filename, 1);
+
+ char* container_id = (char*) "container_1498064906505_0001_01_000001";
+ char* argv[] = { (char*) "--module-gpu", (char*) "--excluded_gpus", (char*) "0,1",
+ (char*) "--container_id",
+ container_id };
+
+ /* Test case 1: block 2 devices */
+ cgroups_parameters_invoked.clear();
+ int rc = handle_gpu_request(&mock_update_cgroups_parameters,
+ "gpu", 5, argv);
+ ASSERT_EQ(0, rc) << "Should success.\n";
+
+ // Expect 4 recorded arguments per device (minors 0,1 equal their list positions)
+ const char* expected_cgroups_argv[] = { "devices", "deny", container_id, "c 195:0 rwm",
+ "devices", "deny", container_id, "c 195:1 rwm"};
+ verify_param_updated_to_cgroups(8, expected_cgroups_argv);
+
+ /* Test case 2: block 0 devices (no --excluded_gpus option at all) */
+ cgroups_parameters_invoked.clear();
+ char* argv_1[] = { (char*) "--module-gpu", (char*) "--container_id", container_id };
+ rc = handle_gpu_request(&mock_update_cgroups_parameters,
+ "gpu", 3, argv_1);
+ ASSERT_EQ(0, rc) << "Should success.\n";
+
+ // The mock must not have been called
+ verify_param_updated_to_cgroups(0, NULL);
+}
+
+TEST_F(TestGpuModule, test_illegal_cli_parameters) {
+ // Write and load a config with the gpu module enabled.
+ const char *filename = TEST_ROOT "/test_illegal_cli_parameters.cfg";
+ write_and_load_gpu_module_to_cfg(filename, 1);
+
+ // Illegal container id - 1
+ char* argv[] = { (char*) "--module-gpu", (char*) "--excluded_gpus", (char*) "0,1",
+ (char*) "--container_id", (char*) "xxxx" };
+ int rc = handle_gpu_request(&mock_update_cgroups_parameters,
+ "gpu", 5, argv);
+ ASSERT_NE(0, rc) << "Should fail.\n";
+
+ // Illegal container id - 2
+ char* argv_1[] = { (char*) "--module-gpu", (char*) "--excluded_gpus", (char*) "0,1",
+ (char*) "--container_id", (char*) "container_1" };
+ rc = handle_gpu_request(&mock_update_cgroups_parameters,
+ "gpu", 5, argv_1);
+ ASSERT_NE(0, rc) << "Should fail.\n";
+
+ // Missing container id: --container_id is not passed at all
+ char* argv_2[] = { (char*) "--module-gpu", (char*) "--excluded_gpus", (char*) "0,1" };
+ rc = handle_gpu_request(&mock_update_cgroups_parameters,
+ "gpu", 3, argv_2);
+ ASSERT_NE(0, rc) << "Should fail.\n";
+}
+
+TEST_F(TestGpuModule, test_gpu_module_disabled) {
+ test_gpu_module_enabled_disabled(0); // module disabled: the request must return -1
+}
+
+TEST_F(TestGpuModule, test_gpu_module_enabled) {
+ test_gpu_module_enabled_disabled(1); // module enabled: the request must return 0
+}
+} // namespace ContainerExecutor
\ No newline at end of file
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c
index 9e85b3f..235ea77 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c
@@ -1392,7 +1392,6 @@ int main(int argc, char **argv) {
#endif
test_trim_function();
- run("rm -fr " TEST_ROOT);
printf("\nFinished tests\n");
free(current_username);
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org