You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2016/07/06 04:11:25 UTC
mesos git commit: Added test for using an Nvidia Docker image.
Repository: mesos
Updated Branches:
refs/heads/master 6fd7f8125 -> 516d8ef07
Added test for using an Nvidia Docker image.
This test ensures that when using one of Nvidia's Docker images
(these contain a special label), we mount a volume that contains
the libraries and binaries.
Review: https://reviews.apache.org/r/49678/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/516d8ef0
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/516d8ef0
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/516d8ef0
Branch: refs/heads/master
Commit: 516d8ef0759937eb327893a01e2bdef8c7438b1c
Parents: 6fd7f81
Author: Kevin Klues <kl...@gmail.com>
Authored: Tue Jul 5 20:55:36 2016 -0700
Committer: Benjamin Mahler <bm...@apache.org>
Committed: Tue Jul 5 21:09:59 2016 -0700
----------------------------------------------------------------------
.../containerizer/nvidia_gpu_isolator_tests.cpp | 116 +++++++++++++++++++
1 file changed, 116 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/516d8ef0/src/tests/containerizer/nvidia_gpu_isolator_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/containerizer/nvidia_gpu_isolator_tests.cpp b/src/tests/containerizer/nvidia_gpu_isolator_tests.cpp
index b551213..7944c0d 100644
--- a/src/tests/containerizer/nvidia_gpu_isolator_tests.cpp
+++ b/src/tests/containerizer/nvidia_gpu_isolator_tests.cpp
@@ -174,6 +174,122 @@ TEST_F(NvidiaGpuTest, ROOT_CGROUPS_NVIDIA_GPU_VerifyDeviceAccess)
}
+// This test verifies that we can enable the Nvidia GPU isolator
+// and launch tasks with restricted access to GPUs while running
+// inside one of Nvidia's images. These images have a special
+// label that indicates that we need to mount a volume containing
+// the Nvidia libraries and binaries. We first launch a task with
+// 1 GPU and verify that a call to `nvidia-smi` both succeeds and
+// reports exactly 1 GPU available. We then launch a task with
+// access to 0 GPUs and verify that a call to `nvidia-smi` fails.
+TEST_F(NvidiaGpuTest, ROOT_INTERNET_CURL_CGROUPS_NVIDIA_GPU_NvidiaDockerImage)
+{
+ Try<Owned<cluster::Master>> master = StartMaster();
+ ASSERT_SOME(master);
+
+ slave::Flags flags = CreateSlaveFlags();
+ flags.isolation = "docker/runtime,filesystem/linux,"
+ "cgroups/devices,gpu/nvidia";
+ flags.image_providers = "docker";
+ flags.nvidia_gpu_devices = vector<unsigned int>({0u});
+ flags.resources = "cpus:1;mem:128;gpus:1";
+
+ Owned<MasterDetector> detector = master.get()->createDetector();
+
+ Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), flags);
+ ASSERT_SOME(slave);
+
+ MockScheduler sched;
+
+ FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
+ frameworkInfo.add_capabilities()->set_type(
+ FrameworkInfo::Capability::GPU_RESOURCES);
+
+ MesosSchedulerDriver driver(
+ &sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);
+
+ Future<Nothing> schedRegistered;
+ EXPECT_CALL(sched, registered(_, _, _))
+ .WillOnce(FutureSatisfy(&schedRegistered));
+
+ Future<vector<Offer>> offers1, offers2;
+ EXPECT_CALL(sched, resourceOffers(_, _))
+ .WillOnce(FutureArg<1>(&offers1))
+ .WillOnce(FutureArg<1>(&offers2))
+ .WillRepeatedly(Return()); // Ignore subsequent offers.
+
+ driver.start();
+
+ AWAIT_READY(schedRegistered);
+
+ Image image;
+ image.set_type(Image::DOCKER);
+ image.mutable_docker()->set_name("nvidia/cuda");
+
+ // Launch a task requesting 1 GPU and verify
+ // that `nvidia-smi` lists exactly one GPU.
+ AWAIT_READY(offers1);
+ ASSERT_EQ(1u, offers1->size());
+
+ TaskInfo task1 = createTask(
+ offers1->at(0).slave_id(),
+ Resources::parse("cpus:1;mem:128;gpus:1").get(),
+ "NUM_GPUS=`nvidia-smi --list-gpus | wc -l`;\n"
+ "if [ \"$NUM_GPUS\" != \"1\" ]; then\n"
+ " exit 1;\n"
+ "fi");
+
+ ContainerInfo* container = task1.mutable_container();
+ container->set_type(ContainerInfo::MESOS);
+ container->mutable_mesos()->mutable_image()->CopyFrom(image);
+
+ Future<TaskStatus> statusRunning1, statusFinished1;
+ EXPECT_CALL(sched, statusUpdate(_, _))
+ .WillOnce(FutureArg<1>(&statusRunning1))
+ .WillOnce(FutureArg<1>(&statusFinished1));
+
+ driver.launchTasks(offers1->at(0).id(), {task1});
+
+ // We wait up to 120 seconds
+ // to download the Docker image.
+ AWAIT_READY_FOR(statusRunning1, Seconds(120));
+ ASSERT_EQ(TASK_RUNNING, statusRunning1->state());
+
+ AWAIT_READY(statusFinished1);
+ ASSERT_EQ(TASK_FINISHED, statusFinished1->state());
+
+ // Launch a task requesting no GPUs and
+ // verify that running `nvidia-smi` fails.
+ AWAIT_READY(offers2);
+ EXPECT_EQ(1u, offers2->size());
+
+ TaskInfo task2 = createTask(
+ offers2->at(0).slave_id(),
+ Resources::parse("cpus:1;mem:128").get(),
+ "nvidia-smi");
+
+ container = task2.mutable_container();
+ container->set_type(ContainerInfo::MESOS);
+ container->mutable_mesos()->mutable_image()->CopyFrom(image);
+
+ Future<TaskStatus> statusRunning2, statusFailed2;
+ EXPECT_CALL(sched, statusUpdate(&driver, _))
+ .WillOnce(FutureArg<1>(&statusRunning2))
+ .WillOnce(FutureArg<1>(&statusFailed2));
+
+ driver.launchTasks(offers2->at(0).id(), {task2});
+
+ AWAIT_READY_FOR(statusRunning2, Seconds(120));
+ ASSERT_EQ(TASK_RUNNING, statusRunning2->state());
+
+ AWAIT_READY(statusFailed2);
+ ASSERT_EQ(TASK_FAILED, statusFailed2->state());
+
+ driver.stop();
+ driver.join();
+}
+
+
// This test verifies correct failure semantics when
// a task requests a fractional number of GPUs.
TEST_F(NvidiaGpuTest, ROOT_CGROUPS_NVIDIA_GPU_FractionalResources)