You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ch...@apache.org on 2018/06/01 01:32:31 UTC
[05/12] mesos git commit: Added a unit test for SLRP operation state
metrics.
Added a unit test for SLRP operation state metrics.
This patch adds the `ROOT_OperationStateMetrics` test that issues a
`CREATE_VOLUME` followed by two `DESTROY_VOLUME`s. The first one will
fail due to an out-of-band deletion of the actual volume, and the second
one will be dropped due to modifying the resource version.
Review: https://reviews.apache.org/r/65666/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/5bdea195
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/5bdea195
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/5bdea195
Branch: refs/heads/master
Commit: 5bdea1951a63b92dacc4b97ca5dd8b2e86467f98
Parents: 70b407d
Author: Chun-Hung Hsiao <ch...@apache.org>
Authored: Thu May 17 17:45:06 2018 -0700
Committer: Chun-Hung Hsiao <ch...@mesosphere.io>
Committed: Thu May 31 18:29:56 2018 -0700
----------------------------------------------------------------------
.../storage_local_resource_provider_tests.cpp | 301 +++++++++++++++++--
1 file changed, 277 insertions(+), 24 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/5bdea195/src/tests/storage_local_resource_provider_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/storage_local_resource_provider_tests.cpp b/src/tests/storage_local_resource_provider_tests.cpp
index 04a75fc..3a2eec3 100644
--- a/src/tests/storage_local_resource_provider_tests.cpp
+++ b/src/tests/storage_local_resource_provider_tests.cpp
@@ -56,9 +56,9 @@ using mesos::master::detector::StandaloneMasterDetector;
using process::Clock;
using process::Future;
using process::Owned;
+using process::post;
using testing::AtMost;
-using testing::DoAll;
using testing::Not;
using testing::Sequence;
@@ -265,6 +265,12 @@ public:
ASSERT_SOME(write);
}
+ string metricName(const string& basename)
+ {
+ return "resource_providers/" + stringify(TEST_SLRP_TYPE) + "." +
+ stringify(TEST_SLRP_NAME) + "/" + basename;
+ }
+
protected:
Modules modules;
vector<string> slaveWorkDirs;
@@ -2783,9 +2789,9 @@ TEST_F(
}
-// This test verifies that storage local resource provider metrics are
-// properly reported.
-TEST_F(StorageLocalResourceProviderTest, ROOT_Metrics)
+// This test verifies that storage local resource provider properly
+// reports metrics related to CSI plugin terminations.
+TEST_F(StorageLocalResourceProviderTest, ROOT_PluginTerminationMetrics)
{
setupResourceProviderConfig(Gigabytes(4));
@@ -2820,20 +2826,16 @@ TEST_F(StorageLocalResourceProviderTest, ROOT_Metrics)
AWAIT_READY(pluginConnected);
- const string prefix =
- "resource_providers/" + stringify(TEST_SLRP_TYPE) +
- "." + stringify(TEST_SLRP_NAME) + "/";
-
JSON::Object snapshot = Metrics();
- ASSERT_NE(0u, snapshot.values.count(
- prefix + "csi_controller_plugin_terminations"));
- EXPECT_EQ(0, snapshot.values.at(
- prefix + "csi_controller_plugin_terminations"));
- ASSERT_NE(0u, snapshot.values.count(
- prefix + "csi_node_plugin_terminations"));
- EXPECT_EQ(0, snapshot.values.at(
- prefix + "csi_node_plugin_terminations"));
+ ASSERT_NE(0u, snapshot.values.count(metricName(
+ "csi_controller_plugin_terminations")));
+ EXPECT_EQ(0, snapshot.values.at(metricName(
+ "csi_controller_plugin_terminations")));
+ ASSERT_NE(0u, snapshot.values.count(metricName(
+ "csi_node_plugin_terminations")));
+ EXPECT_EQ(0, snapshot.values.at(metricName(
+ "csi_node_plugin_terminations")));
// Get the ID of the CSI plugin container.
Future<hashset<ContainerID>> pluginContainers = containerizer->containers();
@@ -2860,14 +2862,265 @@ TEST_F(StorageLocalResourceProviderTest, ROOT_Metrics)
snapshot = Metrics();
- ASSERT_NE(0u, snapshot.values.count(
- prefix + "csi_controller_plugin_terminations"));
- EXPECT_EQ(1, snapshot.values.at(
- prefix + "csi_controller_plugin_terminations"));
- ASSERT_NE(0u, snapshot.values.count(
- prefix + "csi_node_plugin_terminations"));
- EXPECT_EQ(1, snapshot.values.at(
- prefix + "csi_node_plugin_terminations"));
+ ASSERT_NE(0u, snapshot.values.count(metricName(
+ "csi_controller_plugin_terminations")));
+ EXPECT_EQ(1, snapshot.values.at(metricName(
+ "csi_controller_plugin_terminations")));
+ ASSERT_NE(0u, snapshot.values.count(metricName(
+ "csi_node_plugin_terminations")));
+ EXPECT_EQ(1, snapshot.values.at(metricName(
+ "csi_node_plugin_terminations")));
+}
+
+
+// This test verifies that storage local resource provider properly
+// reports metrics related to operation states.
+// TODO(chhsiao): Currently there is no way to test the `pending` metric for
+// operations since we have no control over the completion of an operation. Once
+// we support out-of-band CSI plugins through domain sockets, we could test this
+// metric against a mock CSI plugin.
+TEST_F(StorageLocalResourceProviderTest, ROOT_OperationStateMetrics)
+{
+ loadUriDiskProfileAdaptorModule();
+
+ setupResourceProviderConfig(Gigabytes(4));
+ setupDiskProfileMapping();
+
+ master::Flags masterFlags = CreateMasterFlags();
+ masterFlags.allocation_interval = Milliseconds(50);
+
+ Try<Owned<cluster::Master>> master = StartMaster(masterFlags);
+ ASSERT_SOME(master);
+
+ Owned<MasterDetector> detector = master.get()->createDetector();
+
+ slave::Flags slaveFlags = CreateSlaveFlags();
+ slaveFlags.isolation = "filesystem/linux";
+
+ slaveFlags.resource_provider_config_dir = resourceProviderConfigDir;
+ slaveFlags.disk_profile_adaptor = URI_DISK_PROFILE_ADAPTOR_NAME;
+
+ Future<SlaveRegisteredMessage> slaveRegisteredMessage =
+ FUTURE_PROTOBUF(SlaveRegisteredMessage(), _, _);
+
+ Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), slaveFlags);
+ ASSERT_SOME(slave);
+
+ AWAIT_READY(slaveRegisteredMessage);
+
+ // Register a framework to exercise operations.
+ FrameworkInfo framework = DEFAULT_FRAMEWORK_INFO;
+ framework.set_roles(0, "storage");
+
+ MockScheduler sched;
+ MesosSchedulerDriver driver(
+ &sched, framework, master.get()->pid, DEFAULT_CREDENTIAL);
+
+ EXPECT_CALL(sched, registered(&driver, _, _));
+
+ // The framework is expected to see the following offers in sequence:
+ // 1. One containing a RAW disk resource before `CREATE_VOLUME`.
+ // 2. One containing a MOUNT disk resource after `CREATE_VOLUME`.
+ // 3. One containing the same MOUNT disk resource after a failed
+ // `DESTROY_VOLUME`.
+ //
+ // We set up the expectations for these offers as the test progresses.
+ Future<vector<Offer>> rawDiskOffers;
+ Future<vector<Offer>> volumeCreatedOffers;
+ Future<vector<Offer>> operationFailedOffers;
+
+ Sequence offers;
+
+ // We use the following filter to filter offers that do not have
+ // wanted resources for 365 days (the maximum).
+ Filters declineFilters;
+ declineFilters.set_refuse_seconds(Days(365).secs());
+
+ // Decline offers that contain only the agent's default resources.
+ EXPECT_CALL(sched, resourceOffers(&driver, _))
+ .WillRepeatedly(DeclineOffers(declineFilters));
+
+ // We are only interested in any storage pool or created volume which
+ // has a "volume-default" profile.
+ auto hasSourceType = [](
+ const Resource& r,
+ const Resource::DiskInfo::Source::Type& type) {
+ return r.has_disk() &&
+ r.disk().has_source() &&
+ r.disk().source().has_profile() &&
+ r.disk().source().profile() == "volume-default" &&
+ r.disk().source().type() == type;
+ };
+
+ EXPECT_CALL(sched, resourceOffers(&driver, OffersHaveAnyResource(
+ std::bind(hasSourceType, lambda::_1, Resource::DiskInfo::Source::RAW))))
+ .InSequence(offers)
+ .WillOnce(FutureArg<1>(&rawDiskOffers));
+
+ driver.start();
+
+ AWAIT_READY(rawDiskOffers);
+ ASSERT_FALSE(rawDiskOffers->empty());
+
+ Option<Resource> source;
+
+ foreach (const Resource& resource, rawDiskOffers->at(0).resources()) {
+ if (hasSourceType(resource, Resource::DiskInfo::Source::RAW)) {
+ source = resource;
+ break;
+ }
+ }
+
+ ASSERT_SOME(source);
+
+ JSON::Object snapshot = Metrics();
+
+ ASSERT_NE(0u, snapshot.values.count(metricName(
+ "operations/create_volume/finished")));
+ EXPECT_EQ(0, snapshot.values.at(metricName(
+ "operations/create_volume/finished")));
+ ASSERT_NE(0u, snapshot.values.count(metricName(
+ "operations/destroy_volume/failed")));
+ EXPECT_EQ(0, snapshot.values.at(metricName(
+ "operations/destroy_volume/failed")));
+ ASSERT_NE(0u, snapshot.values.count(metricName(
+ "operations/destroy_volume/dropped")));
+ EXPECT_EQ(0, snapshot.values.at(metricName(
+ "operations/destroy_volume/dropped")));
+
+ // Create a volume.
+ EXPECT_CALL(sched, resourceOffers(&driver, OffersHaveAnyResource(
+ std::bind(hasSourceType, lambda::_1, Resource::DiskInfo::Source::MOUNT))))
+ .InSequence(offers)
+ .WillOnce(FutureArg<1>(&volumeCreatedOffers));
+
+ // We use the following filter so that the resources will not be
+ // filtered for 5 seconds (the default).
+ Filters acceptFilters;
+ acceptFilters.set_refuse_seconds(0);
+
+ driver.acceptOffers(
+ {rawDiskOffers->at(0).id()},
+ {CREATE_VOLUME(source.get(), Resource::DiskInfo::Source::MOUNT)},
+ acceptFilters);
+
+ AWAIT_READY(volumeCreatedOffers);
+ ASSERT_FALSE(volumeCreatedOffers->empty());
+
+ Option<Resource> volume;
+
+ foreach (const Resource& resource, volumeCreatedOffers->at(0).resources()) {
+ if (hasSourceType(resource, Resource::DiskInfo::Source::MOUNT)) {
+ volume = resource;
+ break;
+ }
+ }
+
+ ASSERT_SOME(volume);
+ ASSERT_TRUE(volume->disk().source().has_id());
+ ASSERT_TRUE(volume->disk().source().has_metadata());
+ ASSERT_TRUE(volume->disk().source().has_mount());
+ ASSERT_TRUE(volume->disk().source().mount().has_root());
+ EXPECT_FALSE(path::absolute(volume->disk().source().mount().root()));
+
+ snapshot = Metrics();
+
+ ASSERT_NE(0u, snapshot.values.count(metricName(
+ "operations/create_volume/finished")));
+ EXPECT_EQ(1, snapshot.values.at(metricName(
+ "operations/create_volume/finished")));
+ ASSERT_NE(0u, snapshot.values.count(metricName(
+ "operations/destroy_volume/failed")));
+ EXPECT_EQ(0, snapshot.values.at(metricName(
+ "operations/destroy_volume/failed")));
+ ASSERT_NE(0u, snapshot.values.count(metricName(
+ "operations/destroy_volume/dropped")));
+ EXPECT_EQ(0, snapshot.values.at(metricName(
+ "operations/destroy_volume/dropped")));
+
+ // Remove the volume out of band to fail `DESTROY_VOLUME`.
+ Option<string> volumePath;
+
+ foreach (const Label& label, volume->disk().source().metadata().labels()) {
+ if (label.key() == "path") {
+ volumePath = label.value();
+ break;
+ }
+ }
+
+ ASSERT_SOME(volumePath);
+ ASSERT_SOME(os::rmdir(volumePath.get()));
+
+ // Destroy the created volume, which will fail.
+ EXPECT_CALL(sched, resourceOffers(&driver, OffersHaveResource(volume.get())))
+ .InSequence(offers)
+ .WillOnce(FutureArg<1>(&operationFailedOffers))
+ .WillRepeatedly(DeclineOffers(declineFilters)); // Decline further offers.
+
+ driver.acceptOffers(
+ {volumeCreatedOffers->at(0).id()},
+ {DESTROY_VOLUME(volume.get())},
+ acceptFilters);
+
+ AWAIT_READY(operationFailedOffers);
+ ASSERT_FALSE(operationFailedOffers->empty());
+
+ snapshot = Metrics();
+
+ ASSERT_NE(0u, snapshot.values.count(metricName(
+ "operations/create_volume/finished")));
+ EXPECT_EQ(1, snapshot.values.at(metricName(
+ "operations/create_volume/finished")));
+ ASSERT_NE(0u, snapshot.values.count(metricName(
+ "operations/destroy_volume/failed")));
+ EXPECT_EQ(1, snapshot.values.at(metricName(
+ "operations/destroy_volume/failed")));
+ ASSERT_NE(0u, snapshot.values.count(metricName(
+ "operations/destroy_volume/dropped")));
+ EXPECT_EQ(0, snapshot.values.at(metricName(
+ "operations/destroy_volume/dropped")));
+
+ // Destroy the volume again, which will be dropped this time.
+ Future<ApplyOperationMessage> applyOperationMessage =
+ DROP_PROTOBUF(ApplyOperationMessage(), _, _);
+
+ driver.acceptOffers(
+ {operationFailedOffers->at(0).id()},
+ {DESTROY_VOLUME(volume.get())},
+ acceptFilters);
+
+ AWAIT_READY(applyOperationMessage);
+ ASSERT_TRUE(applyOperationMessage
+ ->resource_version_uuid().has_resource_provider_id());
+
+ // Modify the resource version UUID to drop `DESTROY_VOLUME`.
+ Future<UpdateOperationStatusMessage> operationDroppedStatus =
+ FUTURE_PROTOBUF(UpdateOperationStatusMessage(), _, _);
+
+ ApplyOperationMessage spoofedApplyOperationMessage =
+ applyOperationMessage.get();
+ spoofedApplyOperationMessage.mutable_resource_version_uuid()->mutable_uuid()
+ ->set_value(id::UUID::random().toBytes());
+
+ post(master.get()->pid, slave.get()->pid, spoofedApplyOperationMessage);
+
+ AWAIT_READY(operationDroppedStatus);
+ EXPECT_EQ(OPERATION_DROPPED, operationDroppedStatus->status().state());
+
+ snapshot = Metrics();
+
+ ASSERT_NE(0u, snapshot.values.count(metricName(
+ "operations/create_volume/finished")));
+ EXPECT_EQ(1, snapshot.values.at(metricName(
+ "operations/create_volume/finished")));
+ ASSERT_NE(0u, snapshot.values.count(metricName(
+ "operations/destroy_volume/failed")));
+ EXPECT_EQ(1, snapshot.values.at(metricName(
+ "operations/destroy_volume/failed")));
+ ASSERT_NE(0u, snapshot.values.count(metricName(
+ "operations/destroy_volume/dropped")));
+ EXPECT_EQ(1, snapshot.values.at(metricName(
+ "operations/destroy_volume/dropped")));
}