You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2016/01/15 08:29:09 UTC

[1/2] mesos git commit: Added flags to set size of completed task/framework history.

Repository: mesos
Updated Branches:
  refs/heads/master b62949e6b -> f99ae0e76


Added flags to set size of completed task/framework history.

The default size of the buffers used to hold the state of completed
tasks/frameworks is very large. However, many users don't care much
about this information when requesting a master's state. Moreover, if a
large number of frameworks request this state simultaneously, the
master can quickly become overwhelmed because the process of generating
this state both blocks the master and takes up a lot of cycles. By
allowing the user to configure the size of the buffers used to hold
this state, we let the user decide how much state is needed.

This commit is based on a pull request generated by Felix Bechstein at:
https://github.com/apache/mesos/pull/82

Review: https://reviews.apache.org/r/42053/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/b843afa1
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/b843afa1
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/b843afa1

Branch: refs/heads/master
Commit: b843afa130c321747a70da2fec9ea3cedaf34c1c
Parents: b62949e
Author: Kevin Klues <kl...@gmail.com>
Authored: Thu Jan 14 22:55:54 2016 -0800
Committer: Benjamin Mahler <be...@gmail.com>
Committed: Thu Jan 14 22:55:54 2016 -0800

----------------------------------------------------------------------
 docs/configuration.md    | 17 +++++++++++++++++
 src/master/constants.cpp |  4 ++--
 src/master/constants.hpp | 11 +++++------
 src/master/flags.cpp     | 10 ++++++++++
 src/master/flags.hpp     |  2 ++
 src/master/master.cpp    |  9 +++++----
 src/master/master.hpp    |  9 ++++++---
 7 files changed, 47 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/b843afa1/docs/configuration.md
----------------------------------------------------------------------
diff --git a/docs/configuration.md b/docs/configuration.md
index 7e0eb95..329924e 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -473,6 +473,23 @@ file:///path/to/file (where file contains one of the above)</code></pre>
   </tr>
   <tr>
     <td>
+      --max_completed_frameworks=VALUE
+    </td>
+    <td>
+      Maximum number of completed frameworks to store in memory. (default: 50)
+    </td>
+  </tr>
+  <tr>
+    <td>
+      --max_completed_tasks_per_framework=VALUE
+    </td>
+    <td>
+      Maximum number of completed tasks per framework to store in memory.
+      (default: 1000)
+    </td>
+  </tr>
+  <tr>
+    <td>
       --max_slave_ping_timeouts=VALUE
     </td>
     <td>

http://git-wip-us.apache.org/repos/asf/mesos/blob/b843afa1/src/master/constants.cpp
----------------------------------------------------------------------
diff --git a/src/master/constants.cpp b/src/master/constants.cpp
index 77dd314..e316f97 100644
--- a/src/master/constants.cpp
+++ b/src/master/constants.cpp
@@ -35,8 +35,8 @@ const size_t DEFAULT_MAX_SLAVE_PING_TIMEOUTS = 5;
 const Duration MIN_SLAVE_REREGISTER_TIMEOUT = Minutes(10);
 const double RECOVERY_SLAVE_REMOVAL_PERCENT_LIMIT = 1.0; // 100%.
 const size_t MAX_REMOVED_SLAVES = 100000;
-const uint32_t MAX_COMPLETED_FRAMEWORKS = 50;
-const uint32_t MAX_COMPLETED_TASKS_PER_FRAMEWORK = 1000;
+const size_t DEFAULT_MAX_COMPLETED_FRAMEWORKS = 50;
+const size_t DEFAULT_MAX_COMPLETED_TASKS_PER_FRAMEWORK = 1000;
 const Duration WHITELIST_WATCH_INTERVAL = Seconds(5);
 const uint32_t TASK_LIMIT = 100;
 const std::string MASTER_INFO_LABEL = "info";

http://git-wip-us.apache.org/repos/asf/mesos/blob/b843afa1/src/master/constants.hpp
----------------------------------------------------------------------
diff --git a/src/master/constants.hpp b/src/master/constants.hpp
index ebab341..2c3299b 100644
--- a/src/master/constants.hpp
+++ b/src/master/constants.hpp
@@ -87,13 +87,12 @@ extern const double RECOVERY_SLAVE_REMOVAL_PERCENT_LIMIT;
 // Maximum number of removed slaves to store in the cache.
 extern const size_t MAX_REMOVED_SLAVES;
 
-// Maximum number of completed frameworks to store in the cache.
-// TODO(thomasm): Make configurable.
-extern const uint32_t MAX_COMPLETED_FRAMEWORKS;
+// Default maximum number of completed frameworks to store in the cache.
+extern const size_t DEFAULT_MAX_COMPLETED_FRAMEWORKS;
 
-// Maximum number of completed tasks per framework to store in the
-// cache.  TODO(thomasm): Make configurable.
-extern const uint32_t MAX_COMPLETED_TASKS_PER_FRAMEWORK;
+// Default maximum number of completed tasks per framework
+// to store in the cache.
+extern const size_t DEFAULT_MAX_COMPLETED_TASKS_PER_FRAMEWORK;
 
 // Time interval to check for updated watchers list.
 extern const Duration WHITELIST_WATCH_INTERVAL;

http://git-wip-us.apache.org/repos/asf/mesos/blob/b843afa1/src/master/flags.cpp
----------------------------------------------------------------------
diff --git a/src/master/flags.cpp b/src/master/flags.cpp
index 60c60c8..66c26be 100644
--- a/src/master/flags.cpp
+++ b/src/master/flags.cpp
@@ -461,4 +461,14 @@ mesos::internal::master::Flags::Flags()
       "\n"
       "Currently there is no support for multiple HTTP authenticators.",
       DEFAULT_HTTP_AUTHENTICATOR);
+
+  add(&Flags::max_completed_frameworks,
+      "max_completed_frameworks",
+      "Maximum number of completed frameworks to store in memory.",
+      DEFAULT_MAX_COMPLETED_FRAMEWORKS);
+
+  add(&Flags::max_completed_tasks_per_framework,
+      "max_completed_tasks_per_framework",
+      "Maximum number of completed tasks per framework to store in memory.",
+      DEFAULT_MAX_COMPLETED_TASKS_PER_FRAMEWORK);
 }

http://git-wip-us.apache.org/repos/asf/mesos/blob/b843afa1/src/master/flags.hpp
----------------------------------------------------------------------
diff --git a/src/master/flags.hpp b/src/master/flags.hpp
index d923b1b..6f53099 100644
--- a/src/master/flags.hpp
+++ b/src/master/flags.hpp
@@ -80,6 +80,8 @@ public:
   size_t max_slave_ping_timeouts;
   std::string authorizers;
   std::string http_authenticators;
+  size_t max_completed_frameworks;
+  size_t max_completed_tasks_per_framework;
 
 #ifdef WITH_NETWORK_ISOLATOR
   Option<size_t> max_executors_per_slave;

http://git-wip-us.apache.org/repos/asf/mesos/blob/b843afa1/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index 863a11c..9ee5627 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -293,6 +293,7 @@ Master::Master(
     contender(_contender),
     detector(_detector),
     authorizer(_authorizer),
+    frameworks(flags),
     authenticator(None()),
     metrics(new Metrics(*this)),
     electedTime(None())
@@ -2070,7 +2071,7 @@ void Master::_subscribe(
     FrameworkInfo frameworkInfo_ = frameworkInfo;
     frameworkInfo_.mutable_id()->CopyFrom(newFrameworkId());
 
-    Framework* framework = new Framework(this, frameworkInfo_, http);
+    Framework* framework = new Framework(this, flags, frameworkInfo_, http);
 
     addFramework(framework);
 
@@ -2157,7 +2158,7 @@ void Master::_subscribe(
     // elected Mesos master to which either an existing scheduler or a
     // failed-over one is connecting. Create a Framework object and add
     // any tasks it has that have been reported by reconnecting slaves.
-    Framework* framework = new Framework(this, frameworkInfo, http);
+    Framework* framework = new Framework(this, flags, frameworkInfo, http);
 
     // Add active tasks and executors to the framework.
     foreachvalue (Slave* slave, slaves.registered) {
@@ -2369,7 +2370,7 @@ void Master::_subscribe(
     FrameworkInfo frameworkInfo_ = frameworkInfo;
     frameworkInfo_.mutable_id()->CopyFrom(newFrameworkId());
 
-    Framework* framework = new Framework(this, frameworkInfo_, from);
+    Framework* framework = new Framework(this, flags, frameworkInfo_, from);
 
     addFramework(framework);
 
@@ -2477,7 +2478,7 @@ void Master::_subscribe(
     // elected Mesos master to which either an existing scheduler or a
     // failed-over one is connecting. Create a Framework object and add
     // any tasks it has that have been reported by reconnecting slaves.
-    Framework* framework = new Framework(this, frameworkInfo, from);
+    Framework* framework = new Framework(this, flags, frameworkInfo, from);
 
     // Add active tasks and executors to the framework.
     foreachvalue (Slave* slave, slaves.registered) {

http://git-wip-us.apache.org/repos/asf/mesos/blob/b843afa1/src/master/master.hpp
----------------------------------------------------------------------
diff --git a/src/master/master.hpp b/src/master/master.hpp
index f02d165..3a7e182 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -1348,7 +1348,8 @@ private:
 
   struct Frameworks
   {
-    Frameworks() : completed(MAX_COMPLETED_FRAMEWORKS) {}
+    Frameworks(const Flags& masterFlags)
+      : completed(masterFlags.max_completed_frameworks) {}
 
     hashmap<FrameworkID, Framework*> registered;
     boost::circular_buffer<std::shared_ptr<Framework>> completed;
@@ -1677,6 +1678,7 @@ private:
 struct Framework
 {
   Framework(Master* const _master,
+            const Flags& masterFlags,
             const FrameworkInfo& _info,
             const process::UPID& _pid,
             const process::Time& time = process::Clock::now())
@@ -1687,9 +1689,10 @@ struct Framework
       active(true),
       registeredTime(time),
       reregisteredTime(time),
-      completedTasks(MAX_COMPLETED_TASKS_PER_FRAMEWORK) {}
+      completedTasks(masterFlags.max_completed_tasks_per_framework) {}
 
   Framework(Master* const _master,
+            const Flags& masterFlags,
             const FrameworkInfo& _info,
             const HttpConnection& _http,
             const process::Time& time = process::Clock::now())
@@ -1700,7 +1703,7 @@ struct Framework
       active(true),
       registeredTime(time),
       reregisteredTime(time),
-      completedTasks(MAX_COMPLETED_TASKS_PER_FRAMEWORK) {}
+      completedTasks(masterFlags.max_completed_tasks_per_framework) {}
 
   ~Framework()
   {


[2/2] mesos git commit: Added unit test for framework/task history flags.

Posted by bm...@apache.org.
Added unit test for framework/task history flags.

This commit adds tests to verify that the max_completed_frameworks and
max_completed_tasks_per_framework flags for master work properly.
Specifically, we test to verify that the proper amount of history is
maintained both when these flags are set to 0 and when they are set to
positive values less than or equal to the total number of frameworks
and tasks per framework actually launched.

Review: https://reviews.apache.org/r/42212/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/f99ae0e7
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/f99ae0e7
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/f99ae0e7

Branch: refs/heads/master
Commit: f99ae0e7618b8a6508ff7d97c290572592508737
Parents: b843afa
Author: Kevin Klues <kl...@gmail.com>
Authored: Thu Jan 14 22:57:11 2016 -0800
Committer: Benjamin Mahler <be...@gmail.com>
Committed: Thu Jan 14 23:29:00 2016 -0800

----------------------------------------------------------------------
 src/tests/master_tests.cpp | 168 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 168 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/f99ae0e7/src/tests/master_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp
index 9638fb8..aa0e0d8 100644
--- a/src/tests/master_tests.cpp
+++ b/src/tests/master_tests.cpp
@@ -3917,6 +3917,174 @@ TEST_F(MasterTest, FrameworksEndpointOneFramework)
   Shutdown();
 }
 
+
+// Test the max_completed_frameworks flag for master.
+TEST_F(MasterTest, MaxCompletedFrameworksFlag)
+{
+  // In order to verify that the proper amount of history
+  // is maintained, we launch exactly 2 frameworks when
+  // 'max_completed_frameworks' is set to 0, 1, and 2. This
+  // covers the cases of maintaining no history, some history
+  // less than the total number of frameworks launched, and
+  // history equal to the total number of frameworks launched.
+  const size_t totalFrameworks = 2;
+  const size_t maxFrameworksArray[] = {0, 1, 2};
+
+  foreach (const size_t maxFrameworks, maxFrameworksArray) {
+    master::Flags masterFlags = CreateMasterFlags();
+    masterFlags.max_completed_frameworks = maxFrameworks;
+
+    Try<PID<Master>> master = StartMaster(masterFlags);
+    ASSERT_SOME(master);
+
+    Try<PID<Slave>> slave = StartSlave();
+    ASSERT_SOME(slave);
+
+    for (size_t i = 0; i < totalFrameworks; i++) {
+      MockScheduler sched;
+      MesosSchedulerDriver schedDriver(
+          &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);
+
+      // Ignore any incoming resource offers to the scheduler.
+      EXPECT_CALL(sched, resourceOffers(_, _))
+        .WillRepeatedly(Return());
+
+      Future<Nothing> schedRegistered;
+      EXPECT_CALL(sched, registered(_, _, _))
+        .WillOnce(FutureSatisfy(&schedRegistered));
+
+      schedDriver.start();
+
+      AWAIT_READY(schedRegistered);
+
+      schedDriver.stop();
+      schedDriver.join();
+    }
+
+    Future<process::http::Response> response =
+      process::http::get(master.get(), "state");
+    AWAIT_READY(response);
+
+    Try<JSON::Object> parse = JSON::parse<JSON::Object>(response->body);
+    ASSERT_SOME(parse);
+    JSON::Object state = parse.get();
+
+    // The number of completed frameworks should match the limit.
+    Result<JSON::Array> completedFrameworks =
+      state.values["completed_frameworks"].as<JSON::Array>();
+
+    EXPECT_EQ(maxFrameworks, completedFrameworks->values.size());
+
+    Stop(slave.get());
+    Stop(master.get());
+  }
+}
+
+
+// Test the max_completed_tasks_per_framework flag for master.
+TEST_F(MasterTest, MaxCompletedTasksPerFrameworkFlag)
+{
+  // We verify that the proper amount of history is maintained
+  // by launching a single framework with exactly 2 tasks. We
+  // do this when setting `max_completed_tasks_per_framework`
+  // to 0, 1, and 2. This covers the cases of maintaining no
+  // history, some history less than the total number of tasks
+  // launched, and history equal to the total number of tasks
+  // launched.
+  const size_t totalTasksPerFramework = 2;
+  const size_t maxTasksPerFrameworkArray[] = {0, 1, 2};
+
+  foreach (const size_t maxTasksPerFramework, maxTasksPerFrameworkArray) {
+    master::Flags masterFlags = CreateMasterFlags();
+    masterFlags.max_completed_tasks_per_framework = maxTasksPerFramework;
+
+    Try<PID<Master>> master = StartMaster(masterFlags);
+    ASSERT_SOME(master);
+
+    MockExecutor exec(DEFAULT_EXECUTOR_ID);
+    EXPECT_CALL(exec, registered(_, _, _, _));
+
+    Try<PID<Slave>> slave = StartSlave(&exec);
+    ASSERT_SOME(slave);
+
+    MockScheduler sched;
+    MesosSchedulerDriver schedDriver(
+        &sched, DEFAULT_FRAMEWORK_INFO, master.get(), DEFAULT_CREDENTIAL);
+
+    Future<Nothing> schedRegistered;
+    EXPECT_CALL(sched, registered(_, _, _))
+      .WillOnce(FutureSatisfy(&schedRegistered));
+
+    schedDriver.start();
+
+    AWAIT_READY(schedRegistered);
+
+    for (size_t i = 0; i < totalTasksPerFramework; i++) {
+      Future<vector<Offer>> offers;
+      EXPECT_CALL(sched, resourceOffers(&schedDriver, _))
+        .WillOnce(FutureArg<1>(&offers))
+        .WillRepeatedly(Return());
+
+      AWAIT_READY(offers);
+      EXPECT_NE(0u, offers->size());
+      Offer offer = offers.get()[0];
+
+      TaskInfo task;
+      task.set_name("");
+      task.mutable_task_id()->set_value(stringify(i));
+      task.mutable_slave_id()->MergeFrom(offer.slave_id());
+      task.mutable_resources()->MergeFrom(offer.resources());
+      task.mutable_executor()->MergeFrom(DEFAULT_EXECUTOR_INFO);
+
+      // Make sure the task passes through its TASK_FINISHED
+      // state properly. We force this state change through
+      // the launchTask() callback on our MockExecutor.
+      Future<TaskStatus> statusFinished;
+      EXPECT_CALL(exec, launchTask(_, _))
+        .WillOnce(SendStatusUpdateFromTask(TASK_FINISHED));
+      EXPECT_CALL(sched, statusUpdate(_, _))
+        .WillOnce(FutureArg<1>(&statusFinished));
+
+      schedDriver.launchTasks(offer.id(), {task});
+
+      AWAIT_READY(statusFinished);
+      EXPECT_EQ(TASK_FINISHED, statusFinished->state());
+    }
+
+    EXPECT_CALL(exec, shutdown(_))
+      .Times(AtMost(1));
+
+    schedDriver.stop();
+    schedDriver.join();
+
+    Future<process::http::Response> response =
+      process::http::get(master.get(), "state");
+    AWAIT_READY(response);
+
+    Try<JSON::Object> parse = JSON::parse<JSON::Object>(response->body);
+    ASSERT_SOME(parse);
+    JSON::Object state = parse.get();
+
+    // There should be only 1 completed framework.
+    Result<JSON::Array> completedFrameworks =
+      state.values["completed_frameworks"].as<JSON::Array>();
+
+    ASSERT_EQ(1u, completedFrameworks->values.size());
+
+    // The number of completed tasks in the completed framework
+    // should match the limit.
+    JSON::Object completedFramework =
+      completedFrameworks->values[0].as<JSON::Object>();
+    Result<JSON::Array> completedTasksPerFramework =
+      completedFramework.values["completed_tasks"].as<JSON::Array>();
+
+    EXPECT_EQ(maxTasksPerFramework, completedTasksPerFramework->values.size());
+
+    Stop(slave.get());
+    Stop(master.get());
+  }
+}
+
 } // namespace tests {
 } // namespace internal {
 } // namespace mesos {