You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by bm...@apache.org on 2014/05/15 00:00:59 UTC

[1/2] git commit: Added task gauges to Master.

Repository: mesos
Updated Branches:
  refs/heads/master 270204125 -> 3aebb23c0


Added task gauges to Master.

Review: https://reviews.apache.org/r/21279


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/2fa6f835
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/2fa6f835
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/2fa6f835

Branch: refs/heads/master
Commit: 2fa6f835e8ef301eb2a92dffbf255e5d1b966a4b
Parents: 2702041
Author: Dominic Hamon <dh...@twopensource.com>
Authored: Wed May 14 14:51:39 2014 -0700
Committer: Benjamin Mahler <bm...@twitter.com>
Committed: Wed May 14 14:56:59 2014 -0700

----------------------------------------------------------------------
 src/master/master.cpp      | 104 ++++++++++++++++++++++++++++++++++++++++
 src/master/master.hpp      |  28 ++++++++---
 src/tests/master_tests.cpp |   8 ++++
 3 files changed, 133 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/2fa6f835/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index ebc00f6..fcd4ed6 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -3600,6 +3600,20 @@ void Master::removeTask(Task* task)
   allocator->resourcesRecovered(
       task->framework_id(), task->slave_id(), Resources(task->resources()));
 
+  // Update the task state metric.
+  switch (task->state()) {
+    case TASK_FINISHED: ++metrics.tasks_finished; break;
+    case TASK_FAILED:   ++metrics.tasks_failed;   break;
+    case TASK_KILLED:   ++metrics.tasks_killed;   break;
+    case TASK_LOST:     ++metrics.tasks_lost;     break;
+    default:
+      LOG(WARNING) << "Removing task " << task->task_id()
+                   << " of framework " << task->framework_id()
+                   << " and slave " << task->slave_id()
+                   << " in non-terminal state " << task->state();
+      break;
+  }
+
   delete task;
 }
 
@@ -3715,6 +3729,63 @@ double Master::_inactive_slaves()
 }
 
 
+double Master::_tasks_staging()
+{
+  double count = 0.0;
+
+  foreachvalue (Slave* slave, slaves.activated) {
+    typedef hashmap<TaskID, Task*> TaskMap;
+    foreachvalue (const TaskMap& tasks, slave->tasks) {
+      foreachvalue (const Task* task, tasks) {
+        if (task->state() == TASK_STAGING) {
+          count++;
+        }
+      }
+    }
+  }
+
+  return count;
+}
+
+
+double Master::_tasks_starting()
+{
+  double count = 0.0;
+
+  foreachvalue (Slave* slave, slaves.activated) {
+    typedef hashmap<TaskID, Task*> TaskMap;
+    foreachvalue (const TaskMap& tasks, slave->tasks) {
+      foreachvalue (const Task* task, tasks) {
+        if (task->state() == TASK_STARTING) {
+          count++;
+        }
+      }
+    }
+  }
+
+  return count;
+}
+
+
+double Master::_tasks_running()
+{
+  double count = 0.0;
+
+  foreachvalue (Slave* slave, slaves.activated) {
+    typedef hashmap<TaskID, Task*> TaskMap;
+    foreachvalue (const TaskMap& tasks, slave->tasks) {
+      foreachvalue (const Task* task, tasks) {
+        if (task->state() == TASK_RUNNING) {
+          count++;
+        }
+      }
+    }
+  }
+
+  return count;
+}
+
+
 // TODO(dhamon): Consider moving to master/metrics.cpp|hpp.
 // Message counters are named with "messages_" prefix so they can
 // be grouped together alphabetically in the output.
@@ -3740,6 +3811,23 @@ Master::Metrics::Metrics(const Master& master)
     outstanding_offers(
         "master/outstanding_offers",
         defer(master, &Master::_outstanding_offers)),
+    tasks_staging(
+        "master/tasks_staging",
+        defer(master, &Master::_tasks_staging)),
+    tasks_starting(
+        "master/tasks_starting",
+        defer(master, &Master::_tasks_starting)),
+    tasks_running(
+        "master/tasks_running",
+        defer(master, &Master::_tasks_running)),
+    tasks_finished(
+        "master/tasks_finished"),
+    tasks_failed(
+        "master/tasks_failed"),
+    tasks_killed(
+        "master/tasks_killed"),
+    tasks_lost(
+        "master/tasks_lost"),
     dropped_messages(
         "master/dropped_messages"),
     messages_register_framework(
@@ -3806,6 +3894,14 @@ Master::Metrics::Metrics(const Master& master)
 
   process::metrics::add(outstanding_offers);
 
+  process::metrics::add(tasks_staging);
+  process::metrics::add(tasks_starting);
+  process::metrics::add(tasks_running);
+  process::metrics::add(tasks_finished);
+  process::metrics::add(tasks_failed);
+  process::metrics::add(tasks_killed);
+  process::metrics::add(tasks_lost);
+
   process::metrics::add(dropped_messages);
 
   // Messages from schedulers.
@@ -3860,6 +3956,14 @@ Master::Metrics::~Metrics()
 
   process::metrics::remove(outstanding_offers);
 
+  process::metrics::remove(tasks_staging);
+  process::metrics::remove(tasks_starting);
+  process::metrics::remove(tasks_running);
+  process::metrics::remove(tasks_finished);
+  process::metrics::remove(tasks_failed);
+  process::metrics::remove(tasks_killed);
+  process::metrics::remove(tasks_lost);
+
   process::metrics::remove(dropped_messages);
 
   // Messages from schedulers.

http://git-wip-us.apache.org/repos/asf/mesos/blob/2fa6f835/src/master/master.hpp
----------------------------------------------------------------------
diff --git a/src/master/master.hpp b/src/master/master.hpp
index baf2d40..9cd1f75 100644
--- a/src/master/master.hpp
+++ b/src/master/master.hpp
@@ -489,6 +489,15 @@ private:
 
     process::metrics::Gauge outstanding_offers;
 
+    // Task state metrics.
+    process::metrics::Gauge tasks_staging;
+    process::metrics::Gauge tasks_starting;
+    process::metrics::Gauge tasks_running;
+    process::metrics::Counter tasks_finished;
+    process::metrics::Counter tasks_failed;
+    process::metrics::Counter tasks_killed;
+    process::metrics::Counter tasks_lost;
+
     // Message counters.
     process::metrics::Counter dropped_messages;
 
@@ -575,6 +584,10 @@ private:
     return static_cast<double>(size);
   }
 
+  double _tasks_staging();
+  double _tasks_starting();
+  double _tasks_running();
+
   process::Time startTime; // Start time used to calculate uptime.
 };
 
@@ -770,23 +783,24 @@ struct Framework
     resources += task->resources();
   }
 
+  void addCompletedTask(const Task& task)
+  {
+    // TODO(adam-mesos): Check if completed task already exists.
+    completedTasks.push_back(memory::shared_ptr<Task>(new Task(task)));
+  }
+
   void removeTask(Task* task)
   {
     CHECK(tasks.contains(task->task_id()))
       << "Unknown task " << task->task_id()
       << " of framework " << task->framework_id();
 
-    completedTasks.push_back(memory::shared_ptr<Task>(new Task(*task)));
+    addCompletedTask(*task);
+
     tasks.erase(task->task_id());
     resources -= task->resources();
   }
 
-  void addCompletedTask(const Task& task)
-  {
-    // TODO(adam-mesos): Check if completed task already exists.
-    completedTasks.push_back(memory::shared_ptr<Task>(new Task(task)));
-  }
-
   void addOffer(Offer* offer)
   {
     CHECK(!offers.contains(offer)) << "Duplicate offer " << offer->id();

http://git-wip-us.apache.org/repos/asf/mesos/blob/2fa6f835/src/tests/master_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/master_tests.cpp b/src/tests/master_tests.cpp
index ba1229d..7ec94b3 100644
--- a/src/tests/master_tests.cpp
+++ b/src/tests/master_tests.cpp
@@ -1420,6 +1420,14 @@ TEST_F(MasterTest, MetricsInStatsEndpoint)
 
   EXPECT_EQ(1u, stats.values.count("master/outstanding_offers"));
 
+  EXPECT_EQ(1u, stats.values.count("master/tasks_staging"));
+  EXPECT_EQ(1u, stats.values.count("master/tasks_starting"));
+  EXPECT_EQ(1u, stats.values.count("master/tasks_running"));
+  EXPECT_EQ(1u, stats.values.count("master/tasks_finished"));
+  EXPECT_EQ(1u, stats.values.count("master/tasks_failed"));
+  EXPECT_EQ(1u, stats.values.count("master/tasks_killed"));
+  EXPECT_EQ(1u, stats.values.count("master/tasks_lost"));
+
   EXPECT_EQ(1u, stats.values.count("master/dropped_messages"));
 
   // Messages from schedulers.


[2/2] git commit: Changed 'registry' flag default to 'replicated_log'.

Posted by bm...@apache.org.
Changed 'registry' flag default to 'replicated_log'.

Review: https://reviews.apache.org/r/21457


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/3aebb23c
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/3aebb23c
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/3aebb23c

Branch: refs/heads/master
Commit: 3aebb23c0d5588ff3accb778528673851ee0c93f
Parents: 2fa6f83
Author: Benjamin Mahler <bm...@twitter.com>
Authored: Tue May 13 15:23:24 2014 -0700
Committer: Benjamin Mahler <bm...@twitter.com>
Committed: Wed May 14 15:00:41 2014 -0700

----------------------------------------------------------------------
 src/master/flags.hpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/3aebb23c/src/master/flags.hpp
----------------------------------------------------------------------
diff --git a/src/master/flags.hpp b/src/master/flags.hpp
index 3e23eca..db21ab0 100644
--- a/src/master/flags.hpp
+++ b/src/master/flags.hpp
@@ -56,11 +56,13 @@ public:
         "work_dir",
         "Where to store the persistent information stored in the Registry.");
 
+    // TODO(bmahler): Consider removing 'in_memory' as it was only
+    // used before 'replicated_log' was implemented.
     add(&Flags::registry,
         "registry",
         "Persistence strategy for the registry;\n"
-        "available options are 'in_memory', 'replicated_log'.",
-        "in_memory");
+        "available options are 'replicated_log', 'in_memory' (for testing).",
+        "replicated_log");
 
     // TODO(vinod): Instead of specifying the quorum size consider
     // specifying the number of masters or the list of masters.