You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by an...@apache.org on 2017/11/02 00:31:27 UTC

[1/2] mesos git commit: Removed metrics removal from Master::failoverFramework().

Repository: mesos
Updated Branches:
  refs/heads/master 712a951b4 -> fa747ac36


Removed metrics removal from Master::failoverFramework().

When a framework upgrades from a PID based driver to an HTTP based
driver, the master removes its per-principal metrics. When the same
framework downgrades back to a PID based driver, the master doesn't
reinstate those metrics. This causes a crash when the master receives a
message from the failed over framework and tries to increment its
metrics.

This patch fixes the issue by no longer removing the metrics in the
framework failover handling code. Note that it doesn't handle the case
when the framework's principal changes. That situation is being dealt
with separately in MESOS-2842.

Review: https://reviews.apache.org/r/62240/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/5b93d6ac
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/5b93d6ac
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/5b93d6ac

Branch: refs/heads/master
Commit: 5b93d6ac60725679399fe15233267c02cc9918df
Parents: 712a951
Author: Ilya Pronin <ip...@twopensource.com>
Authored: Wed Nov 1 17:29:49 2017 -0700
Committer: Anand Mazumdar <an...@apache.org>
Committed: Wed Nov 1 17:29:49 2017 -0700

----------------------------------------------------------------------
 src/master/master.cpp | 8 --------
 1 file changed, 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/5b93d6ac/src/master/master.cpp
----------------------------------------------------------------------
diff --git a/src/master/master.cpp b/src/master/master.cpp
index c0d6193..49bc50e 100644
--- a/src/master/master.cpp
+++ b/src/master/master.cpp
@@ -8527,14 +8527,6 @@ void Master::failoverFramework(Framework* framework, const HttpConnection& http)
     Option<string> principal = frameworks.principals[framework->pid.get()];
 
     frameworks.principals.erase(framework->pid.get());
-
-    // Remove the metrics for the principal if this framework is the
-    // last one with this principal.
-    if (principal.isSome() &&
-        !frameworks.principals.containsValue(principal.get())) {
-      CHECK(metrics->frameworks.contains(principal.get()));
-      metrics->frameworks.erase(principal.get());
-    }
   }
 
   framework->updateConnection(http);


[2/2] mesos git commit: Added SchedulerHttpApiTest.UpdateHttpToPidSchedulerAndBack test.

Posted by an...@apache.org.
Added SchedulerHttpApiTest.UpdateHttpToPidSchedulerAndBack test.

This test verifies that we are able to upgrade from a PID based
framework to an HTTP framework and then downgrade back without
restarting the master.

Review: https://reviews.apache.org/r/62241/


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/fa747ac3
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/fa747ac3
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/fa747ac3

Branch: refs/heads/master
Commit: fa747ac3620a6695cfe29ac443f833fad6c991a2
Parents: 5b93d6a
Author: Ilya Pronin <ip...@twopensource.com>
Authored: Wed Nov 1 17:29:58 2017 -0700
Committer: Anand Mazumdar <an...@apache.org>
Committed: Wed Nov 1 17:29:58 2017 -0700

----------------------------------------------------------------------
 src/tests/scheduler_http_api_tests.cpp | 109 ++++++++++++++++++++++++++++
 1 file changed, 109 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/fa747ac3/src/tests/scheduler_http_api_tests.cpp
----------------------------------------------------------------------
diff --git a/src/tests/scheduler_http_api_tests.cpp b/src/tests/scheduler_http_api_tests.cpp
index cc03be0..80e52fb 100644
--- a/src/tests/scheduler_http_api_tests.cpp
+++ b/src/tests/scheduler_http_api_tests.cpp
@@ -639,6 +639,115 @@ TEST_P(SchedulerHttpApiTest, UpdateHttpToPidScheduler)
 }
 
 
+// This test verifies that we are able to upgrade from a PID based
+// framework to an HTTP framework and then downgrade back.
+TEST_P(SchedulerHttpApiTest, UpdateHttpToPidSchedulerAndBack)
+{
+  Try<Owned<cluster::Master>> master = StartMaster();
+  ASSERT_SOME(master);
+
+  v1::FrameworkInfo frameworkInfo = v1::DEFAULT_FRAMEWORK_INFO;
+  frameworkInfo.set_failover_timeout(Weeks(2).secs());
+
+  // Start a PID based scheduler instance first.
+  MockScheduler scheduler;
+  MesosSchedulerDriver driver(
+      &scheduler,
+      devolve(frameworkInfo),
+      master.get()->pid,
+      DEFAULT_CREDENTIAL);
+
+  Future<FrameworkID> frameworkId;
+  EXPECT_CALL(scheduler, registered(&driver, _, _))
+    .WillOnce(FutureArg<1>(&frameworkId));
+
+  driver.start();
+
+  AWAIT_READY(frameworkId);
+  ASSERT_NE("", frameworkId.get().value());
+
+  frameworkInfo.mutable_id()->CopyFrom(evolve(frameworkId.get()));
+
+  // Expect "Framework failed over" message.
+  EXPECT_CALL(scheduler, error(&driver, _));
+
+  // Fail over to an HTTP based scheduler instance.
+  Call call;
+  call.set_type(Call::SUBSCRIBE);
+  call.mutable_framework_id()->CopyFrom(frameworkInfo.id());
+  call.mutable_subscribe()->mutable_framework_info()->CopyFrom(frameworkInfo);
+
+  // Retrieve content type passed as a parameter to this test.
+  const string contentType = GetParam();
+
+  process::http::Headers headers = createBasicAuthHeaders(DEFAULT_CREDENTIAL);
+  headers["Accept"] = contentType;
+
+  Future<Response> response = process::http::streaming::post(
+      master.get()->pid,
+      "api/v1/scheduler",
+      headers,
+      serialize(call, contentType),
+      contentType);
+
+  AWAIT_EXPECT_RESPONSE_STATUS_EQ(OK().status, response);
+  AWAIT_EXPECT_RESPONSE_HEADER_EQ("chunked", "Transfer-Encoding", response);
+  ASSERT_EQ(Response::PIPE, response.get().type);
+
+  Option<Pipe::Reader> reader = response.get().reader;
+  ASSERT_SOME(reader);
+
+  auto deserializer = lambda::bind(
+      &SchedulerHttpApiTest::deserialize, this, contentType, lambda::_1);
+
+  Reader<Event> responseDecoder(Decoder<Event>(deserializer), reader.get());
+
+  // Get SUBSCRIBED event and check framework ID.
+  Future<Result<Event>> event = responseDecoder.read();
+  AWAIT_READY(event);
+  ASSERT_SOME(event.get());
+  ASSERT_EQ(Event::SUBSCRIBED, event.get()->type());
+  ASSERT_EQ(frameworkInfo.id(), event.get()->subscribed().framework_id());
+
+  driver.stop();
+  driver.join();
+
+  // Fail over back to a PID based scheduler instance.
+  MockScheduler scheduler2;
+  MesosSchedulerDriver driver2(
+      &scheduler2,
+      devolve(frameworkInfo),
+      master.get()->pid,
+      DEFAULT_CREDENTIAL);
+
+  EXPECT_CALL(scheduler2, registered(&driver2, _, _))
+    .WillOnce(FutureArg<1>(&frameworkId));
+
+  driver2.start();
+
+  AWAIT_READY(frameworkId);
+  ASSERT_EQ(devolve(frameworkInfo.id()), frameworkId.get());
+
+  TaskStatus status;
+  status.mutable_task_id()->set_value("task-1");
+
+  Future<TaskStatus> reconciledStatus;
+  EXPECT_CALL(scheduler2, statusUpdate(&driver2, _))
+    .WillOnce(FutureArg<1>(&reconciledStatus));
+
+  // Reconcile a non-existing task to exercise message handling. The
+  // master used to crash when processing a message from a framework
+  // that upgraded to an HTTP based driver and downgraded back to a
+  // PID based driver, due to missing metrics.
+  driver2.reconcileTasks({status});
+
+  AWAIT_READY(reconciledStatus);
+
+  driver2.stop();
+  driver2.join();
+}
+
+
 TEST_P(SchedulerHttpApiTest, NotAcceptable)
 {
   Try<Owned<cluster::Master>> master = StartMaster();