You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by al...@apache.org on 2017/09/01 08:58:27 UTC

[1/2] mesos git commit: Ensured Java HTTP adapter propagates a subscription error.

Repository: mesos
Updated Branches:
  refs/heads/1.2.x b82e06904 -> 72b080816


Ensured Java HTTP adapter propagates a subscription error.

Prior to this patch, if an error occurred during subscription /
registration to the master, it was not propagated back to the
scheduler if the HTTP adapter was used. This happened because
the HTTP adapter does not call `scheduler.connected` until after
successful registration and hence the scheduler does not try to
send the `SUBSCRIBE` call, without which the adapter does not
send any events to the scheduler.

A fix is to call `scheduler.connected` if an error occurred
before the scheduler had subscribed.

Review: https://reviews.apache.org/r/61579


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/9c6c5436
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/9c6c5436
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/9c6c5436

Branch: refs/heads/1.2.x
Commit: 9c6c5436e918aa32ce34fa13d24f225f5944d9a5
Parents: b82e069
Author: Alexander Rukletsov <al...@apache.org>
Authored: Thu Aug 10 22:34:38 2017 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Fri Sep 1 10:44:37 2017 +0200

----------------------------------------------------------------------
 .../org_apache_mesos_v1_scheduler_V0Mesos.cpp   | 84 ++++++++++++--------
 1 file changed, 53 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/9c6c5436/src/java/jni/org_apache_mesos_v1_scheduler_V0Mesos.cpp
----------------------------------------------------------------------
diff --git a/src/java/jni/org_apache_mesos_v1_scheduler_V0Mesos.cpp b/src/java/jni/org_apache_mesos_v1_scheduler_V0Mesos.cpp
index 1f58fbf..e487f1e 100644
--- a/src/java/jni/org_apache_mesos_v1_scheduler_V0Mesos.cpp
+++ b/src/java/jni/org_apache_mesos_v1_scheduler_V0Mesos.cpp
@@ -189,6 +189,8 @@ protected:
 
   void __received(const Event& event);
 
+  void connect();
+
   void heartbeat();
 
   void disconnect();
@@ -355,35 +357,7 @@ void V0ToV1AdapterProcess::registered(
     const FrameworkID& _frameworkId,
     const MasterInfo& masterInfo)
 {
-  jvm->AttachCurrentThread(JNIENV_CAST(&env), NULL);
-
-  jclass clazz = env->GetObjectClass(jmesos);
-
-  jfieldID scheduler =
-    env->GetFieldID(clazz, "scheduler",
-                    "Lorg/apache/mesos/v1/scheduler/Scheduler;");
-
-  jobject jscheduler = env->GetObjectField(jmesos, scheduler);
-
-  clazz = env->GetObjectClass(jscheduler);
-
-  // scheduler.connected(mesos);
-  jmethodID connected =
-    env->GetMethodID(clazz, "connected",
-                     "(Lorg/apache/mesos/v1/scheduler/Mesos;)V");
-
-  env->ExceptionClear();
-
-  env->CallVoidMethod(jscheduler, connected, jmesos);
-
-  if (env->ExceptionCheck()) {
-    env->ExceptionDescribe();
-    env->ExceptionClear();
-    jvm->DetachCurrentThread();
-    ABORT("Exception thrown during `connected` call");
-  }
-
-  jvm->DetachCurrentThread();
+  connect();
 
   // We need this copy to populate the fields in `Event::Subscribed` upon
   // receiving a `reregistered()` callback later.
@@ -554,6 +528,18 @@ void V0ToV1AdapterProcess::error(const string& message)
 
   event.mutable_error()->set_message(message);
 
+  // There might be an error during the communication with the master or
+  // implicit registration happening on driver initialization. Since
+  // `Scheduler.connected` is called upon a successful registration only, the
+  // scheduler will never try to subscribe and hence will never receive the
+  // error. This workaround satisfies the invariant of the v1 interface that
+  // a scheduler can receive an event only after successfully connecting with
+  // the master.
+  if (!subscribeCall) {
+    LOG(INFO) << "Implicitly connecting the scheduler to send an error";
+    connect();
+  }
+
   received(event);
 }
 
@@ -579,7 +565,8 @@ void V0ToV1AdapterProcess::send(SchedulerDriver* driver, const Call& _call)
 
       // The driver subscribes implicitly with the master upon initialization.
       // For compatibility with the v1 interface, send the already enqueued
-      // subscribed event upon receiving the subscribe request.
+      // subscribed event (or subscription error) upon receiving the subscribe
+      // request.
       _received();
       break;
     }
@@ -699,7 +686,8 @@ void V0ToV1AdapterProcess::send(SchedulerDriver* driver, const Call& _call)
 void V0ToV1AdapterProcess::received(const Event& event)
 {
   // For compatibility with the v1 interface, we only start sending events
-  // once the scheduler has sent the subscribe call.
+  // once the scheduler has sent the subscribe call. Error events are queued
+  // too; `error()` connects the scheduler implicitly to trigger subscription.
   if (!subscribeCall) {
     pending.push(event);
     return;
@@ -759,6 +747,40 @@ void V0ToV1AdapterProcess::__received(const Event& event)
 }
 
 
+void V0ToV1AdapterProcess::connect()
+{
+  jvm->AttachCurrentThread(JNIENV_CAST(&env), NULL);
+
+  jclass clazz = env->GetObjectClass(jmesos);
+
+  jfieldID scheduler =
+    env->GetFieldID(clazz, "scheduler",
+                    "Lorg/apache/mesos/v1/scheduler/Scheduler;");
+
+  jobject jscheduler = env->GetObjectField(jmesos, scheduler);
+
+  clazz = env->GetObjectClass(jscheduler);
+
+  // scheduler.connected(mesos);
+  jmethodID connected =
+    env->GetMethodID(clazz, "connected",
+                     "(Lorg/apache/mesos/v1/scheduler/Mesos;)V");
+
+  env->ExceptionClear();
+
+  env->CallVoidMethod(jscheduler, connected, jmesos);
+
+  if (env->ExceptionCheck()) {
+    env->ExceptionDescribe();
+    env->ExceptionClear();
+    jvm->DetachCurrentThread();
+    ABORT("Exception thrown during `connected` call");
+  }
+
+  jvm->DetachCurrentThread();
+}
+
+
 void V0ToV1AdapterProcess::heartbeat()
 {
   // It is possible that we were unable to cancel this timer upon a


[2/2] mesos git commit: Added MESOS-7872 to 1.2.3 CHANGELOG.

Posted by al...@apache.org.
Added MESOS-7872 to 1.2.3 CHANGELOG.


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/72b08081
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/72b08081
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/72b08081

Branch: refs/heads/1.2.x
Commit: 72b080816945f085911f0eb3f3e7dae3ad433ce3
Parents: 9c6c543
Author: Alexander Rukletsov <al...@apache.org>
Authored: Fri Sep 1 10:46:47 2017 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Fri Sep 1 10:46:47 2017 +0200

----------------------------------------------------------------------
 CHANGELOG | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/72b08081/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index ae28a1e..436275d 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -10,6 +10,7 @@ All Issues:
   * [MESOS-7858] - Launching a nested container with namespace/pid isolation, with glibc < 2.25, may deadlock the LinuxLauncher and MesosContainerizer.
   * [MESOS-7863] - Agent may drop pending kill task status updates.
   * [MESOS-7865] - Agent may process a kill task and still launch the task.
+  * [MESOS-7872] - Scheduler hang when registration fails.
   * [MESOS-7909] - Ordering dependency between 'linux/capabilities' and 'docker/runtime' isolator.
   * [MESOS-7926] - Abnormal termination of default executor can cause MesosContainerizer::destroy to fail.