You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by al...@apache.org on 2017/09/01 08:58:27 UTC
[1/2] mesos git commit: Ensured JAVA HTTP adapter propagates a
subscription error.
Repository: mesos
Updated Branches:
refs/heads/1.2.x b82e06904 -> 72b080816
Ensured JAVA HTTP adapter propagates a subscription error.
Prior to this patch, if an error occurred during subscription /
registration to the master, it was not propagated back to the
scheduler if the HTTP adapter was used. This happened because
the HTTP adapter does not call `scheduler.connected` until after
successful registration and hence the scheduler does not try to
send the `SUBSCRIBE` call, without which the adapter does not
send any events to the scheduler.
A fix is to call `scheduler.connected` if an error occurred
before the scheduler had subscribed.
Review: https://reviews.apache.org/r/61579
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/9c6c5436
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/9c6c5436
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/9c6c5436
Branch: refs/heads/1.2.x
Commit: 9c6c5436e918aa32ce34fa13d24f225f5944d9a5
Parents: b82e069
Author: Alexander Rukletsov <al...@apache.org>
Authored: Thu Aug 10 22:34:38 2017 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Fri Sep 1 10:44:37 2017 +0200
----------------------------------------------------------------------
.../org_apache_mesos_v1_scheduler_V0Mesos.cpp | 84 ++++++++++++--------
1 file changed, 53 insertions(+), 31 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/9c6c5436/src/java/jni/org_apache_mesos_v1_scheduler_V0Mesos.cpp
----------------------------------------------------------------------
diff --git a/src/java/jni/org_apache_mesos_v1_scheduler_V0Mesos.cpp b/src/java/jni/org_apache_mesos_v1_scheduler_V0Mesos.cpp
index 1f58fbf..e487f1e 100644
--- a/src/java/jni/org_apache_mesos_v1_scheduler_V0Mesos.cpp
+++ b/src/java/jni/org_apache_mesos_v1_scheduler_V0Mesos.cpp
@@ -189,6 +189,8 @@ protected:
void __received(const Event& event);
+ void connect();
+
void heartbeat();
void disconnect();
@@ -355,35 +357,7 @@ void V0ToV1AdapterProcess::registered(
const FrameworkID& _frameworkId,
const MasterInfo& masterInfo)
{
- jvm->AttachCurrentThread(JNIENV_CAST(&env), NULL);
-
- jclass clazz = env->GetObjectClass(jmesos);
-
- jfieldID scheduler =
- env->GetFieldID(clazz, "scheduler",
- "Lorg/apache/mesos/v1/scheduler/Scheduler;");
-
- jobject jscheduler = env->GetObjectField(jmesos, scheduler);
-
- clazz = env->GetObjectClass(jscheduler);
-
- // scheduler.connected(mesos);
- jmethodID connected =
- env->GetMethodID(clazz, "connected",
- "(Lorg/apache/mesos/v1/scheduler/Mesos;)V");
-
- env->ExceptionClear();
-
- env->CallVoidMethod(jscheduler, connected, jmesos);
-
- if (env->ExceptionCheck()) {
- env->ExceptionDescribe();
- env->ExceptionClear();
- jvm->DetachCurrentThread();
- ABORT("Exception thrown during `connected` call");
- }
-
- jvm->DetachCurrentThread();
+ connect();
// We need this copy to populate the fields in `Event::Subscribed` upon
// receiving a `reregistered()` callback later.
@@ -554,6 +528,18 @@ void V0ToV1AdapterProcess::error(const string& message)
event.mutable_error()->set_message(message);
+ // There might be an error during the communication with the master or
+ // implicit registration happening on driver initialization. Since
+ // `Scheduler.connected` is called upon a successful registration only, the
+ // scheduler will never try to subscribe and hence will never receive the
+ // error. This workaround satisfies the invariant of the v1 interface that
+ // a scheduler can receive an event only after successfully connecting with
+ // the master.
+ if (!subscribeCall) {
+ LOG(INFO) << "Implicitly connecting the scheduler to send an error";
+ connect();
+ }
+
received(event);
}
@@ -579,7 +565,8 @@ void V0ToV1AdapterProcess::send(SchedulerDriver* driver, const Call& _call)
// The driver subscribes implicitly with the master upon initialization.
// For compatibility with the v1 interface, send the already enqueued
- // subscribed event upon receiving the subscribe request.
+ // subscribed event (or subscription error) upon receiving the subscribe
+ // request.
_received();
break;
}
@@ -699,7 +686,8 @@ void V0ToV1AdapterProcess::send(SchedulerDriver* driver, const Call& _call)
void V0ToV1AdapterProcess::received(const Event& event)
{
// For compatibility with the v1 interface, we only start sending events
- // once the scheduler has sent the subscribe call.
+ // once the scheduler has sent the subscribe call. An exception to this
+ // is an error event, which can be sent before the subscribe call.
if (!subscribeCall) {
pending.push(event);
return;
@@ -759,6 +747,40 @@ void V0ToV1AdapterProcess::__received(const Event& event)
}
+void V0ToV1AdapterProcess::connect() // Invokes the Java scheduler's `connected(mesos)` callback through JNI.
+{
+ jvm->AttachCurrentThread(JNIENV_CAST(&env), NULL); // Attach this thread to the JVM; `env` is filled in for the calls below.
+
+ jclass clazz = env->GetObjectClass(jmesos); // Class of the `jmesos` object.
+
+ jfieldID scheduler =
+ env->GetFieldID(clazz, "scheduler",
+ "Lorg/apache/mesos/v1/scheduler/Scheduler;");
+
+ jobject jscheduler = env->GetObjectField(jmesos, scheduler); // Value of the `scheduler` field on `jmesos`.
+
+ clazz = env->GetObjectClass(jscheduler); // `clazz` is reused: it now refers to the scheduler's class.
+
+ // scheduler.connected(mesos);
+ jmethodID connected =
+ env->GetMethodID(clazz, "connected",
+ "(Lorg/apache/mesos/v1/scheduler/Mesos;)V");
+
+ env->ExceptionClear(); // Discard any exception pending before the callback is invoked.
+
+ env->CallVoidMethod(jscheduler, connected, jmesos);
+
+ if (env->ExceptionCheck()) { // The callback raised a Java exception: describe it, detach, then abort.
+ env->ExceptionDescribe();
+ env->ExceptionClear();
+ jvm->DetachCurrentThread();
+ ABORT("Exception thrown during `connected` call");
+ }
+
+ jvm->DetachCurrentThread(); // No exception: detach the thread before returning.
+}
+
+
void V0ToV1AdapterProcess::heartbeat()
{
// It is possible that we were unable to cancel this timer upon a
[2/2] mesos git commit: Added MESOS-7872 to 1.2.3 CHANGELOG.
Posted by al...@apache.org.
Added MESOS-7872 to 1.2.3 CHANGELOG.
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/72b08081
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/72b08081
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/72b08081
Branch: refs/heads/1.2.x
Commit: 72b080816945f085911f0eb3f3e7dae3ad433ce3
Parents: 9c6c543
Author: Alexander Rukletsov <al...@apache.org>
Authored: Fri Sep 1 10:46:47 2017 +0200
Committer: Alexander Rukletsov <al...@apache.org>
Committed: Fri Sep 1 10:46:47 2017 +0200
----------------------------------------------------------------------
CHANGELOG | 1 +
1 file changed, 1 insertion(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/72b08081/CHANGELOG
----------------------------------------------------------------------
diff --git a/CHANGELOG b/CHANGELOG
index ae28a1e..436275d 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -10,6 +10,7 @@ All Issues:
* [MESOS-7858] - Launching a nested container with namespace/pid isolation, with glibc < 2.25, may deadlock the LinuxLauncher and MesosContainerizer.
* [MESOS-7863] - Agent may drop pending kill task status updates.
* [MESOS-7865] - Agent may process a kill task and still launch the task.
+ * [MESOS-7872] - Scheduler hang when registration fails.
* [MESOS-7909] - Ordering dependency between 'linux/capabilities' and 'docker/runtime' isolator.
* [MESOS-7926] - Abnormal termination of default executor can cause MesosContainerizer::destroy to fail.