You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@mesos.apache.org by "Jian Qiu (JIRA)" <ji...@apache.org> on 2016/11/17 08:37:58 UTC
[jira] [Created] (MESOS-6599) The disordered status update message
from executor may cause agent exit
Jian Qiu created MESOS-6599:
-------------------------------
Summary: The disordered status update message from executor may cause agent exit
Key: MESOS-6599
URL: https://issues.apache.org/jira/browse/MESOS-6599
Project: Mesos
Issue Type: Bug
Components: slave
Environment: CentOS 7.2/Ubuntu 16.04
Reporter: Jian Qiu
The framework enables checkpointing, and the executor sends TASK_KILLED to the agent. After the agent acknowledges the status update, the executor sends TASK_LOST, which causes the agent to exit. This is due to the CHECK_READY(future) in Slave::___statusUpdate. It is not clear why a CHECK is needed here.
The test code is as follows:
{code}
// Reproduction snippet: body of a test case (the enclosing test definition
// is not shown). It starts a master and an agent, runs one task under a
// checkpointing framework, then has the executor send further status
// updates for that task.

// Start a master.
Try<Owned<cluster::Master>> master = StartMaster();
ASSERT_SOME(master);
// Start an agent whose containerizer is backed by a mock executor, so the
// test decides which status updates the executor sends.
MockExecutor exec(DEFAULT_EXECUTOR_ID);
TestContainerizer containerizer(&exec);
Owned<MasterDetector> detector = master.get()->createDetector();
Try<Owned<cluster::Slave>> slave = StartSlave(detector.get(), &containerizer);
ASSERT_SOME(slave);
// Create a scheduler driver for a framework with checkpointing enabled.
FrameworkInfo frameworkInfo = DEFAULT_FRAMEWORK_INFO;
frameworkInfo.set_checkpoint(true); // Enable checkpointing.
MockScheduler sched;
MesosSchedulerDriver driver(
&sched, frameworkInfo, master.get()->pid, DEFAULT_CREDENTIAL);
// Record the framework ID passed to the scheduler's registered() callback.
FrameworkID frameworkId;
EXPECT_CALL(sched, registered(_, _, _))
.WillOnce(SaveArg<1>(&frameworkId));
// Capture the first batch of offers; later offers are ignored.
Future<vector<Offer>> offers;
EXPECT_CALL(sched, resourceOffers(_, _))
.WillOnce(FutureArg<1>(&offers))
.WillRepeatedly(Return()); // Ignore subsequent offers.
// Capture the first status update delivered to the scheduler. Only one
// statusUpdate() call is expected here.
Future<TaskStatus> status;
EXPECT_CALL(sched, statusUpdate(_, _))
.WillOnce(FutureArg<1>(&status));
driver.start();
AWAIT_READY(offers);
EXPECT_NE(0u, offers.get().size());
// Save the executor driver handed to registered() so that the test can send
// status updates through it later.
ExecutorDriver* execDriver;
EXPECT_CALL(exec, registered(_, _, _, _))
.WillOnce(SaveArg<0>(&execDriver));
// Respond to the task launch with a TASK_RUNNING status update.
EXPECT_CALL(exec, launchTask(_, _))
.WillOnce(SendStatusUpdateFromTask(TASK_RUNNING));
// Future for an intercepted StatusUpdateMessage matched against the master's
// pid.
Future<StatusUpdateMessage> statusUpdateMessage =
FUTURE_PROTOBUF(StatusUpdateMessage(), master.get()->pid, _);
// Future for the dispatch of Slave::_statusUpdateAcknowledgement on the agent
// (expected for the TASK_RUNNING update).
Future<Nothing> _statusUpdateAcknowledgement =
FUTURE_DISPATCH(slave.get()->pid, &Slave::_statusUpdateAcknowledgement);
// Launch the tasks built from the first offer.
vector<TaskInfo> tasks = createTasks(offers.get()[0]);
driver.launchTasks(offers.get()[0].id(), tasks);
AWAIT_READY(statusUpdateMessage);
// NOTE(review): `update` is not referenced again in the snippet as shown.
StatusUpdate update = statusUpdateMessage.get().update();
// The scheduler should have received TASK_RUNNING.
AWAIT_READY(status);
EXPECT_EQ(TASK_RUNNING, status.get().state());
AWAIT_READY(_statusUpdateAcknowledgement);
// driver.killTask(tasks[0].task_id());
// Instead of killing the task through the scheduler driver (commented out
// above), send TASK_KILLED directly from the executor by reusing the received
// status with a changed state, then wait for the acknowledgement dispatch on
// the agent.
Future<Nothing> _statusUpdateAcknowledgement2 =
FUTURE_DISPATCH(slave.get()->pid, &Slave::_statusUpdateAcknowledgement);
TaskStatus status3 = status.get();
status3.set_state(TASK_KILLED);
execDriver->sendStatusUpdate(status3);
AWAIT_READY(_statusUpdateAcknowledgement2);
// Arm a future on the dispatch of Slave::___statusUpdate and prepare a
// TASK_LOST status for the same task.
// NOTE(review): the snippet ends without sending `status2` or awaiting
// `_statusUpdate`; presumably a trailing
// `execDriver->sendStatusUpdate(status2)` was omitted from the report --
// confirm against the issue.
Future<Nothing> _statusUpdate =
FUTURE_DISPATCH(slave.get()->pid, &Slave::___statusUpdate);
TaskStatus status2 = status.get();
status2.set_state(TASK_LOST);
{code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)