You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by vi...@apache.org on 2013/03/13 07:24:06 UTC
svn commit: r1455818 - in /incubator/mesos/trunk: src/common/type_utils.hpp
src/slave/paths.hpp src/slave/slave.cpp src/slave/slave.hpp
third_party/libprocess/third_party/stout/include/stout/os.hpp
Author: vinodkone
Date: Wed Mar 13 06:24:06 2013
New Revision: 1455818
URL: http://svn.apache.org/r1455818
Log:
Added support for incompatible upgrade.
Review: https://reviews.apache.org/r/8763
Modified:
incubator/mesos/trunk/src/common/type_utils.hpp
incubator/mesos/trunk/src/slave/paths.hpp
incubator/mesos/trunk/src/slave/slave.cpp
incubator/mesos/trunk/src/slave/slave.hpp
incubator/mesos/trunk/third_party/libprocess/third_party/stout/include/stout/os.hpp
Modified: incubator/mesos/trunk/src/common/type_utils.hpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/common/type_utils.hpp?rev=1455818&r1=1455817&r2=1455818&view=diff
==============================================================================
--- incubator/mesos/trunk/src/common/type_utils.hpp (original)
+++ incubator/mesos/trunk/src/common/type_utils.hpp Wed Mar 13 06:24:06 2013
@@ -25,6 +25,7 @@
#include <boost/functional/hash.hpp>
+#include "common/attributes.hpp"
#include "common/resources.hpp"
#include "messages/messages.hpp"
@@ -80,6 +81,12 @@ inline std::ostream& operator << (std::o
}
+// Writes the protobuf debug string of 'slave' to 'stream' and returns
+// the stream so that further output can be chained.
+inline std::ostream& operator << (std::ostream& stream, const SlaveInfo& slave)
+{
+  stream << slave.DebugString();
+  return stream;
+}
+
+
inline bool operator == (const FrameworkID& left, const FrameworkID& right)
{
return left.value() == right.value();
@@ -255,6 +262,21 @@ inline bool operator == (const ExecutorI
}
+// Returns true if 'left' and 'right' agree on hostname, webui hostname,
+// resources and attributes, and on the 'webui_port' and 'id' fields.
+// For the latter two, both sides must agree on whether the field is
+// set, and the values are compared only when it is set.
+inline bool operator == (const SlaveInfo& left, const SlaveInfo& right)
+{
+  if (!(left.hostname() == right.hostname())) {
+    return false;
+  }
+
+  if (!(left.webui_hostname() == right.webui_hostname())) {
+    return false;
+  }
+
+  // Resources and attributes are compared through their wrapper types
+  // rather than field by field.
+  if (!(internal::Resources(left.resources()) ==
+        internal::Resources(right.resources()))) {
+    return false;
+  }
+
+  if (!(internal::Attributes(left.attributes()) ==
+        internal::Attributes(right.attributes()))) {
+    return false;
+  }
+
+  if (left.has_webui_port() != right.has_webui_port()) {
+    return false;
+  }
+
+  if (left.has_webui_port() && !(left.webui_port() == right.webui_port())) {
+    return false;
+  }
+
+  if (left.has_id() != right.has_id()) {
+    return false;
+  }
+
+  return !left.has_id() || (left.id() == right.id());
+}
+
+
inline std::size_t hash_value(const FrameworkID& frameworkId)
{
size_t seed = 0;
Modified: incubator/mesos/trunk/src/slave/paths.hpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/slave/paths.hpp?rev=1455818&r1=1455817&r2=1455818&view=diff
==============================================================================
--- incubator/mesos/trunk/src/slave/paths.hpp (original)
+++ incubator/mesos/trunk/src/slave/paths.hpp Wed Mar 13 06:24:06 2013
@@ -81,6 +81,12 @@ inline std::string getMetaRootDir(const
}
+// Returns the path of the "archive" directory directly under 'rootDir'.
+// 'rootDir' is taken by const reference (as the neighbouring path
+// helpers do) to avoid copying the string on every call.
+inline std::string getArchiveDir(const std::string& rootDir)
+{
+  return path::join(rootDir, "archive");
+}
+
+
inline std::string getLatestSlavePath(const std::string& rootDir)
{
return strings::format(LATEST_SLAVE_PATH, rootDir).get();
Modified: incubator/mesos/trunk/src/slave/slave.cpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/slave/slave.cpp?rev=1455818&r1=1455817&r2=1455818&view=diff
==============================================================================
--- incubator/mesos/trunk/src/slave/slave.cpp (original)
+++ incubator/mesos/trunk/src/slave/slave.cpp Wed Mar 13 06:24:06 2013
@@ -996,14 +996,6 @@ void Slave::_statusUpdateAcknowledgement
<< future.get().error();
return;
}
-
- // If this slave is in 'recover=cleanup' mode, exit after all executors
- // have exited.
- if (flags.recover == "cleanup" && frameworks.empty()) {
- LOG(INFO) << "Slave is shutting down because it was started in cleanup "
- << " recovery mode and all updates have been acknowledged!";
- shutdown();
- }
}
@@ -1550,6 +1542,51 @@ void Slave::executorTerminated(
// Pass ownership of the framework pointer.
completedFrameworks.push_back(std::tr1::shared_ptr<Framework>(framework));
}
+
+ // If this slave is in 'recover=cleanup' mode, exit after all executors
+ // have exited.
+ // TODO(vinod): Ensure all status updates have been acknowledged.
+ if (flags.recover == "cleanup" && frameworks.size() == 0) {
+ cleanup();
+ }
+}
+
+
+// Archives the slave's meta directory into the archive directory,
+// deletes the meta directory and then shuts the slave down.
+// Must only be called when the slave was started with
+// --recover=cleanup (enforced by the CHECK below).
+void Slave::cleanup()
+{
+  CHECK(flags.recover == "cleanup");
+
+  LOG(INFO) << "Slave is shutting down because it was started with "
+            << "--recover=cleanup and all executors have terminated!";
+
+  string archiveDir = paths::getArchiveDir(flags.work_dir);
+  string metaDir = paths::getMetaRootDir(flags.work_dir);
+
+  // Archive and delete the meta directory, to allow incompatible upgrades.
+  LOG(INFO) << "Archiving and deleting the meta directory '" << metaDir
+            << "' to allow incompatible upgrade!";
+
+  // Create the archive directory, if it doesn't exist.
+  Try<Nothing> result = os::mkdir(archiveDir);
+  if (result.isSome()) {
+    // The archive file is named after this slave's id.
+    result = os::tar(
+        metaDir, path::join(archiveDir, info.id().value() + ".tar.gz"));
+
+    if (result.isError()) {
+      LOG(ERROR) << "Failed to archive meta directory '" << metaDir
+                 << "': " << result.error();
+    }
+  } else {
+    LOG(ERROR) << "Failed to create archive directory '" << archiveDir
+               << "': " << result.error();
+  }
+
+  // NOTE: The meta directory is deleted even if archiving failed above;
+  // archiving is best effort and failures are only logged.
+  result = os::rmdir(metaDir);
+  if (result.isError()) {
+    LOG(ERROR) << "Failed to delete meta directory '" << metaDir
+               << "': " << result.error();
+  }
+
+  shutdown();
}
@@ -1661,17 +1698,15 @@ Future<Nothing> Slave::recover(bool reco
{
const string& metaDir = paths::getMetaRootDir(flags.work_dir);
- // We consider the absence of 'metaDir' to mean that this is the
- // very first time this slave was started with checkpointing
- // enabled.
+ // We consider the absence of 'metaDir' to mean that this is either
+ // the first time this slave was started with checkpointing enabled
+ // or this slave was started after an upgrade (--recover=cleanup).
if (!os::exists(metaDir)) {
// NOTE: We recover the isolation module here to cleanup any old
// executors (e.g: orphaned cgroups).
return dispatch(isolationModule, &IsolationModule::recover, None());
}
- // TODO(vinod): Check for version and slaveinfo compatibility.
-
// First, recover the slave state.
Result<SlaveState> state = state::recover(metaDir, safe);
if (state.isError()) {
@@ -1685,6 +1720,23 @@ Future<Nothing> Slave::recover(bool reco
return dispatch(isolationModule, &IsolationModule::recover, None());
}
+ // Check for SlaveInfo compatibility.
+ // TODO(vinod): Also check for version compatibility.
+ // NOTE: We set the 'id' field in 'info' from the recovered state,
+ // as a hack to compare the info created from options/flags with
+ // the recovered info.
+ info.mutable_id()->CopyFrom(state.get().id);
+ if (reconnect && !(info == state.get().info.get())) {
+ EXIT(1)
+ << "Incompatible slave info detected.\n"
+ << "Old slave info:\n" << state.get().info.get() << "\n"
+ << "New slave info:\n" << info << "\n"
+ << "To properly upgrade the slave do as follows:\n"
+ << "Step 1: Start the slave (old slave info) with --recover=cleanup.\n"
+ << "Step 2: Wait till the slave kills all executors and shuts down.\n"
+ << "Step 3: Start the upgraded slave (new slave info).\n";
+ }
+
info = state.get().info.get(); // Recover the slave info.
// Recover the status update manager, then the isolation module and
Modified: incubator/mesos/trunk/src/slave/slave.hpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/src/slave/slave.hpp?rev=1455818&r1=1455817&r2=1455818&view=diff
==============================================================================
--- incubator/mesos/trunk/src/slave/slave.hpp (original)
+++ incubator/mesos/trunk/src/slave/slave.hpp Wed Mar 13 06:24:06 2013
@@ -224,6 +224,10 @@ protected:
const state::SlaveState& state,
bool reconnect);
+ // Called when the slave is started in 'cleanup' recovery mode and
+ // all the executors have terminated.
+ void cleanup();
+
private:
Slave(const Slave&); // No copying.
Slave& operator = (const Slave&); // No assigning.
Modified: incubator/mesos/trunk/third_party/libprocess/third_party/stout/include/stout/os.hpp
URL: http://svn.apache.org/viewvc/incubator/mesos/trunk/third_party/libprocess/third_party/stout/include/stout/os.hpp?rev=1455818&r1=1455817&r2=1455818&view=diff
==============================================================================
--- incubator/mesos/trunk/third_party/libprocess/third_party/stout/include/stout/os.hpp (original)
+++ incubator/mesos/trunk/third_party/libprocess/third_party/stout/include/stout/os.hpp Wed Mar 13 06:24:06 2013
@@ -845,6 +845,23 @@ inline Try<int> shell(std::ostream* os,
}
+// Creates a tar 'archive' with gzip compression, of the given 'path'.
+// Returns an Error if the command could not be run or if it exited
+// with a non-zero status.
+// NOTE: Both arguments are single-quoted in the command line so that
+// paths containing whitespace or other shell metacharacters reach
+// 'tar' intact. Paths that themselves contain a single quote are not
+// supported.
+inline Try<Nothing> tar(const std::string& path, const std::string& archive)
+{
+  Try<int> status =
+    shell(NULL, "tar -czf '%s' '%s'", archive.c_str(), path.c_str());
+
+  if (status.isError()) {
+    return Error("Failed to archive " + path + ": " + status.error());
+  } else if (status.get() != 0) {
+    return Error("Non-zero exit status when archiving " + path +
+                 ": " + stringify(status.get()));
+  }
+
+  return Nothing();
+}
+
+
// Returns the list of files that match the given (shell) pattern.
inline Try<std::list<std::string> > glob(const std::string& pattern)
{