You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by ji...@apache.org on 2016/04/01 03:01:44 UTC
[2/4] mesos git commit: Implemented recover() method of "network/cni"
isolator.
Implemented recover() method of "network/cni" isolator.
Review: https://reviews.apache.org/r/45383/
Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/7c85517f
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/7c85517f
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/7c85517f
Branch: refs/heads/master
Commit: 7c85517fad42c6bfa36be646ade2917d0f56973c
Parents: 75ee704
Author: Qian Zhang <zh...@cn.ibm.com>
Authored: Thu Mar 31 16:02:18 2016 -0700
Committer: Jie Yu <yu...@gmail.com>
Committed: Thu Mar 31 18:01:36 2016 -0700
----------------------------------------------------------------------
.../mesos/isolators/network/cni/cni.cpp | 281 ++++++++++++++-----
.../mesos/isolators/network/cni/cni.hpp | 4 +-
.../mesos/isolators/network/cni/paths.cpp | 55 ++++
.../mesos/isolators/network/cni/paths.hpp | 12 +
.../mesos/isolators/network/cni/spec.cpp | 18 +-
.../mesos/isolators/network/cni/spec.hpp | 5 +-
6 files changed, 300 insertions(+), 75 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mesos/blob/7c85517f/src/slave/containerizer/mesos/isolators/network/cni/cni.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/network/cni/cni.cpp b/src/slave/containerizer/mesos/isolators/network/cni/cni.cpp
index 657aa83..3a71e97 100644
--- a/src/slave/containerizer/mesos/isolators/network/cni/cni.cpp
+++ b/src/slave/containerizer/mesos/isolators/network/cni/cni.cpp
@@ -134,7 +134,7 @@ Try<Isolator*> NetworkCniIsolatorProcess::create(const Flags& flags)
path + "': " + read.error());
}
- Try<spec::NetworkConfig> parse = spec::parse(read.get());
+ Try<spec::NetworkConfig> parse = spec::parseNetworkConfig(read.get());
if (parse.isError()) {
return Error(
"Failed to parse CNI network configuration file '" +
@@ -218,47 +218,6 @@ Try<Isolator*> NetworkCniIsolatorProcess::create(const Flags& flags)
(rootDir.isError() ? rootDir.error() : "No such file or directory"));
}
- // Self bind mount the CNI network information root directory.
- Try<Nothing> mount = fs::mount(
- rootDir.get(),
- rootDir.get(),
- None(),
- MS_BIND,
- NULL);
-
- if (mount.isError()) {
- return Error(
- "Failed to self bind mount '" + rootDir.get() +
- "': " + mount.error());
- }
-
- // Mark the mount as shared + slave.
- mount = fs::mount(
- None(),
- rootDir.get(),
- None(),
- MS_SLAVE,
- NULL);
-
- if (mount.isError()) {
- return Error(
- "Failed to mark mount '" + rootDir.get() +
- "' as a slave mount: " + mount.error());
- }
-
- mount = fs::mount(
- None(),
- rootDir.get(),
- None(),
- MS_SHARED,
- NULL);
-
- if (mount.isError()) {
- return Error(
- "Failed to mark mount '" + rootDir.get() +
- "' as a shared mount: " + mount.error());
- }
-
Result<string> pluginDir = os::realpath(flags.network_cni_plugins_dir.get());
if (!pluginDir.isSome()) {
return Error(
@@ -281,6 +240,153 @@ Future<Nothing> NetworkCniIsolatorProcess::recover(
const list<ContainerState>& states,
const hashset<ContainerID>& orphans)
{
+ foreach (const ContainerState& state, states) {
+ const ContainerID& containerId = state.container_id();
+
+ Try<Nothing> recover = _recover(containerId);
+ if (recover.isError()) {
+ infos.clear();
+ return Failure(
+ "Failed to recover CNI network information for container "
+ + stringify(containerId) + ": " + recover.error());
+ }
+ }
+
+ Try<list<string>> entries = os::ls(rootDir.get());
+ if (entries.isError()) {
+ infos.clear();
+ return Failure(
+ "Unable to list CNI network information root directory '" +
+ rootDir.get() + "': " + entries.error());
+ }
+
+ foreach (const string& entry, entries.get()) {
+ ContainerID containerId;
+ containerId.set_value(Path(entry).basename());
+
+ if (infos.contains(containerId)) {
+ continue;
+ }
+
+ // Recover CNI network information for orphan container.
+ Try<Nothing> recover = _recover(containerId);
+ if (recover.isError()) {
+ infos.clear();
+ return Failure(
+ "Failed to recover CNI network information for orphan container "
+ + stringify(containerId) + ": " + recover.error());
+ }
+
+ // Known orphan containers will be cleaned up by containerizer
+ // using the normal cleanup path. See MESOS-2367 for details.
+ if (orphans.contains(containerId)) {
+ continue;
+ }
+
+ LOG(INFO) << "Removing unknown orphaned container " << containerId;
+
+ cleanup(containerId);
+ }
+
+ return Nothing();
+}
+
+
+Try<Nothing> NetworkCniIsolatorProcess::_recover(
+ const ContainerID& containerId)
+{
+ const string networkInfoDir =
+ paths::getNetworkInfoDir(rootDir.get(), containerId.value());
+
+ if (!os::exists(networkInfoDir)) {
+ // This may occur in two cases:
+ // 1. Executor has exited and the isolator has removed the CNI network
+ // information directory in '_cleanup()' but agent dies before
+ // noticing this.
+ // 2. Agent dies before the isolator creates the CNI network information
+ // directory in 'isolate()'.
+ // For these two cases, we do not need to do anything since there is nothing
+ // to clean up after agent restarts.
+ return Nothing();
+ }
+
+ Try<list<string>> networkNames =
+ paths::getNetworkNames(rootDir.get(), containerId.value());
+
+ if (networkNames.isError()) {
+ return Error(
+ "Failed to find CNI network directories for container " +
+ stringify(containerId) + ": " + networkNames.error());
+ }
+
+ hashmap<string, NetworkInfo> networkInfos;
+ foreach (const string& networkName, networkNames.get()) {
+ if (!networkConfigs.contains(networkName)) {
+ return Error("Unknown CNI network '" + networkName + "'");
+ }
+
+ Try<list<string>> interfaces = paths::getInterfaces(
+ rootDir.get(),
+ containerId.value(),
+ networkName);
+
+ if (interfaces.isError()) {
+ return Error(
+ "Failed to find interface directory for container " +
+ stringify(containerId));
+ }
+
+ // Currently we only support one interface in one CNI network for
+ // a container.
+ if (interfaces.get().size() != 1) {
+ return Error(
+ "Unable to find interface directory for container " +
+ stringify(containerId));
+ }
+
+ NetworkInfo networkInfo;
+ networkInfo.networkName = networkName;
+ networkInfo.ifName = interfaces->front();
+
+ const string networkInfoPath = paths::getNetworkInfoPath(
+ rootDir.get(),
+ containerId.value(),
+ networkInfo.networkName,
+ networkInfo.ifName);
+
+ if (!os::exists(networkInfoPath)) {
+ // This may occur in the case that agent dies before the isolator
+ // checkpoints the output of CNI plugin in '_attach()'.
+ LOG(WARNING)
+ << "The checkpointed CNI plugin output '" << networkInfoPath
+ << "' for container " << containerId << " does not exist";
+ networkInfos.put(networkName, networkInfo);
+ continue;
+ }
+
+ Try<string> read = os::read(networkInfoPath);
+ if (read.isError()) {
+ return Error(
+ "Failed to read CNI network information file '" +
+ networkInfoPath + "': " + read.error());
+ }
+
+ Try<spec::NetworkInfo> parse = spec::parseNetworkInfo(read.get());
+ if (parse.isError()) {
+ return Error(
+ "Failed to parse CNI network information file '" +
+ networkInfoPath + "': " + parse.error());
+ }
+
+ networkInfo.network = parse.get();
+
+ networkInfos.put(networkName, networkInfo);
+ }
+
+ if (!networkInfos.empty()) {
+ infos.put(containerId, Owned<Info>(new Info(networkInfos)));
+ }
+
return Nothing();
}
@@ -533,20 +639,11 @@ Future<Nothing> NetworkCniIsolatorProcess::_attach(
}
// Parse the output of CNI plugin.
- Try<JSON::Object> json = JSON::parse<JSON::Object>(output.get());
- if (json.isError()) {
- return Failure(
- "Failed to parse the output of CNI plugin '" + plugin +
- "' as JSON format: " + json.error());
- }
-
- Try<spec::NetworkInfo> parse =
- ::protobuf::parse<spec::NetworkInfo>(json.get());
-
+ Try<spec::NetworkInfo> parse = spec::parseNetworkInfo(output.get());
if (parse.isError()) {
return Failure(
- "Failed to parse the output of CNI plugin '" + plugin +
- "' as protobuf: " + parse.error());
+ "Failed to parse the output of the CNI plugin '" +
+ plugin + "': " + parse.error());
}
if (parse.get().has_ip4()) {
@@ -617,13 +714,48 @@ Future<ContainerStatus> NetworkCniIsolatorProcess::status(
Future<Nothing> NetworkCniIsolatorProcess::cleanup(
const ContainerID& containerId)
{
- if (!infos.contains(containerId)) {
+ const string networkInfoDir =
+ paths::getNetworkInfoDir(rootDir.get(), containerId.value());
+
+ Option<list<string>> networkNames = None();
+ if (os::exists(networkInfoDir)) {
+ Try<list<string>> result =
+ paths::getNetworkNames(rootDir.get(), containerId.value());
+
+ if (result.isError()) {
+ return Failure(
+ "Failed to find CNI network directories for container " +
+ stringify(containerId) + ": " + result.error());
+ }
+
+ networkNames = result.get();
+ if (networkNames->empty()) {
+ // This is to handle two recovery cases:
+ // 1. Agent dies right before the isolator calls 'attach()' to attach
+ // the container to CNI networks.
+ // 2. Agent dies right before the isolator removes the CNI network
+ // information directory in '_cleanup()'.
+ // For these two cases, we just need to remove the CNI network
+ // information directory here and there is nothing else to clean up.
+ return _cleanup(containerId);
+ }
+ } else {
+ // This is to handle the following two cases:
+ // 1. The isolator fails to create the CNI network information directory
+ // for the container in 'isolate()'.
+ // 2. The container does not want to opt in CNI network.
+ // For 1, we need to remove the container's info struct, and for 2, we do
+ // not need to do anything.
+ if (infos.contains(containerId)) {
+ infos.erase(containerId);
+ }
+
return Nothing();
}
// Invoke CNI plugin to detach container from CNI networks.
list<Future<Nothing>> futures;
- foreachkey (const string& networkName, infos[containerId]->networkInfos) {
+ foreach (const string& networkName, networkNames.get()) {
futures.push_back(detach(containerId, networkName));
}
@@ -754,19 +886,19 @@ Future<Nothing> NetworkCniIsolatorProcess::_detach(
Future<Nothing> NetworkCniIsolatorProcess::_cleanup(
const ContainerID& containerId,
- const list<Future<Nothing>>& invocations)
+ const Option<std::list<Future<Nothing>>>& invocations)
{
- CHECK(infos.contains(containerId));
-
- vector<string> messages;
- for (const Future<Nothing>& invocation : invocations) {
- if (invocation.isFailed()) {
- messages.push_back(invocation.failure());
+ if (invocations.isSome()) {
+ vector<string> messages;
+ for (const Future<Nothing>& invocation : invocations.get()) {
+ if (invocation.isFailed()) {
+ messages.push_back(invocation.failure());
+ }
}
- }
- if (!messages.empty()) {
- return Failure(strings::join("\n", messages));
+ if (!messages.empty()) {
+ return Failure(strings::join("\n", messages));
+ }
}
const string networkInfoDir =
@@ -774,11 +906,13 @@ Future<Nothing> NetworkCniIsolatorProcess::_cleanup(
const string target =
paths::getNamespacePath(rootDir.get(), containerId.value());
- Try<Nothing> unmount = fs::unmount(target, MNT_DETACH);
- if (unmount.isError()) {
- return Failure(
- "Failed to unmount the network namespace handle '" + target + "': " +
- unmount.error());
+ if (os::exists(target)) {
+ Try<Nothing> unmount = fs::unmount(target, MNT_DETACH);
+ if (unmount.isError()) {
+ return Failure(
+ "Failed to unmount the network namespace handle '" + target + "': " +
+ unmount.error());
+ }
}
Try<Nothing> rmdir = os::rmdir(networkInfoDir);
@@ -788,11 +922,14 @@ Future<Nothing> NetworkCniIsolatorProcess::_cleanup(
networkInfoDir + "': " + rmdir.error());
}
- infos.erase(containerId);
+ if (infos.contains(containerId)) {
+ infos.erase(containerId);
+ }
return Nothing();
}
+
} // namespace slave {
} // namespace internal {
} // namespace mesos {
http://git-wip-us.apache.org/repos/asf/mesos/blob/7c85517f/src/slave/containerizer/mesos/isolators/network/cni/cni.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/network/cni/cni.hpp b/src/slave/containerizer/mesos/isolators/network/cni/cni.hpp
index ca642bb..3a07540 100644
--- a/src/slave/containerizer/mesos/isolators/network/cni/cni.hpp
+++ b/src/slave/containerizer/mesos/isolators/network/cni/cni.hpp
@@ -138,7 +138,9 @@ private:
process::Future<Nothing> _cleanup(
const ContainerID& containerId,
- const std::list<process::Future<Nothing>>& invocations);
+ const Option<std::list<process::Future<Nothing>>>& invocations = None());
+
+ Try<Nothing> _recover(const ContainerID& containerId);
// CNI network configurations keyed by network name.
hashmap<std::string, NetworkConfigInfo> networkConfigs;
http://git-wip-us.apache.org/repos/asf/mesos/blob/7c85517f/src/slave/containerizer/mesos/isolators/network/cni/paths.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/network/cni/paths.cpp b/src/slave/containerizer/mesos/isolators/network/cni/paths.cpp
index 611f386..45e7e99 100644
--- a/src/slave/containerizer/mesos/isolators/network/cni/paths.cpp
+++ b/src/slave/containerizer/mesos/isolators/network/cni/paths.cpp
@@ -15,10 +15,12 @@
// limitations under the License.
#include <stout/path.hpp>
+#include <stout/fs.hpp>
#include "slave/containerizer/mesos/isolators/network/cni/paths.hpp"
using std::string;
+using std::list;
namespace mesos {
namespace internal {
@@ -47,6 +49,32 @@ string getNetworkDir(
}
+Try<list<string>> getNetworkNames(
+ const string& rootDir,
+ const string& containerId)
+{
+ const string& networkInfoDir = getNetworkInfoDir(rootDir, containerId);
+
+ Try<list<string>> entries = os::ls(networkInfoDir);
+ if (entries.isError()) {
+ return Error(
+ "Unable to list the CNI network information directory '" +
+ networkInfoDir + "': " + entries.error());
+ }
+
+ list<string> networkNames;
+ foreach (const string& entry, entries.get()) {
+ const string path = path::join(networkInfoDir, entry);
+
+ if (os::stat::isdir(path)) {
+ networkNames.push_back(entry);
+ }
+ }
+
+ return networkNames;
+}
+
+
string getInterfaceDir(
const string& rootDir,
const string& containerId,
@@ -57,6 +85,33 @@ string getInterfaceDir(
}
+Try<list<string>> getInterfaces(
+ const string& rootDir,
+ const string& containerId,
+ const string& networkName)
+{
+ const string& networkDir = getNetworkDir(rootDir, containerId, networkName);
+
+ Try<list<string>> entries = os::ls(networkDir);
+ if (entries.isError()) {
+ return Error(
+ "Unable to list the CNI network directory '" + networkDir + "': " +
+ entries.error());
+ }
+
+ list<string> ifNames;
+ foreach (const string& entry, entries.get()) {
+ const string path = path::join(networkDir, entry);
+
+ if (os::stat::isdir(path)) {
+ ifNames.push_back(entry);
+ }
+ }
+
+ return ifNames;
+}
+
+
string getNetworkInfoPath(
const string& rootDir,
const string& containerId,
http://git-wip-us.apache.org/repos/asf/mesos/blob/7c85517f/src/slave/containerizer/mesos/isolators/network/cni/paths.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/network/cni/paths.hpp b/src/slave/containerizer/mesos/isolators/network/cni/paths.hpp
index f627ec9..edefa90 100644
--- a/src/slave/containerizer/mesos/isolators/network/cni/paths.hpp
+++ b/src/slave/containerizer/mesos/isolators/network/cni/paths.hpp
@@ -18,6 +18,7 @@
#define __ISOLATOR_CNI_PATHS_HPP__
using std::string;
+using std::list;
namespace mesos {
namespace internal {
@@ -53,6 +54,11 @@ string getNetworkDir(
const string& networkName);
+Try<list<string>> getNetworkNames(
+ const string& rootDir,
+ const string& containerId);
+
+
string getInterfaceDir(
const string& rootDir,
const string& containerId,
@@ -60,6 +66,12 @@ string getInterfaceDir(
const string& ifName);
+Try<list<string>> getInterfaces(
+ const string& rootDir,
+ const string& containerId,
+ const string& networkName);
+
+
string getNetworkInfoPath(
const string& rootDir,
const string& containerId,
http://git-wip-us.apache.org/repos/asf/mesos/blob/7c85517f/src/slave/containerizer/mesos/isolators/network/cni/spec.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/network/cni/spec.cpp b/src/slave/containerizer/mesos/isolators/network/cni/spec.cpp
index 5b5f904..938a9f3 100644
--- a/src/slave/containerizer/mesos/isolators/network/cni/spec.cpp
+++ b/src/slave/containerizer/mesos/isolators/network/cni/spec.cpp
@@ -27,7 +27,7 @@ namespace slave {
namespace cni {
namespace spec {
-Try<NetworkConfig> parse(const string& s)
+Try<NetworkConfig> parseNetworkConfig(const string& s)
{
Try<JSON::Object> json = JSON::parse<JSON::Object>(s);
if (json.isError()) {
@@ -43,6 +43,22 @@ Try<NetworkConfig> parse(const string& s)
return parse.get();
}
+Try<NetworkInfo> parseNetworkInfo(const string& s)
+{
+ Try<JSON::Object> json = JSON::parse<JSON::Object>(s);
+ if (json.isError()) {
+ return Error("JSON parse failed: " + json.error());
+ }
+
+ Try<NetworkInfo> parse = ::protobuf::parse<NetworkInfo>(json.get());
+
+ if (parse.isError()) {
+ return Error("Protobuf parse failed: " + parse.error());
+ }
+
+ return parse.get();
+}
+
} // namespace spec {
} // namespace cni {
} // namespace slave {
http://git-wip-us.apache.org/repos/asf/mesos/blob/7c85517f/src/slave/containerizer/mesos/isolators/network/cni/spec.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/isolators/network/cni/spec.hpp b/src/slave/containerizer/mesos/isolators/network/cni/spec.hpp
index 6a3c336..e696741 100644
--- a/src/slave/containerizer/mesos/isolators/network/cni/spec.hpp
+++ b/src/slave/containerizer/mesos/isolators/network/cni/spec.hpp
@@ -28,7 +28,10 @@ namespace slave {
namespace cni {
namespace spec {
-Try<NetworkConfig> parse(const std::string& s);
+Try<NetworkConfig> parseNetworkConfig(const std::string& s);
+
+
+Try<NetworkInfo> parseNetworkInfo(const std::string& s);
} // namespace spec {
} // namespace cni {