You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mesos.apache.org by gi...@apache.org on 2018/06/07 19:12:58 UTC

[4/6] mesos git commit: Supported hdfs fetching in local puller.

Supported hdfs fetching in local puller.

Review: https://reviews.apache.org/r/66561


Project: http://git-wip-us.apache.org/repos/asf/mesos/repo
Commit: http://git-wip-us.apache.org/repos/asf/mesos/commit/044aa6f9
Tree: http://git-wip-us.apache.org/repos/asf/mesos/tree/044aa6f9
Diff: http://git-wip-us.apache.org/repos/asf/mesos/diff/044aa6f9

Branch: refs/heads/master
Commit: 044aa6f90def775d9d2e7172320119b181e71575
Parents: b41a170
Author: Gilbert Song <so...@gmail.com>
Authored: Tue Apr 10 19:28:43 2018 -0700
Committer: Gilbert Song <so...@gmail.com>
Committed: Thu Jun 7 12:11:38 2018 -0700

----------------------------------------------------------------------
 docs/configuration/agent.md                     | 10 ++-
 src/hdfs/hdfs.cpp                               | 56 ++++++++++++++
 src/hdfs/hdfs.hpp                               |  8 ++
 .../mesos/provisioner/docker/local_puller.cpp   | 77 +++++++++++++++++---
 .../mesos/provisioner/docker/local_puller.hpp   |  6 +-
 .../mesos/provisioner/docker/puller.cpp         | 12 ++-
 .../mesos/provisioner/docker/store.cpp          |  4 +
 src/slave/flags.cpp                             |  8 +-
 8 files changed, 161 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mesos/blob/044aa6f9/docs/configuration/agent.md
----------------------------------------------------------------------
diff --git a/docs/configuration/agent.md b/docs/configuration/agent.md
index c8e9e6c..d2b6b82 100644
--- a/docs/configuration/agent.md
+++ b/docs/configuration/agent.md
@@ -621,10 +621,12 @@ recovers.
   <td>
 The default url for Mesos containerizer to pull Docker images. It could
 either be a Docker registry server url (i.e: <code>https://registry.docker.io</code>),
-or a local path (i.e: <code>/tmp/docker/images</code>) in which Docker
-image archives (result of <code>docker save</code>) are stored. Note
-that this option won't change the default registry server for Docker
-containerizer. (default: https://registry-1.docker.io)
+or a source that Docker image archives (result of <code>docker save</code>) are
+stored. The Docker archive source could be specified either as a local
+path (i.e: <code>/tmp/docker/images</code>), or as an HDFS URI
+(i.e: <code>hdfs://localhost:8020/archives/</code>) that this option won't change
+the default registry server for Docker containerizer.
+(default: https://registry-1.docker.io)
   </td>
 </tr>
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/044aa6f9/src/hdfs/hdfs.cpp
----------------------------------------------------------------------
diff --git a/src/hdfs/hdfs.cpp b/src/hdfs/hdfs.cpp
index 726925f..3947b69 100644
--- a/src/hdfs/hdfs.cpp
+++ b/src/hdfs/hdfs.cpp
@@ -38,6 +38,8 @@
 #include "common/status_utils.hpp"
 #include "hdfs/hdfs.hpp"
 
+#include "uri/schemes/hdfs.hpp"
+
 using namespace process;
 
 using std::string;
@@ -137,6 +139,60 @@ Try<Owned<HDFS>> HDFS::create(const Option<string>& _hadoop)
 }
 
 
+Try<mesos::URI> HDFS::parse(const string& uri)
+{
+  size_t schemePos = uri.find("://");
+  if (schemePos == string::npos) {
+    return Error("Missing scheme in url string");
+  }
+
+  const string uriPath = uri.substr(schemePos + 3);
+
+  size_t pathPos = uriPath.find_first_of('/');
+  if (pathPos == 0) {
+    return mesos::uri::hdfs(uriPath);
+  }
+
+  // If path is specified in the URL, try to capture the host and path
+  // separately.
+  string host = uriPath;
+  string path = "/";
+  if (pathPos != string::npos) {
+    host = host.substr(0, pathPos);
+    path = uriPath.substr(pathPos);
+  }
+
+  if (host.empty()) {
+    return mesos::uri::hdfs(path);
+  }
+
+  const vector<string> tokens = strings::tokenize(host, ":");
+
+  if (tokens[0].empty()) {
+    return Error("Host not found in url");
+  }
+
+  if (tokens.size() > 2) {
+    return Error("Found multiple ports in url");
+  }
+
+  Option<int> port;
+  if (tokens.size() == 2) {
+    Try<int> numifyPort = numify<int>(tokens[1]);
+    if (numifyPort.isError()) {
+      return Error("Failed to parse port: " + numifyPort.error());
+    }
+
+    port = numifyPort.get();
+  } else {
+    // Default port for HDFS.
+    port = 8020;
+  }
+
+  return mesos::uri::hdfs(path, tokens[0], port.get());
+}
+
+
 // An HDFS client path must be either a full URI or an absolute path. If it is
 // a relative path, prepend "/" to make it absolute. (Note that all URI schemes
 // supported by the HDFS client contain "://" whereas file paths never do.)

http://git-wip-us.apache.org/repos/asf/mesos/blob/044aa6f9/src/hdfs/hdfs.hpp
----------------------------------------------------------------------
diff --git a/src/hdfs/hdfs.hpp b/src/hdfs/hdfs.hpp
index 716d13f..66fd226 100644
--- a/src/hdfs/hdfs.hpp
+++ b/src/hdfs/hdfs.hpp
@@ -28,6 +28,8 @@
 #include <stout/option.hpp>
 #include <stout/try.hpp>
 
+#include <mesos/uri/uri.hpp>
+
 
 // TODO(benh): We should get the hostname:port (or ip:port) of the
 // server via:
@@ -49,6 +51,12 @@ public:
   static Try<process::Owned<HDFS>> create(
       const Option<std::string>& hadoop = None());
 
+  // TODO(gilbert): Remove this helper function once we have URI Parser
+  // support (see MESOS-5254 for details). Ideally, we should support
+  // other schemes (e.g., hftp, s3, s3n etc) with hadoop plugin. It is
+  // hard coded for HDFS for now.
+  static Try<mesos::URI> parse(const std::string& uri);
+
   process::Future<bool> exists(const std::string& path);
   process::Future<Bytes> du(const std::string& path);
   process::Future<Nothing> rm(const std::string& path);

http://git-wip-us.apache.org/repos/asf/mesos/blob/044aa6f9/src/slave/containerizer/mesos/provisioner/docker/local_puller.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/docker/local_puller.cpp b/src/slave/containerizer/mesos/provisioner/docker/local_puller.cpp
index 509be63..df715e2 100644
--- a/src/slave/containerizer/mesos/provisioner/docker/local_puller.cpp
+++ b/src/slave/containerizer/mesos/provisioner/docker/local_puller.cpp
@@ -32,6 +32,11 @@
 
 #include "common/command_utils.hpp"
 
+#include "hdfs/hdfs.hpp"
+
+#include "uri/schemes/file.hpp"
+#include "uri/schemes/hdfs.hpp"
+
 #include "slave/containerizer/mesos/provisioner/docker/local_puller.hpp"
 #include "slave/containerizer/mesos/provisioner/docker/paths.hpp"
 
@@ -50,10 +55,14 @@ namespace docker {
 class LocalPullerProcess : public Process<LocalPullerProcess>
 {
 public:
-  LocalPullerProcess(const string& _storeDir, const string& _archivesDir)
+  LocalPullerProcess(
+      const string& _storeDir,
+      const URI& _archivesUri,
+      const Shared<uri::Fetcher>& _fetcher)
     : ProcessBase(process::ID::generate("docker-provisioner-local-puller")),
       storeDir(_storeDir),
-      archivesDir(_archivesDir) {}
+      archivesUri(_archivesUri),
+      fetcher(_fetcher) {}
 
   ~LocalPullerProcess() {}
 
@@ -83,22 +92,47 @@ private:
       const string& backend);
 
   const string storeDir;
-  const string archivesDir;
+  const URI archivesUri;
+
+  Shared<uri::Fetcher> fetcher;
 };
 
 
-Try<Owned<Puller>> LocalPuller::create(const Flags& flags)
+static Try<URI> parseUri(const string& uri)
+{
+  if (strings::startsWith(uri, "/")) {
+    return uri::file(uri);
+  }
+
+  return HDFS::parse(uri);
+}
+
+
+Try<Owned<Puller>> LocalPuller::create(
+    const Flags& flags,
+    const Shared<uri::Fetcher>& fetcher)
 {
   // This should already been verified at puller.cpp.
-  if (!strings::startsWith(flags.docker_registry, "/")) {
-    return Error("Expecting registry url starting with '/'");
+  if (!strings::startsWith(flags.docker_registry, "/") &&
+      !strings::startsWith(flags.docker_registry, "hdfs://")) {
+    return Error("Expecting registry url starting with '/' or 'hdfs'");
+  }
+
+  Try<URI> uri = parseUri(flags.docker_registry);
+  if (uri.isError()) {
+    return Error(
+        "Failed to parse the agent flag --docker_registry '" +
+        flags.docker_registry + "': " + uri.error());
   }
 
   VLOG(1) << "Creating local puller with docker registry '"
           << flags.docker_registry << "'";
 
   Owned<LocalPullerProcess> process(
-      new LocalPullerProcess(flags.docker_store_dir, flags.docker_registry));
+      new LocalPullerProcess(
+          flags.docker_store_dir,
+          uri.get(),
+          fetcher));
 
   return Owned<Puller>(new LocalPuller(process));
 }
@@ -140,14 +174,37 @@ Future<vector<string>> LocalPullerProcess::pull(
 {
   // TODO(jieyu): We need to handle the case where the image reference
   // contains a slash '/'.
+  const string image = stringify(reference);
+
+  // TODO(gilbert): Support 'http' and 'https'.
+  if (archivesUri.scheme() == "hdfs") {
+    URI uri = archivesUri;
+    uri.set_path(paths::getImageArchiveTarPath(archivesUri.path(), image));
+
+    VLOG(1) << "Fetching image '" << reference
+            << "' from '" << uri
+            << "' to '" << directory << "' using HDFS uri fetcher";
+
+    return fetcher->fetch(uri, directory)
+      .then(defer(self(), [=]() -> Future<vector<string>> {
+        const string source = paths::getImageArchiveTarPath(directory, image);
+
+        VLOG(1) << "Untarring image '" << reference
+                << "' from '" << source
+                << "' to '" << directory << "'";
+
+        return command::untar(Path(source), Path(directory))
+          .then(defer(self(), &Self::_pull, reference, directory, backend));
+      }));
+  }
+
   const string tarPath = paths::getImageArchiveTarPath(
-      archivesDir,
-      stringify(reference));
+      archivesUri.path(), image);
 
   if (!os::exists(tarPath)) {
     return Failure(
         "Failed to find archive for image '" +
-        stringify(reference) + "' at '" + tarPath + "'");
+        image + "' at '" + tarPath + "'");
   }
 
   VLOG(1) << "Untarring image '" << reference

http://git-wip-us.apache.org/repos/asf/mesos/blob/044aa6f9/src/slave/containerizer/mesos/provisioner/docker/local_puller.hpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/docker/local_puller.hpp b/src/slave/containerizer/mesos/provisioner/docker/local_puller.hpp
index 4d2e497..37f2510 100644
--- a/src/slave/containerizer/mesos/provisioner/docker/local_puller.hpp
+++ b/src/slave/containerizer/mesos/provisioner/docker/local_puller.hpp
@@ -21,6 +21,8 @@
 
 #include <mesos/docker/spec.hpp>
 
+#include <mesos/uri/fetcher.hpp>
+
 #include "slave/containerizer/mesos/provisioner/docker/puller.hpp"
 
 #include "slave/flags.hpp"
@@ -42,7 +44,9 @@ class LocalPullerProcess;
 class LocalPuller : public Puller
 {
 public:
-  static Try<process::Owned<Puller>> create(const Flags& flags);
+  static Try<process::Owned<Puller>> create(
+      const Flags& flags,
+      const process::Shared<uri::Fetcher>& fetcher);
 
   ~LocalPuller();
 

http://git-wip-us.apache.org/repos/asf/mesos/blob/044aa6f9/src/slave/containerizer/mesos/provisioner/docker/puller.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/docker/puller.cpp b/src/slave/containerizer/mesos/provisioner/docker/puller.cpp
index 647cf05..cb4248b 100644
--- a/src/slave/containerizer/mesos/provisioner/docker/puller.cpp
+++ b/src/slave/containerizer/mesos/provisioner/docker/puller.cpp
@@ -36,8 +36,16 @@ Try<Owned<Puller>> Puller::create(
     const Shared<uri::Fetcher>& fetcher,
     SecretResolver* secretResolver)
 {
-  if (strings::startsWith(flags.docker_registry, "/")) {
-    Try<Owned<Puller>> puller = LocalPuller::create(flags);
+  // TODO(gilbert): Consider to introduce a new protobuf API to
+  // represent docker image by an optional URI in Image::Docker,
+  // so that the source of docker images are not necessarily from
+  // the agent flag.
+  // TODO(gilbert): Support multiple pullers simultaneously in
+  // docker store, so that users could prefer pulling from either
+  // image tarballs or the remote docker registry.
+  if (strings::startsWith(flags.docker_registry, "/") ||
+      strings::startsWith(flags.docker_registry, "hdfs://")) {
+    Try<Owned<Puller>> puller = LocalPuller::create(flags, fetcher);
     if (puller.isError()) {
       return Error("Failed to create local puller: " + puller.error());
     }

http://git-wip-us.apache.org/repos/asf/mesos/blob/044aa6f9/src/slave/containerizer/mesos/provisioner/docker/store.cpp
----------------------------------------------------------------------
diff --git a/src/slave/containerizer/mesos/provisioner/docker/store.cpp b/src/slave/containerizer/mesos/provisioner/docker/store.cpp
index f6b8f39..6e7dc44 100644
--- a/src/slave/containerizer/mesos/provisioner/docker/store.cpp
+++ b/src/slave/containerizer/mesos/provisioner/docker/store.cpp
@@ -144,6 +144,10 @@ Try<Owned<slave::Store>> Store::create(
   _flags.docker_stall_timeout = flags.fetcher_stall_timeout;
 #endif
 
+  if (flags.hadoop_home.isSome()) {
+    _flags.hadoop_client = path::join(flags.hadoop_home.get(), "bin", "hadoop");
+  }
+
   Try<Owned<uri::Fetcher>> fetcher = uri::fetcher::create(_flags);
   if (fetcher.isError()) {
     return Error("Failed to create the URI fetcher: " + fetcher.error());

http://git-wip-us.apache.org/repos/asf/mesos/blob/044aa6f9/src/slave/flags.cpp
----------------------------------------------------------------------
diff --git a/src/slave/flags.cpp b/src/slave/flags.cpp
index 06c5421..8e448d8 100644
--- a/src/slave/flags.cpp
+++ b/src/slave/flags.cpp
@@ -200,9 +200,11 @@ mesos::internal::slave::Flags::Flags()
       "docker_registry",
       "The default url for Mesos containerizer to pull Docker images. It\n"
       "could either be a Docker registry server url (i.e: `https://registry.docker.io`),\n" // NOLINT(whitespace/line_length)
-      "or a local path (i.e: `/tmp/docker/images`) in which Docker image\n"
-      "archives (result of `docker save`) are stored. Note that this option\n"
-      "won't change the default registry server for Docker containerizer.",
+      "or a source that Docker image archives (result of `docker save`) are\n"
+      "stored. The Docker archive source could be specified either as a local\n"
+      "path (i.e: `/tmp/docker/images`), or as an HDFS URI\n"
+      "(i.e: `hdfs://localhost:8020/archives/`) that this option won't change\n"
+      "the default registry server for Docker containerizer.",
       "https://registry-1.docker.io");
 
   add(&Flags::docker_store_dir,