Posted to commits@mesos.apache.org by jp...@apache.org on 2018/10/17 23:14:31 UTC

[mesos] branch master updated (f9627b9 -> e0bc091)

This is an automated email from the ASF dual-hosted git repository.

jpeach pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git.


    from f9627b9  Fixed the FreeBSD MACRO as '__FreeBSD__' in posix/pipe.hpp.
     new 9108078  Add the output file to the hash on CommandInfo::URI.
     new e0bc091  Fixed fetcher deadlock with duplicate URIs.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 include/mesos/type_utils.hpp        |  5 ++--
 include/mesos/v1/mesos.hpp          |  1 +
 src/launcher/fetcher.cpp            | 30 ++++++++++---------
 src/slave/containerizer/fetcher.cpp | 14 +++++++++
 src/tests/fetcher_tests.cpp         | 57 ++++++++++++++++++++++++++++++++++++-
 5 files changed, 90 insertions(+), 17 deletions(-)


[mesos] 02/02: Fixed fetcher deadlock with duplicate URIs.

Posted by jp...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jpeach pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit e0bc091351fd7f26b41eed46d15f4eda0f0d3bf0
Author: James Peach <jp...@apache.org>
AuthorDate: Wed Oct 17 15:22:31 2018 -0700

    Fixed fetcher deadlock with duplicate URIs.
    
    If a fetch request contains duplicate URIs that are not already
    in the cache, the fetcher would erroneously expect that some other
    fetch process was going to download that cache entry. It would then
    wait for a Future that would never complete.
    
    The fix is to track whether the cache entry was created in this
    fetch, and in that case to simply allow the duplicate URI. In
    the fetcher, we check the cache before downloading so that a URI
    can be fetched to distinct output files without being downloaded
    multiple times.
    
    Review: https://reviews.apache.org/r/68587/
---
 src/launcher/fetcher.cpp            | 30 ++++++++++---------
 src/slave/containerizer/fetcher.cpp | 14 +++++++++
 src/tests/fetcher_tests.cpp         | 57 ++++++++++++++++++++++++++++++++++++-
 3 files changed, 86 insertions(+), 15 deletions(-)

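Before the diff, a minimal standalone sketch of the deduplication idea
(hypothetical simplified types, not the actual Mesos classes): when
several items in one fetch share the same URI value, only the first
creates a cache entry, and later duplicates take a reference on that
entry instead of waiting on a download that no other process will
perform. Keying the map by the URI value string mirrors the
`newEntries` hashmap introduced in the patch below.

#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

struct CacheEntry
{
  explicit CacheEntry(const std::string& uri) : uri(uri) {}
  std::string uri;
  int refs = 0;
  void reference() { ++refs; }
};

int main()
{
  // Three fetch items with the same URI value (hypothetical inputs).
  std::vector<std::string> uris = {"file:///tmp/test",
                                   "file:///tmp/test",
                                   "file:///tmp/test"};

  // Track entries created during *this* fetch, keyed by URI value.
  std::map<std::string, std::shared_ptr<CacheEntry>> newEntries;

  for (const std::string& uri : uris) {
    auto it = newEntries.find(uri);
    if (it != newEntries.end()) {
      // Duplicate URI: take another reference on the entry created
      // earlier in this fetch rather than waiting for a download
      // that no one else will perform.
      it->second->reference();
      continue;
    }

    // First occurrence: create the entry and take a reference.
    auto entry = std::make_shared<CacheEntry>(uri);
    entry->reference();
    newEntries.emplace(uri, entry);
  }

  std::cout << "entries created: " << newEntries.size() << '\n'
            << "references on first entry: "
            << newEntries.begin()->second->refs << '\n';
}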
diff --git a/src/launcher/fetcher.cpp b/src/launcher/fetcher.cpp
index ef8d7eb..9cb8196 100644
--- a/src/launcher/fetcher.cpp
+++ b/src/launcher/fetcher.cpp
@@ -222,8 +222,6 @@ static Try<string> download(
   // Trim leading whitespace for 'sourceUri'.
   const string sourceUri = strings::trim(_sourceUri, strings::PREFIX);
 
-  LOG(INFO) << "Fetching URI '" << sourceUri << "'";
-
   Try<Nothing> validation = Fetcher::validateUri(sourceUri);
   if (validation.isError()) {
     return Error(validation.error());
@@ -289,7 +287,8 @@ static Try<string> fetchBypassingCache(
     const Option<string>& frameworksHome,
     const Option<Duration>& stallTimeout)
 {
-  LOG(INFO) << "Fetching directly into the sandbox directory";
+  LOG(INFO) << "Fetching '" << uri.value()
+            << "' directly into the sandbox directory";
 
   // TODO(mrbrowning): Factor out duplicated processing of "output_file" field
   // here and in fetchFromCache into a separate helper function.
@@ -347,7 +346,7 @@ static Try<string> fetchFromCache(
     const string& cacheDirectory,
     const string& sandboxDirectory)
 {
-  LOG(INFO) << "Fetching from cache";
+  LOG(INFO) << "Fetching URI '" << item.uri().value() << "' from cache";
 
   if (item.uri().has_output_file()) {
     string dirname = Path(item.uri().output_file()).dirname();
@@ -428,16 +427,19 @@ static Try<string> fetchThroughCache(
     << "Fetcher cache directory was expected to exist but was not found";
 
   if (item.action() == FetcherInfo::Item::DOWNLOAD_AND_CACHE) {
-    LOG(INFO) << "Downloading into cache";
-
-    Try<string> downloaded = download(
-        item.uri().value(),
-        path::join(cacheDirectory.get(), item.cache_filename()),
-        frameworksHome,
-        stallTimeout);
-
-    if (downloaded.isError()) {
-      return Error(downloaded.error());
+    const string cachePath =
+      path::join(cacheDirectory.get(), item.cache_filename());
+
+    if (!os::exists(cachePath)) {
+      Try<string> downloaded = download(
+          item.uri().value(),
+          cachePath,
+          frameworksHome,
+          stallTimeout);
+
+      if (downloaded.isError()) {
+        return Error(downloaded.error());
+      }
     }
   }
 
diff --git a/src/slave/containerizer/fetcher.cpp b/src/slave/containerizer/fetcher.cpp
index 17f5388..e848c86 100644
--- a/src/slave/containerizer/fetcher.cpp
+++ b/src/slave/containerizer/fetcher.cpp
@@ -406,12 +406,25 @@ Future<Nothing> FetcherProcess::fetch(
   // entry.
   hashmap<CommandInfo::URI, Option<Future<shared_ptr<Cache::Entry>>>> entries;
 
+  // When we create new entries, we need to track whether we already have
+  // an entry for the corresponding URI value. This handles the case where
+  // multiple entries have the same URI value (but hash differently because
+  // they differ in other fields). If we see the same URI value multiple
+  // times, then we simply add references to the initial entry.
+  hashmap<string, shared_ptr<Cache::Entry>> newEntries;
+
   foreach (const CommandInfo::URI& uri, commandInfo.uris()) {
     if (!uri.cache()) {
       entries[uri] = None();
       continue;
     }
 
+    if (newEntries.contains(uri.value())) {
+      newEntries[uri.value()]->reference();
+      entries[uri] = newEntries.at(uri.value());
+      continue;
+    }
+
     // Check if this is already in the cache (but not necessarily
     // downloaded).
     const Option<shared_ptr<Cache::Entry>> entry =
@@ -429,6 +442,7 @@ Future<Nothing> FetcherProcess::fetch(
       shared_ptr<Cache::Entry> newEntry =
         cache.create(cacheDirectory, commandUser, uri);
 
+      newEntries.put(uri.value(), newEntry);
       newEntry->reference();
 
       entries[uri] =
diff --git a/src/tests/fetcher_tests.cpp b/src/tests/fetcher_tests.cpp
index f3ea709..283238c 100644
--- a/src/tests/fetcher_tests.cpp
+++ b/src/tests/fetcher_tests.cpp
@@ -129,6 +129,61 @@ TEST_F(FetcherTest, FileURI)
 }
 
 
+// Verify that the fetcher cache correctly handles duplicates
+// of the same URI in the CommandInfo.
+TEST_F(FetcherTest, DuplicateFileURI)
+{
+  string fromDir = path::join(os::getcwd(), "from");
+  ASSERT_SOME(os::mkdir(fromDir));
+  string testFile = path::join(fromDir, "test");
+  EXPECT_SOME(os::write(testFile, "data"));
+
+  slave::Flags flags;
+  flags.launcher_dir = getLauncherDir();
+
+  ContainerID containerId;
+  containerId.set_value(id::UUID::random().toString());
+
+  CommandInfo commandInfo;
+
+  commandInfo.add_uris()->set_value(uri::from_path(testFile));
+  commandInfo.add_uris()->set_value(uri::from_path(testFile));
+  commandInfo.add_uris()->set_value(uri::from_path(testFile));
+
+  // Make each URI message different, even though they all
+  // have the same URI value.
+  commandInfo.mutable_uris(0)->set_cache(true);
+  commandInfo.mutable_uris(1)->set_cache(true);
+  commandInfo.mutable_uris(2)->set_cache(true);
+  commandInfo.mutable_uris(0)->set_output_file("one");
+  commandInfo.mutable_uris(1)->set_output_file("two");
+  commandInfo.mutable_uris(2)->set_output_file("three");
+
+  EXPECT_FALSE(os::exists("one"));
+  EXPECT_FALSE(os::exists("two"));
+  EXPECT_FALSE(os::exists("three"));
+
+  Fetcher fetcher(flags);
+
+  Future<Nothing> fetch = fetcher.fetch(
+      containerId, commandInfo, os::getcwd(), None());
+  AWAIT_READY(fetch);
+
+  EXPECT_TRUE(os::exists("one"));
+  EXPECT_TRUE(os::exists("two"));
+  EXPECT_TRUE(os::exists("three"));
+
+  // This is still only 1 task fetch.
+  verifyMetrics(1, 0);
+
+  // We should have only consumed cache space for a single URI.
+  JSON::Object metrics = Metrics();
+  EXPECT_SOME_EQ(
+    os::stat::size(testFile)->bytes(),
+    metrics.at<JSON::Number>("containerizer/fetcher/cache_size_used_bytes"));
+}
+
+
 TEST_F(FetcherTest, LogSuccessToStderr)
 {
   // Valid test file with data.
@@ -169,7 +224,7 @@ TEST_F(FetcherTest, LogSuccessToStderr)
   const Try<string> stderrContent = os::read(stderrFile);
   EXPECT_SOME(stderrContent);
   EXPECT_TRUE(strings::contains(
-      stderrContent.get(), "Fetching directly into the sandbox directory"));
+      stderrContent.get(), "directly into the sandbox directory"));
   EXPECT_TRUE(strings::contains(
       stderrContent.get(), "Successfully fetched all URIs into"));
 }


[mesos] 01/02: Add the output file to the hash on CommandInfo::URI.

Posted by jp...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jpeach pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/mesos.git

commit 9108078aa2c41e2f88e4c7d66c0159b1810d89de
Author: James Peach <jp...@apache.org>
AuthorDate: Wed Oct 17 15:22:27 2018 -0700

    Add the output file to the hash on CommandInfo::URI.
    
    The `output_file` field of the `CommandInfo::URI` is not combined
    into the hash value. This means that the fetcher does not consider two
    messages that differ only by the `output_file` as different. By adding
    the `output_file` to the hash, it becomes possible for a task to fetch
    the same URI to multiple, distinct outputs.
    
    Review: https://reviews.apache.org/r/68586/
---
 include/mesos/type_utils.hpp | 5 +++--
 include/mesos/v1/mesos.hpp   | 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)

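For illustration, a self-contained sketch of what this change
accomplishes (a simplified URI struct and a hand-rolled stand-in for
boost::hash_combine, not the actual Mesos headers): once output_file is
mixed into the hash, two URIs with the same value but different output
files no longer produce the same hash, so the fetcher's hashmap treats
them as distinct keys.

#include <cstddef>
#include <functional>
#include <iostream>
#include <string>

struct URI
{
  std::string value;
  std::string output_file;
};

// Stand-in for boost::hash_combine, using the same mixing constant.
inline void hash_combine(std::size_t& seed, const std::string& v)
{
  seed ^= std::hash<std::string>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}

std::size_t hash_uri(const URI& uri)
{
  std::size_t seed = 0;
  hash_combine(seed, uri.value);
  hash_combine(seed, uri.output_file);  // The field added by this commit.
  return seed;
}

int main()
{
  URI one{"file:///tmp/test", "one"};
  URI two{"file:///tmp/test", "two"};

  // Same value, different output_file: the hashes now differ
  // (barring an unlucky hash collision).
  std::cout << std::boolalpha
            << (hash_uri(one) != hash_uri(two)) << '\n';  // true
}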
diff --git a/include/mesos/type_utils.hpp b/include/mesos/type_utils.hpp
index 19ea817..aa61c0c 100644
--- a/include/mesos/type_utils.hpp
+++ b/include/mesos/type_utils.hpp
@@ -510,11 +510,11 @@ inline std::ostream& operator<<(
 namespace std {
 
 template <>
-struct hash<mesos::CommandInfo_URI>
+struct hash<mesos::CommandInfo::URI>
 {
   typedef size_t result_type;
 
-  typedef mesos::CommandInfo_URI argument_type;
+  typedef mesos::CommandInfo::URI argument_type;
 
   result_type operator()(const argument_type& uri) const
   {
@@ -529,6 +529,7 @@ struct hash<mesos::CommandInfo_URI>
     }
 
     boost::hash_combine(seed, uri.value());
+    boost::hash_combine(seed, uri.output_file());
     return seed;
   }
 };
diff --git a/include/mesos/v1/mesos.hpp b/include/mesos/v1/mesos.hpp
index fda3eb4..452bcf2 100644
--- a/include/mesos/v1/mesos.hpp
+++ b/include/mesos/v1/mesos.hpp
@@ -529,6 +529,7 @@ struct hash<mesos::v1::CommandInfo::URI>
     }
 
     boost::hash_combine(seed, uri.value());
+    boost::hash_combine(seed, uri.output_file());
     return seed;
   }
 };